summaryrefslogtreecommitdiffstats
path: root/gnu/usr.bin/perl/cpan/Unicode-Normalize/mkheader
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/usr.bin/perl/cpan/Unicode-Normalize/mkheader')
-rw-r--r--gnu/usr.bin/perl/cpan/Unicode-Normalize/mkheader419
1 files changed, 0 insertions, 419 deletions
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/mkheader b/gnu/usr.bin/perl/cpan/Unicode-Normalize/mkheader
deleted file mode 100644
index 8d4c1b8e8db..00000000000
--- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/mkheader
+++ /dev/null
@@ -1,419 +0,0 @@
-#!perl
-#
-# This auxiliary script makes five header files
-# used for building XSUB of Unicode::Normalize.
-#
-# Usage:
-# <do 'mkheader'> in perl, or <perl mkheader> in command line
-#
-# Input files:
-# unicore/CombiningClass.pl (or unicode/CombiningClass.pl)
-# unicore/Decomposition.pl (or unicode/Decomposition.pl)
-#
-# Output files:
-# unfcan.h
-# unfcpt.h
-# unfcmb.h
-# unfcmp.h
-# unfexc.h
-#
-use 5.006;
-use strict;
-use warnings;
-use Carp;
-use File::Spec;
-use SelectSaver;
-
-BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- die "Unicode::Normalize cannot stringify a Unicode code point\n";
- }
- unless (0x41 == unpack('U', 'A')) {
- die "Unicode::Normalize cannot get Unicode code point\n";
- }
-}
-
-our $PACKAGE = 'Unicode::Normalize, mkheader';
-
-our $prefix = "UNF_";
-our $structname = "${prefix}complist";
-
-# Starting in v5.20, the tables in lib/unicore are built using the platform's
-# native character set for code points 0-255.
-*pack_U = ($] ge 5.020)
- ? sub { return pack('W*', @_).pack('U*'); } # The empty pack returns
- # an empty UTF-8 string,
- # so the effect is to
- # force the return into
- # being UTF-8.
- : sub { return pack('U*', @_); };
-
-# %Canon and %Compat will be ($codepoint => $hexstring) after _U_stringify()
-our %Comp1st; # $codepoint => $listname : may be composed with a next char.
-our %CompList; # $listname,$2nd => $codepoint : composite
-
-##### The below part is common to mkheader and PP #####
-
-our %Combin; # $codepoint => $number : combination class
-our %Canon; # $codepoint => \@codepoints : canonical decomp.
-our %Compat; # $codepoint => \@codepoints : compat. decomp.
-our %Compos; # $1st,$2nd => $codepoint : composite
-our %Exclus; # $codepoint => 1 : composition exclusions
-our %Single; # $codepoint => 1 : singletons
-our %NonStD; # $codepoint => 1 : non-starter decompositions
-our %Comp2nd; # $codepoint => 1 : may be composed with a prev char.
-
-# from core Unicode database
-our $Combin = do "unicore/CombiningClass.pl"
- || do "unicode/CombiningClass.pl"
- || croak "$PACKAGE: CombiningClass.pl not found";
-our $Decomp = do "unicore/Decomposition.pl"
- || do "unicode/Decomposition.pl"
- || croak "$PACKAGE: Decomposition.pl not found";
-
-# CompositionExclusions.txt since Unicode 3.2.0. If this ever changes, it
-# would be better to get the values from Unicode::UCD rather than hard-code
-# them here, as that will protect from having to make fixes for future
-# changes.
-our @CompEx = qw(
- 0958 0959 095A 095B 095C 095D 095E 095F 09DC 09DD 09DF 0A33 0A36
- 0A59 0A5A 0A5B 0A5E 0B5C 0B5D 0F43 0F4D 0F52 0F57 0F5C 0F69 0F76
- 0F78 0F93 0F9D 0FA2 0FA7 0FAC 0FB9 FB1D FB1F FB2A FB2B FB2C FB2D
- FB2E FB2F FB30 FB31 FB32 FB33 FB34 FB35 FB36 FB38 FB39 FB3A FB3B
- FB3C FB3E FB40 FB41 FB43 FB44 FB46 FB47 FB48 FB49 FB4A FB4B FB4C
- FB4D FB4E 2ADC 1D15E 1D15F 1D160 1D161 1D162 1D163 1D164 1D1BB
- 1D1BC 1D1BD 1D1BE 1D1BF 1D1C0
-);
-
-# definition of Hangul constants
-use constant SBase => 0xAC00;
-use constant SFinal => 0xD7A3; # SBase -1 + SCount
-use constant SCount => 11172; # LCount * NCount
-use constant NCount => 588; # VCount * TCount
-use constant LBase => 0x1100;
-use constant LFinal => 0x1112;
-use constant LCount => 19;
-use constant VBase => 0x1161;
-use constant VFinal => 0x1175;
-use constant VCount => 21;
-use constant TBase => 0x11A7;
-use constant TFinal => 0x11C2;
-use constant TCount => 28;
-
-sub decomposeHangul {
- my $sindex = $_[0] - SBase;
- my $lindex = int( $sindex / NCount);
- my $vindex = int(($sindex % NCount) / TCount);
- my $tindex = $sindex % TCount;
- my @ret = (
- LBase + $lindex,
- VBase + $vindex,
- $tindex ? (TBase + $tindex) : (),
- );
- return wantarray ? @ret : pack_U(@ret);
-}
-
-########## getting full decomposition ##########
-
-## converts string "hhhh hhhh hhhh" to a numeric list
-## (hex digits separated by spaces)
-sub _getHexArray { map hex, $_[0] =~ /\G *([0-9A-Fa-f]+)/g }
-
-while ($Combin =~ /(.+)/g) {
- my @tab = split /\t/, $1;
- my $ini = hex $tab[0];
- if ($tab[1] eq '') {
- $Combin{$ini} = $tab[2];
- } else {
- $Combin{$_} = $tab[2] foreach $ini .. hex($tab[1]);
- }
-}
-
-while ($Decomp =~ /(.+)/g) {
- my @tab = split /\t/, $1;
- my $compat = $tab[2] =~ s/<[^>]+>//;
- my $dec = [ _getHexArray($tab[2]) ]; # decomposition
- my $ini = hex($tab[0]); # initial decomposable character
- my $end = $tab[1] eq '' ? $ini : hex($tab[1]);
- # ($ini .. $end) is the range of decomposable characters.
-
- foreach my $u ($ini .. $end) {
- $Compat{$u} = $dec;
- $Canon{$u} = $dec if ! $compat;
- }
-}
-
-for my $s (@CompEx) {
- my $u = hex $s;
- next if !$Canon{$u}; # not assigned
- next if $u == 0xFB1D && !$Canon{0x1D15E}; # 3.0.1 before Corrigendum #2
- $Exclus{$u} = 1;
-}
-
-foreach my $u (keys %Canon) {
- my $dec = $Canon{$u};
-
- if (@$dec == 2) {
- if ($Combin{ $dec->[0] }) {
- $NonStD{$u} = 1;
- } else {
- $Compos{ $dec->[0] }{ $dec->[1] } = $u;
- $Comp2nd{ $dec->[1] } = 1 if ! $Exclus{$u};
- }
- } elsif (@$dec == 1) {
- $Single{$u} = 1;
- } else {
- my $h = sprintf '%04X', $u;
- croak("Weird Canonical Decomposition of U+$h");
- }
-}
-
-# modern HANGUL JUNGSEONG and HANGUL JONGSEONG jamo
-foreach my $j (0x1161..0x1175, 0x11A8..0x11C2) {
- $Comp2nd{$j} = 1;
-}
-
-sub getCanonList {
- my @src = @_;
- my @dec = map {
- (SBase <= $_ && $_ <= SFinal) ? decomposeHangul($_)
- : $Canon{$_} ? @{ $Canon{$_} } : $_
- } @src;
- return join(" ",@src) eq join(" ",@dec) ? @dec : getCanonList(@dec);
- # condition @src == @dec is not ok.
-}
-
-sub getCompatList {
- my @src = @_;
- my @dec = map {
- (SBase <= $_ && $_ <= SFinal) ? decomposeHangul($_)
- : $Compat{$_} ? @{ $Compat{$_} } : $_
- } @src;
- return join(" ",@src) eq join(" ",@dec) ? @dec : getCompatList(@dec);
- # condition @src == @dec is not ok.
-}
-
-# exhaustive decomposition
-foreach my $key (keys %Canon) {
- $Canon{$key} = [ getCanonList($key) ];
-}
-
-# exhaustive decomposition
-foreach my $key (keys %Compat) {
- $Compat{$key} = [ getCompatList($key) ];
-}
-
-##### The above part is common to mkheader and PP #####
-
-foreach my $comp1st (keys %Compos) {
- my $listname = sprintf("${structname}_%06x", $comp1st);
- # %04x is bad since it'd place _3046 after _1d157.
- $Comp1st{$comp1st} = $listname;
- my $rh1st = $Compos{$comp1st};
-
- foreach my $comp2nd (keys %$rh1st) {
- my $uc = $rh1st->{$comp2nd};
- $CompList{$listname}{$comp2nd} = $uc;
- }
-}
-
-sub split_into_char {
- use bytes;
- my $uni = shift;
- my $len = length($uni);
- my @ary;
- for(my $i = 0; $i < $len; ++$i) {
- push @ary, ord(substr($uni,$i,1));
- }
- return @ary;
-}
-
-sub _U_stringify {
- sprintf '"%s"', join '',
- map sprintf("\\x%02x", $_), split_into_char(pack_U(@_));
-}
-
-foreach my $hash (\%Canon, \%Compat) {
- foreach my $key (keys %$hash) {
- $hash->{$key} = _U_stringify( @{ $hash->{$key} } );
- }
-}
-
-########## writing header files ##########
-
-my @boolfunc = (
- {
- name => "Exclusion",
- type => "bool",
- hash => \%Exclus,
- },
- {
- name => "Singleton",
- type => "bool",
- hash => \%Single,
- },
- {
- name => "NonStDecomp",
- type => "bool",
- hash => \%NonStD,
- },
- {
- name => "Comp2nd",
- type => "bool",
- hash => \%Comp2nd,
- },
-);
-
-my $orig_fh = SelectSaver->new;
-{
-
-my $file = "unfexc.h";
-open FH, ">$file" or croak "$PACKAGE: $file can't be made";
-binmode FH; select FH;
-
- print << 'EOF';
-/*
- * This file is auto-generated by mkheader.
- * Any changes here will be lost!
- */
-EOF
-
-foreach my $tbl (@boolfunc) {
- my @temp = sort {$a <=> $b} keys %{$tbl->{hash}};
- my $type = $tbl->{type};
- my $name = $tbl->{name};
- print "$type is$name (UV uv)\n{\nreturn\n\t";
-
- while (@temp) {
- my $cur = shift @temp;
- if (@temp && $cur + 1 == $temp[0]) {
- print "($cur <= uv && uv <= ";
- while (@temp && $cur + 1 == $temp[0]) {
- $cur = shift @temp;
- }
- print "$cur)";
- print "\n\t|| " if @temp;
- } else {
- print "uv == $cur";
- print "\n\t|| " if @temp;
- }
- }
- print "\n\t? TRUE : FALSE;\n}\n\n";
-}
-
-close FH;
-
-####################################
-
-my $compinit =
- "typedef struct { UV nextchar; UV composite; } $structname;\n\n";
-
-foreach my $i (sort keys %CompList) {
- $compinit .= "$structname $i [] = {\n";
- $compinit .= join ",\n",
- map sprintf("\t{ %d, %d }", $_, $CompList{$i}{$_}),
- sort {$a <=> $b } keys %{ $CompList{$i} };
- $compinit .= ",\n{0,0}\n};\n\n"; # with sentinel
-}
-
-my @tripletable = (
- {
- file => "unfcmb",
- name => "combin",
- type => "STDCHAR",
- hash => \%Combin,
- null => 0,
- },
- {
- file => "unfcan",
- name => "canon",
- type => "char*",
- hash => \%Canon,
- null => "NULL",
- },
- {
- file => "unfcpt",
- name => "compat",
- type => "char*",
- hash => \%Compat,
- null => "NULL",
- },
- {
- file => "unfcmp",
- name => "compos",
- type => "$structname *",
- hash => \%Comp1st,
- null => "NULL",
- init => $compinit,
- },
-);
-
-foreach my $tbl (@tripletable) {
- my $file = "$tbl->{file}.h";
- my $head = "${prefix}$tbl->{name}";
- my $type = $tbl->{type};
- my $hash = $tbl->{hash};
- my $null = $tbl->{null};
- my $init = $tbl->{init};
-
- open FH, ">$file" or croak "$PACKAGE: $file can't be made";
- binmode FH; select FH;
- my %val;
-
- print FH << 'EOF';
-/*
- * This file is auto-generated by mkheader.
- * Any changes here will be lost!
- */
-EOF
-
- print $init if defined $init;
-
- foreach my $uv (keys %$hash) {
- croak sprintf("a Unicode code point 0x%04X over 0x10FFFF.", $uv)
- unless $uv <= 0x10FFFF;
- my @c = unpack 'CCCC', pack 'N', $uv;
- $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv};
- }
-
- foreach my $p (sort { $a <=> $b } keys %val) {
- next if ! $val{ $p };
- for (my $r = 0; $r < 256; $r++) {
- next if ! $val{ $p }{ $r };
- printf "static $type ${head}_%02x_%02x [256] = {\n", $p, $r;
- for (my $c = 0; $c < 256; $c++) {
- print "\t", defined $val{$p}{$r}{$c}
- ? "($type)".$val{$p}{$r}{$c}
- : $null;
- print ',' if $c != 255;
- print "\n" if $c % 8 == 7;
- }
- print "};\n\n";
- }
- }
- foreach my $p (sort { $a <=> $b } keys %val) {
- next if ! $val{ $p };
- printf "static $type* ${head}_%02x [256] = {\n", $p;
- for (my $r = 0; $r < 256; $r++) {
- print $val{ $p }{ $r }
- ? sprintf("${head}_%02x_%02x", $p, $r)
- : "NULL";
- print ',' if $r != 255;
- print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0;
- }
- print "};\n\n";
- }
- print "static $type** $head [] = {\n";
- for (my $p = 0; $p <= 0x10; $p++) {
- print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL";
- print ',' if $p != 0x10;
- print "\n";
- }
- print "};\n\n";
- close FH;
-}
-
-} # End of block for SelectSaver
-
-1;
-__END__