diff options
author | 2019-02-13 21:15:00 +0000 | |
---|---|---|
committer | 2019-02-13 21:15:00 +0000 | |
commit | 9f11ffb7133c203312a01e4b986886bc88c7d74b (patch) | |
tree | 6618511204c614b20256e4ef9dea39a7b311d638 /gnu/usr.bin/perl/cpan/Unicode-Collate/Collate.pm | |
parent | Import perl-5.28.1 (diff) | |
download | wireguard-openbsd-9f11ffb7133c203312a01e4b986886bc88c7d74b.tar.xz wireguard-openbsd-9f11ffb7133c203312a01e4b986886bc88c7d74b.zip |
Fix merge issues, remove excess files - match perl-5.28.1 dist
looking good sthen@, Great! bluhm@
Diffstat (limited to 'gnu/usr.bin/perl/cpan/Unicode-Collate/Collate.pm')
-rw-r--r-- | gnu/usr.bin/perl/cpan/Unicode-Collate/Collate.pm | 94 |
1 files changed, 54 insertions, 40 deletions
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Collate/Collate.pm b/gnu/usr.bin/perl/cpan/Unicode-Collate/Collate.pm index 493c281a837..ac1cd03f604 100644 --- a/gnu/usr.bin/perl/cpan/Unicode-Collate/Collate.pm +++ b/gnu/usr.bin/perl/cpan/Unicode-Collate/Collate.pm @@ -17,17 +17,16 @@ use File::Spec; no warnings 'utf8'; -our $VERSION = '1.14'; +our $VERSION = '1.25'; our $PACKAGE = __PACKAGE__; ### begin XS only ### -require DynaLoader; -our @ISA = qw(DynaLoader); -bootstrap Unicode::Collate $VERSION; +use XSLoader (); +XSLoader::load('Unicode::Collate', $VERSION); ### end XS only ### my @Path = qw(Unicode Collate); -my $KeyFile = "allkeys.txt"; +my $KeyFile = 'allkeys.txt'; # Perl's boolean use constant TRUE => 1; @@ -89,9 +88,9 @@ my $DefaultRearrange = [ 0x0E40..0x0E44, 0x0EC0..0x0EC4 ]; my $HighestVCE = pack(VCE_TEMPLATE, 0, 0xFFFE, 0x20, 0x5, 0xFFFF); my $minimalVCE = pack(VCE_TEMPLATE, 0, 1, 0x20, 0x5, 0xFFFE); -sub UCA_Version { "30" } +sub UCA_Version { '34' } -sub Base_Unicode_Version { "7.0.0" } +sub Base_Unicode_Version { '9.0.0' } ###### @@ -189,11 +188,14 @@ my %DerivCode = ( 26 => \&_derivCE_24, # 26 == 24 28 => \&_derivCE_24, # 28 == 24 30 => \&_derivCE_24, # 30 == 24 + 32 => \&_derivCE_32, + 34 => \&_derivCE_34, + 36 => \&_derivCE_36, ); sub checkCollator { my $self = shift; - _checkLevel($self->{level}, "level"); + _checkLevel($self->{level}, 'level'); $self->{derivCode} = $DerivCode{ $self->{UCA_Version} } or croak "Illegal UCA version (passed $self->{UCA_Version})."; @@ -207,13 +209,13 @@ sub checkCollator { if (! defined $self->{backwards}) { $self->{backwardsFlag} = 0; } elsif (! ref $self->{backwards}) { - _checkLevel($self->{backwards}, "backwards"); + _checkLevel($self->{backwards}, 'backwards'); $self->{backwardsFlag} = 1 << $self->{backwards}; } else { my %level; $self->{backwardsFlag} = 0; for my $b (@{ $self->{backwards} }) { - _checkLevel($b, "backwards"); + _checkLevel($b, 'backwards'); $level{$b} = 1; } for my $v (sort keys %level) { @@ -438,13 +440,17 @@ sub parseEntry # if and only if "all" CEs are [.0000.0000.0000]. } + # mapping: be an array ref or not exists (any false value is disallowed) $self->{mapping}{$entry} = $is_L3_ignorable ? [] : \@key; + # maxlength: be more than 1 or not exists (any false value is disallowed) if (@uv > 1) { if (!$self->{maxlength}{$uv[0]} || $self->{maxlength}{$uv[0]} < @uv) { $self->{maxlength}{$uv[0]} = @uv; } } + + # contraction: be 1 or not exists (any false value is disallowed) while (@uv > 2) { pop @uv; my $fake_entry = join(CODE_SEP, @uv); # in JCPS @@ -513,7 +519,7 @@ sub splitEnt if ($vers <= 20 && _isIllegal($src[$i])) { $src[$i] = undef; } elsif ($ver9) { - $src[$i] = undef if $map->{ $src[$i] } + $src[$i] = undef if exists $map->{ $src[$i] } ? @{ $map->{ $src[$i] } } == 0 : $uXS && _ignorable_simple($src[$i]); ### XS only } @@ -533,7 +539,7 @@ sub splitEnt my $i_orig = $i; # find contraction - if ($max->{$jcps}) { + if (exists $max->{$jcps}) { my $temp_jcps = $jcps; my $jcpsLen = 1; my $maxLen = $max->{$jcps}; @@ -542,7 +548,7 @@ sub splitEnt next if ! defined $src[$p]; $temp_jcps .= CODE_SEP . $src[$p]; $jcpsLen++; - if ($map->{$temp_jcps}) { + if (exists $map->{$temp_jcps}) { $jcps = $temp_jcps; $i = $p; } @@ -569,7 +575,7 @@ sub splitEnt last unless $curCC; my $tail = CODE_SEP . $src[$p]; - if ($preCC != $curCC && $map->{$jcps.$tail}) { + if ($preCC != $curCC && exists $map->{$jcps.$tail}) { $jcps .= $tail; push @out, $p; } else { @@ -578,8 +584,9 @@ sub splitEnt next if !$long; - if ($preCC_uc != $curCC && ($map->{$jcps_uc.$tail} || - $cont->{$jcps_uc.$tail})) { + if ($preCC_uc != $curCC && + (exists $map->{$jcps_uc.$tail} || + exists $cont->{$jcps_uc.$tail})) { $jcps_uc .= $tail; push @out_uc, $p; } else { @@ -587,7 +594,7 @@ sub splitEnt } } - if (@out_uc && $map->{$jcps_uc}) { + if (@out_uc && exists $map->{$jcps_uc}) { $jcps = $jcps_uc; $src[$_] = undef for @out_uc; } else { @@ -597,7 +604,7 @@ sub splitEnt } # skip completely ignorable - if ($map->{$jcps} ? @{ $map->{$jcps} } == 0 : + if (exists $map->{$jcps} ? @{ $map->{$jcps} } == 0 : $uXS && $jcps !~ /;/ && _ignorable_simple($jcps)) { ### XS only if ($wLen && @buf) { $buf[-1][2] = $i + 1; @@ -646,7 +653,7 @@ sub getWt $u = 0xFFFD if $u !~ /;/ && 0x10FFFF < $u && !$out; my @ce; - if ($map->{$u}) { + if (exists $map->{$u}) { @ce = @{ $map->{$u} }; # $u may be a contraction ### begin XS only ### } elsif ($uXS && _exists_simple($u)) { @@ -664,27 +671,27 @@ sub getWt if (@decH == 2) { my $contract = join(CODE_SEP, @decH); - @decH = ($contract) if $map->{$contract}; + @decH = ($contract) if exists $map->{$contract}; } else { # must be <@decH == 3> - if ($max->{$decH[0]}) { + if (exists $max->{$decH[0]}) { my $contract = join(CODE_SEP, @decH); - if ($map->{$contract}) { + if (exists $map->{$contract}) { @decH = ($contract); } else { $contract = join(CODE_SEP, @decH[0,1]); - $map->{$contract} and @decH = ($contract, $decH[2]); + exists $map->{$contract} and @decH = ($contract, $decH[2]); } # even if V's ignorable, LT contraction is not supported. # If such a situation were required, NFD should be used. } - if (@decH == 3 && $max->{$decH[1]}) { + if (@decH == 3 && exists $max->{$decH[1]}) { my $contract = join(CODE_SEP, @decH[1,2]); - $map->{$contract} and @decH = ($decH[0], $contract); + exists $map->{$contract} and @decH = ($decH[0], $contract); } } @ce = map({ - $map->{$_} ? @{ $map->{$_} } : + exists $map->{$_} ? @{ $map->{$_} } : $uXS && _exists_simple($_) ? _fetch_simple($_) : ### XS only $der->($_); } @decH); @@ -1097,7 +1104,7 @@ If the revision (previously "tracking version") number of UCA is given, behavior of that revision is emulated on collating. If omitted, the return value of C<UCA_Version()> is used. -The following revisions are supported. The default is 30. +The following revisions are supported. The default is 34. UCA Unicode Standard DUCET (@version) ------------------------------------------------------- @@ -1113,8 +1120,11 @@ The following revisions are supported. The default is 30. 26 6.2.0 6.2.0 (6.2.0) 28 6.3.0 6.3.0 (6.3.0) 30 7.0.0 7.0.0 (7.0.0) + 32 8.0.0 8.0.0 (8.0.0) + 34 9.0.0 9.0.0 (9.0.0) + 36 10.0.0 10.0.0(10.0.0) -* See below C<long_contraction> with C<UCA_Version> 22 and 24. +* See below for C<long_contraction> with C<UCA_Version> 22 and 24. * Noncharacters (e.g. U+FFFF) are not ignored, and can be overridden since C<UCA_Version> 22. @@ -1229,7 +1239,7 @@ table beforehand. =item highestFFFF --- see 5.14 Collation Elements, UTS #35. +-- see 2.4 Tailored noncharacter weights, UTS #35 (LDML) Part 5: Collation. If the parameter is made true, C<U+FFFF> has a highest primary weight. When a boolean of C<$coll-E<gt>ge($str, "abc")> and @@ -1373,7 +1383,7 @@ contraction C<0FB2 0F71> prohibits C<0FB2 0F71 0F80> from being detected. =item minimalFFFE --- see 5.14 Collation Elements, UTS #35. +-- see 1.1.1 U+FFFE, UTS #35 (LDML) Part 5: Collation. If the parameter is made true, C<U+FFFE> has a minimal primary weight. The comparison between C<"$a1\x{FFFE}$a2"> and C<"$b1\x{FFFE}$b2"> @@ -1451,12 +1461,16 @@ those in the CJK Unified Ideographs Extension A etc. U+4E00..U+9FBB if UCA_Version is 14 or 16. U+4E00..U+9FC3 if UCA_Version is 18. U+4E00..U+9FCB if UCA_Version is 20 or 22. - U+4E00..U+9FCC if UCA_Version is 24 or later. + U+4E00..U+9FCC if UCA_Version is 24 to 30. + U+4E00..U+9FD5 if UCA_Version is 32 or 34. + U+4E00..U+9FEA if UCA_Version is 36. In the CJK Unified Ideographs Extension blocks: Ext.A (U+3400..U+4DB5) and Ext.B (U+20000..U+2A6D6) in any UCA_Version. Ext.C (U+2A700..U+2B734) if UCA_Version is 20 or later. Ext.D (U+2B740..U+2B81D) if UCA_Version is 22 or later. + Ext.E (U+2B820..U+2CEA1) if UCA_Version is 32 or later. + Ext.F (U+2CEB0..U+2EBE0) if UCA_Version is 36. Through C<overrideCJK>, ordering of CJK unified ideographs (including extensions) can be overridden. @@ -1648,8 +1662,7 @@ rewriting lines on reading an unmodified table every time. =item suppress --- see suppress contractions in 5.14.11 Special-Purpose Commands, -UTS #35 (LDML). +-- see 3.12 Special-Purpose Commands, UTS #35 (LDML) Part 5: Collation. Contractions beginning with the specified characters are suppressed, even if those contractions are defined in C<table>. @@ -1660,7 +1673,7 @@ An example for Russian and some languages using the Cyrillic script: where 0x0400 stands for C<U+0400>, CYRILLIC CAPITAL LETTER IE WITH GRAVE. -B<NOTE>: Contractions via C<entry> are not be suppressed. +B<NOTE>: Contractions via C<entry> will not be suppressed. =item table @@ -1707,7 +1720,7 @@ specified as a comment (following C<#>) on each line. =item undefName --- see 6.3.4 Reducing the Repertoire, UTS #10. +-- see 6.3.3 Reducing the Repertoire, UTS #10. Undefines the collation element as if it were unassigned in the C<table>. This reduces the size of the table. @@ -2034,7 +2047,8 @@ The most preferable one is "The Default Unicode Collation Element Table" http://www.unicode.org/Public/UCA/ - http://www.unicode.org/Public/UCA/latest/allkeys.txt (latest version) + http://www.unicode.org/Public/UCA/latest/allkeys.txt + (latest version) If DUCET is not installed, it is recommended to copy the file from http://www.unicode.org/Public/UCA/latest/allkeys.txt @@ -2078,16 +2092,16 @@ B<Unicode::Normalize is required to try The Conformance Test.> =head1 AUTHOR, COPYRIGHT AND LICENSE The Unicode::Collate module for perl was written by SADAHIRO Tomoyuki, -<SADAHIRO@cpan.org>. This module is Copyright(C) 2001-2014, +<SADAHIRO@cpan.org>. This module is Copyright(C) 2001-2017, SADAHIRO Tomoyuki. Japan. All rights reserved. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. The file Unicode/Collate/allkeys.txt was copied verbatim -from L<http://www.unicode.org/Public/UCA/6.3.0/allkeys.txt>. -For this file, Copyright (c) 2001-2012 Unicode, Inc. -Distributed under the Terms of Use in L<http://www.unicode.org/copyright.html>. +from L<http://www.unicode.org/Public/UCA/9.0.0/allkeys.txt>. +For this file, Copyright (c) 2016 Unicode, Inc.; distributed +under the Terms of Use in L<http://www.unicode.org/terms_of_use.html> =head1 SEE ALSO |