import perl 5.16.3 from CPAN - worked on by Andrew Fresh and myself

author: sthen <sthen@openbsd.org> 2013-03-25 20:06:16 +0000
committer: sthen <sthen@openbsd.org> 2013-03-25 20:06:16 +0000
commit: 898184e3e61f9129feb5978fad5a8c6865f00b92 (patch)
tree: 56f32aefc1eed60b534611007c7856f82697a205 /gnu/usr.bin/perl/cpan/Unicode-Collate/Collate.pm
parent: PGSHIFT -> PAGE_SHIFT (diff)
download: wireguard-openbsd-898184e3e61f9129feb5978fad5a8c6865f00b92.tar.xz
wireguard-openbsd-898184e3e61f9129feb5978fad5a8c6865f00b92.zip
1 files changed, 409 insertions, 372 deletions
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Collate/Collate.pm b/gnu/usr.bin/perl/cpan/Unicode-Collate/Collate.pm
index 2e989d6bb54..da362c15b43 100644
--- a/gnu/usr.bin/perl/cpan/Unicode-Collate/Collate.pm
+++ b/gnu/usr.bin/perl/cpan/Unicode-Collate/Collate.pm
@@ -14,9 +14,15 @@ use File::Spec;
 
 no warnings 'utf8';
 
-our $VERSION = '0.52_01';
+our $VERSION = '0.89';
 our $PACKAGE = __PACKAGE__;
 
+### begin XS only ###
+require DynaLoader;
+our @ISA = qw(DynaLoader);
+bootstrap Unicode::Collate $VERSION;
+### end XS only ###
+
 my @Path = qw(Unicode Collate);
 my $KeyFile = "allkeys.txt";
 
@@ -65,42 +71,22 @@ use constant LEVEL_SEP => "\0\0";
 # This character must not be included in any stringified
 # representation of an integer.
 use constant CODE_SEP => ';';
+	# NOTE: in regex /;/ is used for $jcps!
 
 # boolean values of variable weights
 use constant NON_VAR => 0; # Non-Variable character
 use constant VAR     => 1; # Variable character
 
 # specific code points
-use constant Hangul_LBase  => 0x1100;
-use constant Hangul_LIni   => 0x1100;
-use constant Hangul_LFin   => 0x1159;
-use constant Hangul_LFill  => 0x115F;
-use constant Hangul_VBase  => 0x1161;
-use constant Hangul_VIni   => 0x1160; # from Vowel Filler
-use constant Hangul_VFin   => 0x11A2;
-use constant Hangul_TBase  => 0x11A7; # from "no-final" codepoint
-use constant Hangul_TIni   => 0x11A8;
-use constant Hangul_TFin   => 0x11F9;
-use constant Hangul_TCount => 28;
-use constant Hangul_NCount => 588;
-use constant Hangul_SBase  => 0xAC00;
 use constant Hangul_SIni   => 0xAC00;
 use constant Hangul_SFin   => 0xD7A3;
-use constant CJK_UidIni    => 0x4E00;
-use constant CJK_UidFin    => 0x9FA5;
-use constant CJK_UidF41    => 0x9FBB;
-use constant CJK_ExtAIni   => 0x3400;
-use constant CJK_ExtAFin   => 0x4DB5;
-use constant CJK_ExtBIni   => 0x20000;
-use constant CJK_ExtBFin   => 0x2A6D6;
-use constant BMP_Max       => 0xFFFF;
 
 # Logical_Order_Exception in PropList.txt
 my $DefaultRearrange = [ 0x0E40..0x0E44, 0x0EC0..0x0EC4 ];
 
-sub UCA_Version { "14" }
+sub UCA_Version { "24" }
 
-sub Base_Unicode_Version { "4.1.0" }
+sub Base_Unicode_Version { "6.1.0" }
 
 ######
 
@@ -108,10 +94,6 @@ sub pack_U {
     return pack('U*', @_);
 }
 
-sub unpack_U {
-    return unpack('U*', shift(@_).pack('U*'));
-}
-
 ######
 
 my (%VariableOK);
@@ -121,18 +103,19 @@ my (%VariableOK);
 
 our @ChangeOK = qw/
     alternate backwards level normalization rearrange
-    katakana_before_hiragana upper_before_lower
+    katakana_before_hiragana upper_before_lower ignore_level2
     overrideHangul overrideCJK preprocess UCA_Version
     hangul_terminator variable
   /;
 
 our @ChangeNG = qw/
-    entry mapping table maxlength
-    ignoreChar ignoreName undefChar undefName variableTable
-    versionTable alternateTable backwardsTable forwardsTable rearrangeTable
-    derivCode normCode rearrangeHash
-    backwardsFlag
-  /;
+    entry mapping table maxlength contraction
+    ignoreChar ignoreName undefChar undefName rewrite
+    versionTable alternateTable backwardsTable forwardsTable
+    rearrangeTable variableTable
+    derivCode normCode rearrangeHash backwardsFlag
+    suppress suppressHash
+    __useXS /; ### XS only
 # The hash key 'ignored' is deleted at v 0.21.
 # The hash key 'isShift' is deleted at v 0.23.
 # The hash key 'combining' is deleted at v 0.24.
@@ -188,6 +171,11 @@ my %DerivCode = (
     9 => \&_derivCE_9,
    11 => \&_derivCE_9, # 11 == 9
    14 => \&_derivCE_14,
+   16 => \&_derivCE_14, # 16 == 14
+   18 => \&_derivCE_18,
+   20 => \&_derivCE_20,
+   22 => \&_derivCE_22,
+   24 => \&_derivCE_24,
 );
 
 sub checkCollator {
@@ -261,13 +249,28 @@ sub new
     my $class = shift;
     my $self = bless { @_ }, $class;
 
+### begin XS only ###
+    if (! exists $self->{table}     && !defined $self->{rewrite} &&
+	!defined $self->{undefName} && !defined $self->{ignoreName} &&
+	!defined $self->{undefChar} && !defined $self->{ignoreChar}) {
+	$self->{__useXS} = \&_fetch_simple;
+    } else {
+	$self->{__useXS} = undef;
+    }
+### end XS only ###
+
+    # keys of $self->{suppressHash} are $self->{suppress}.
+    if ($self->{suppress} && @{ $self->{suppress} }) {
+	@{ $self->{suppressHash} }{ @{ $self->{suppress} } } = ();
+    } # before read_table()
+
     # If undef is passed explicitly, no file is read.
     $self->{table} = $KeyFile if ! exists $self->{table};
     $self->read_table() if defined $self->{table};
 
     if ($self->{entry}) {
 	while ($self->{entry} =~ /([^\n]+)/g) {
-	    $self->parseEntry($1);
+	    $self->parseEntry($1, TRUE);
 	}
     }
 
@@ -291,9 +294,49 @@ sub new
     return $self;
 }
 
+sub parseAtmark {
+    my $self = shift;
+    my $line = shift; # after s/^\s*\@//
+
+    if ($line =~ /^version\s*(\S*)/) {
+	$self->{versionTable} ||= $1;
+    }
+    elsif ($line =~ /^variable\s+(\S*)/) { # since UTS #10-9
+	$self->{variableTable} ||= $1;
+    }
+    elsif ($line =~ /^alternate\s+(\S*)/) { # till UTS #10-8
+	$self->{alternateTable} ||= $1;
+    }
+    elsif ($line =~ /^backwards\s+(\S*)/) {
+	push @{ $self->{backwardsTable} }, $1;
+    }
+    elsif ($line =~ /^forwards\s+(\S*)/) { # parhaps no use
+	push @{ $self->{forwardsTable} }, $1;
+    }
+    elsif ($line =~ /^rearrange\s+(.*)/) { # (\S*) is NG
+	push @{ $self->{rearrangeTable} }, _getHexArray($1);
+    }
+}
+
 sub read_table {
     my $self = shift;
 
+### begin XS only ###
+    if ($self->{__useXS}) {
+	my @rest = _fetch_rest(); # complex matter need to parse
+	for my $line (@rest) {
+	    next if $line =~ /^\s*#/;
+
+	    if ($line =~ s/^\s*\@//) {
+		$self->parseAtmark($line);
+	    } else {
+		$self->parseEntry($line);
+	    }
+	}
+	return;
+    }
+### end XS only ###
+
     my($f, $fh);
     foreach my $d (@INC) {
 	$f = File::Spec->catfile($d, @Path, $self->{table});
@@ -307,29 +350,11 @@ sub read_table {
 
     while (my $line = <$fh>) {
 	next if $line =~ /^\s*#/;
-	unless ($line =~ s/^\s*\@//) {
-	    $self->parseEntry($line);
-	    next;
-	}
 
-	# matched ^\s*\@
-	if ($line =~ /^version\s*(\S*)/) {
-	    $self->{versionTable} ||= $1;
-	}
-	elsif ($line =~ /^variable\s+(\S*)/) { # since UTS #10-9
-	    $self->{variableTable} ||= $1;
-	}
-	elsif ($line =~ /^alternate\s+(\S*)/) { # till UTS #10-8
-	    $self->{alternateTable} ||= $1;
-	}
-	elsif ($line =~ /^backwards\s+(\S*)/) {
-	    push @{ $self->{backwardsTable} }, $1;
-	}
-	elsif ($line =~ /^forwards\s+(\S*)/) { # parhaps no use
-	    push @{ $self->{forwardsTable} }, $1;
-	}
-	elsif ($line =~ /^rearrange\s+(.*)/) { # (\S*) is NG
-	    push @{ $self->{rearrangeTable} }, _getHexArray($1);
+	if ($line =~ s/^\s*\@//) {
+	    $self->parseAtmark($line);
+	} else {
+	    $self->parseEntry($line);
 	}
     }
     close $fh;
@@ -343,8 +368,13 @@ sub parseEntry
 {
     my $self = shift;
     my $line = shift;
+    my $tailoring = shift;
     my($name, $entry, @uv, @key);
 
+    if (defined $self->{rewrite}) {
+	$line = $self->{rewrite}->($line);
+    }
+
     return if $line !~ /^\s*[0-9A-Fa-f]/;
 
     # removes comment and gets name
@@ -359,7 +389,8 @@ sub parseEntry
 
     @uv = _getHexArray($e);
     return if !@uv;
-
+    return if @uv > 1 && $self->{suppressHash} && !$tailoring &&
+		  exists $self->{suppressHash}{$uv[0]};
     $entry = join(CODE_SEP, @uv); # in JCPS
 
     if (defined $self->{undefChar} || defined $self->{ignoreChar}) {
@@ -395,56 +426,26 @@ sub parseEntry
     $self->{mapping}{$entry} = $is_L3_ignorable ? [] : \@key;
 
     if (@uv > 1) {
-	(!$self->{maxlength}{$uv[0]} || $self->{maxlength}{$uv[0]} < @uv)
-	    and $self->{maxlength}{$uv[0]} = @uv;
-    }
-}
-
-
-##
-## VCE = _varCE(variable term, VCE)
-##
-sub _varCE
-{
-    my $vbl = shift;
-    my $vce = shift;
-    if ($vbl eq 'non-ignorable') {
-	return $vce;
-    }
-    my ($var, @wt) = unpack VCE_TEMPLATE, $vce;
-
-    if ($var) {
-	return pack(VCE_TEMPLATE, $var, 0, 0, 0,
-		$vbl eq 'blanked' ? $wt[3] : $wt[0]);
-    }
-    elsif ($vbl eq 'blanked') {
-	return $vce;
+	if (!$self->{maxlength}{$uv[0]} || $self->{maxlength}{$uv[0]} < @uv) {
+	    $self->{maxlength}{$uv[0]} = @uv;
+	}
     }
-    else {
-	return pack(VCE_TEMPLATE, $var, @wt[0..2],
-	    $vbl eq 'shifted' && $wt[0]+$wt[1]+$wt[2] ? Shift4Wt : 0);
+    if (@uv > 2) {
+	while (@uv) {
+	    pop @uv;
+	    my $fake_entry = join(CODE_SEP, @uv); # in JCPS
+	    $self->{contraction}{$fake_entry} = 1;
+	}
     }
 }
 
+
 sub viewSortKey
 {
     my $self = shift;
     $self->visualizeSortKey($self->getSortKey(@_));
 }
 
-sub visualizeSortKey
-{
-    my $self = shift;
-    my $view = join " ", map sprintf("%04X", $_), unpack(KEY_TEMPLATE, shift);
-
-    if ($self->{UCA_Version} <= 8) {
-	$view =~ s/ ?0000 ?/|/g;
-    } else {
-	$view =~ s/\b0000\b/|/g;
-    }
-    return "[$view]";
-}
-
 
 ##
 ## arrayref of JCPS   = splitEnt(string to be collated)
@@ -460,7 +461,9 @@ sub splitEnt
     my $map  = $self->{mapping};
     my $max  = $self->{maxlength};
     my $reH  = $self->{rearrangeHash};
-    my $ver9 = $self->{UCA_Version} >= 9 && $self->{UCA_Version} <= 11;
+    my $vers = $self->{UCA_Version};
+    my $ver9 = $vers >= 9 && $vers <= 11;
+    my $uXS  = $self->{__useXS}; ### XS only
 
     my ($str, @buf);
 
@@ -494,9 +497,17 @@ sub splitEnt
 
     # remove a code point marked as a completely ignorable.
     for (my $i = 0; $i < @src; $i++) {
-	$src[$i] = undef
-	    if _isIllegal($src[$i]) || ($ver9 &&
-		$map->{ $src[$i] } && @{ $map->{ $src[$i] } } == 0);
+	if (_isIllegal($src[$i]) || $vers <= 20 && _isNonchar($src[$i])) {
+	    $src[$i] = undef;
+	} elsif ($ver9) {
+	    $src[$i] = undef if $map->{ $src[$i] } &&
+			     @{ $map->{ $src[$i] } } == 0;
+### begin XS only ###
+	    if ($uXS) {
+		$src[$i] = undef if _ignorable_simple($src[$i]);
+	    }
+### end XS only ###
+	}
     }
 
     for (my $i = 0; $i < @src; $i++) {
@@ -528,36 +539,55 @@ sub splitEnt
 		}
 	    }
 
-	# not-contiguous contraction with Combining Char (cf. UTS#10, S2.1).
+	# discontiguous contraction with Combining Char (cf. UTS#10, S2.1).
 	# This process requires Unicode::Normalize.
 	# If "normalization" is undef, here should be skipped *always*
 	# (in spite of bool value of $CVgetCombinClass),
 	# since canonical ordering cannot be expected.
 	# Blocked combining character should not be contracted.
 
-	    if ($self->{normalization})
 	    # $self->{normCode} is false in the case of "prenormalized".
-	    {
+	    if ($self->{normalization}) {
+		my $cont = $self->{contraction};
 		my $preCC = 0;
-		my $curCC = 0;
+		my $preCC_uc = 0;
+		my $jcps_uc = $jcps;
+		my(@out, @out_uc);
 
 		for (my $p = $i + 1; $p < @src; $p++) {
 		    next if ! defined $src[$p];
-		    $curCC = $CVgetCombinClass->($src[$p]);
+		    my $curCC = $CVgetCombinClass->($src[$p]);
 		    last unless $curCC;
 		    my $tail = CODE_SEP . $src[$p];
+
+		    if ($preCC_uc != $curCC && ($map->{$jcps_uc.$tail} ||
+					       $cont->{$jcps_uc.$tail})) {
+			$jcps_uc .= $tail;
+			push @out_uc, $p;
+		    } else {
+			$preCC_uc = $curCC;
+		    }
+
 		    if ($preCC != $curCC && $map->{$jcps.$tail}) {
 			$jcps .= $tail;
-			$src[$p] = undef;
+			push @out, $p;
 		    } else {
 			$preCC = $curCC;
 		    }
 		}
+
+		if ($map->{$jcps_uc}) {
+		    $jcps = $jcps_uc;
+		    $src[$_] = undef for @out_uc;
+		} else {
+		    $src[$_] = undef for @out;
+		}
 	    }
 	}
 
 	# skip completely ignorable
-	if ($map->{$jcps} && @{ $map->{$jcps} } == 0) {
+	if ($uXS && $jcps !~ /;/ && _ignorable_simple($jcps) || ### XS only
+	    $map->{$jcps} && @{ $map->{$jcps} } == 0) {
 	    if ($wLen && @buf) {
 		$buf[-1][2] = $i + 1;
 	    }
@@ -569,6 +599,22 @@ sub splitEnt
     return \@buf;
 }
 
+##
+## VCE = _pack_override(input, codepoint, derivCode)
+##
+sub _pack_override ($$$) {
+    my $r = shift;
+    my $u = shift;
+    my $der = shift;
+
+    if (ref $r) {
+	return pack(VCE_TEMPLATE, NON_VAR, @$r);
+    } elsif (defined $r) {
+	return pack(VCE_TEMPLATE, NON_VAR, $r, Min2Wt, Min3Wt, $u);
+    } else {
+	return $der->($u);
+    }
+}
 
 ##
 ## list of VCE = getWt(JCPS)
@@ -577,25 +623,27 @@ sub getWt
 {
     my $self = shift;
     my $u    = shift;
-    my $vbl  = $self->{variable};
     my $map  = $self->{mapping};
     my $der  = $self->{derivCode};
+    my $uXS  = $self->{__useXS}; ### XS only
 
     return if !defined $u;
-    return map(_varCE($vbl, $_), @{ $map->{$u} })
+    return map($self->varCE($_), @{ $map->{$u} })
 	if $map->{$u};
+### begin XS only ###
+    return map($self->varCE($_), _fetch_simple($u))
+	if $uXS && _exists_simple($u);
+### end XS only ###
 
     # JCPS must not be a contraction, then it's a code point.
     if (Hangul_SIni <= $u && $u <= Hangul_SFin) {
 	my $hang = $self->{overrideHangul};
 	my @hangulCE;
 	if ($hang) {
-	    @hangulCE = map(pack(VCE_TEMPLATE, NON_VAR, @$_), &$hang($u));
-	}
-	elsif (!defined $hang) {
+	    @hangulCE = map _pack_override($_, $u, $der), $hang->($u);
+	} elsif (!defined $hang) {
 	    @hangulCE = $der->($u);
-	}
-	else {
+	} else {
 	    my $max  = $self->{maxlength};
 	    my @decH = _decompHangul($u);
 
@@ -612,7 +660,7 @@ sub getWt
 			$map->{$contract} and @decH = ($contract, $decH[2]);
 		    }
 		    # even if V's ignorable, LT contraction is not supported.
-		    # If such a situatution were required, NFD should be used.
+		    # If such a situation were required, NFD should be used.
 		}
 		if (@decH == 3 && $max->{$decH[1]}) {
 		    my $contract = join(CODE_SEP, @decH[1,2]);
@@ -621,22 +669,23 @@ sub getWt
 	    }
 
 	    @hangulCE = map({
-		    $map->{$_} ? @{ $map->{$_} } : $der->($_);
+		    $map->{$_} ? @{ $map->{$_} } :
+		$uXS && _exists_simple($_) ? _fetch_simple($_) : ### XS only
+		    $der->($_);
 		} @decH);
 	}
-	return map _varCE($vbl, $_), @hangulCE;
-    }
-    elsif (_isUIdeo($u, $self->{UCA_Version})) {
+	return map $self->varCE($_), @hangulCE;
+    } else {
 	my $cjk  = $self->{overrideCJK};
-	return map _varCE($vbl, $_),
-	    $cjk
-		? map(pack(VCE_TEMPLATE, NON_VAR, @$_), &$cjk($u))
-		: defined $cjk && $self->{UCA_Version} <= 8 && $u < 0x10000
-		    ? _uideoCE_8($u)
-		    : $der->($u);
-    }
-    else {
-	return map _varCE($vbl, $_), $der->($u);
+	my $vers = $self->{UCA_Version};
+	if ($cjk && _isUIdeo($u, $vers)) {
+	    my @cjkCE = map _pack_override($_, $u, $der), $cjk->($u);
+	    return map $self->varCE($_), @cjkCE;
+	}
+	if ($vers == 8 && defined $cjk && _isUIdeo($u, 0)) {
+	    return map $self->varCE($_), _uideoCE_8($u);
+	}
+	return map $self->varCE($_), $der->($u);
     }
 }
 
@@ -647,89 +696,34 @@ sub getWt
 sub getSortKey
 {
     my $self = shift;
-    my $lev  = $self->{level};
     my $rEnt = $self->splitEnt(shift); # get an arrayref of JCPS
-    my $v2i  = $self->{UCA_Version} >= 9 &&
-		$self->{variable} ne 'non-ignorable';
+    my $vers = $self->{UCA_Version};
+    my $term = $self->{hangul_terminator};
 
     my @buf; # weight arrays
-    if ($self->{hangul_terminator}) {
+    if ($term) {
 	my $preHST = '';
+	my $termCE = $self->varCE(pack(VCE_TEMPLATE, NON_VAR, $term, 0,0,0));
 	foreach my $jcps (@$rEnt) {
 	    # weird things like VL, TL-contraction are not considered!
-	    my $curHST = '';
-	    foreach my $u (split /;/, $jcps) {
-		$curHST .= getHST($u);
-	    }
+	    my $curHST = join '', map getHST($_, $vers), split /;/, $jcps;
 	    if ($preHST && !$curHST || # hangul before non-hangul
 		$preHST =~ /L\z/ && $curHST =~ /^T/ ||
 		$preHST =~ /V\z/ && $curHST =~ /^L/ ||
 		$preHST =~ /T\z/ && $curHST =~ /^[LV]/) {
-
-		push @buf, $self->getWtHangulTerm();
+		push @buf, $termCE;
 	    }
 	    $preHST = $curHST;
-
 	    push @buf, $self->getWt($jcps);
 	}
-	$preHST # end at hangul
-	    and push @buf, $self->getWtHangulTerm();
-    }
-    else {
+	push @buf, $termCE if $preHST; # end at hangul
+    } else {
 	foreach my $jcps (@$rEnt) {
 	    push @buf, $self->getWt($jcps);
 	}
     }
 
-    # make sort key
-    my @ret = ([],[],[],[]);
-    my $last_is_variable;
-
-    foreach my $vwt (@buf) {
-	my($var, @wt) = unpack(VCE_TEMPLATE, $vwt);
-
-	# "Ignorable (L1, L2) after Variable" since track. v. 9
-	if ($v2i) {
-	    if ($var) {
-		$last_is_variable = TRUE;
-	    }
-	    elsif (!$wt[0]) { # ignorable
-		next if $last_is_variable;
-	    }
-	    else {
-		$last_is_variable = FALSE;
-	    }
-	}
-	foreach my $v (0..$lev-1) {
-	    0 < $wt[$v] and push @{ $ret[$v] }, $wt[$v];
-	}
-    }
-
-    # modification of tertiary weights
-    if ($self->{upper_before_lower}) {
-	foreach my $w (@{ $ret[2] }) {
-	    if    (0x8 <= $w && $w <= 0xC) { $w -= 6 } # lower
-	    elsif (0x2 <= $w && $w <= 0x6) { $w += 6 } # upper
-	    elsif ($w == 0x1C)             { $w += 1 } # square upper
-	    elsif ($w == 0x1D)             { $w -= 1 } # square lower
-	}
-    }
-    if ($self->{katakana_before_hiragana}) {
-	foreach my $w (@{ $ret[2] }) {
-	    if    (0x0F <= $w && $w <= 0x13) { $w -= 2 } # katakana
-	    elsif (0x0D <= $w && $w <= 0x0E) { $w += 5 } # hiragana
-	}
-    }
-
-    if ($self->{backwardsFlag}) {
-	for (my $v = MinLevel; $v <= MaxLevel; $v++) {
-	    if ($self->{backwardsFlag} & (1 << $v)) {
-		@{ $ret[$v-1] } = reverse @{ $ret[$v-1] };
-	    }
-	}
-    }
-
-    join LEVEL_SEP, map pack(KEY_TEMPLATE, @$_), @ret;
+    return $self->mk_SortKey(\@buf); ### XS only
 }
 
 
@@ -756,118 +750,6 @@ sub sort {
 }
 
 
-sub _derivCE_14 {
-    my $u = shift;
-    my $base =
-	(CJK_UidIni  <= $u && $u <= CJK_UidF41)
-	    ? 0xFB40 : # CJK
-	(CJK_ExtAIni <= $u && $u <= CJK_ExtAFin ||
-	 CJK_ExtBIni <= $u && $u <= CJK_ExtBFin)
-	    ? 0xFB80   # CJK ext.
-	    : 0xFBC0;  # others
-
-    my $aaaa = $base + ($u >> 15);
-    my $bbbb = ($u & 0x7FFF) | 0x8000;
-    return
-	pack(VCE_TEMPLATE, NON_VAR, $aaaa, Min2Wt, Min3Wt, $u),
-	pack(VCE_TEMPLATE, NON_VAR, $bbbb,      0,      0, $u);
-}
-
-sub _derivCE_9 {
-    my $u = shift;
-    my $base =
-	(CJK_UidIni  <= $u && $u <= CJK_UidFin)
-	    ? 0xFB40 : # CJK
-	(CJK_ExtAIni <= $u && $u <= CJK_ExtAFin ||
-	 CJK_ExtBIni <= $u && $u <= CJK_ExtBFin)
-	    ? 0xFB80   # CJK ext.
-	    : 0xFBC0;  # others
-
-    my $aaaa = $base + ($u >> 15);
-    my $bbbb = ($u & 0x7FFF) | 0x8000;
-    return
-	pack(VCE_TEMPLATE, NON_VAR, $aaaa, Min2Wt, Min3Wt, $u),
-	pack(VCE_TEMPLATE, NON_VAR, $bbbb,      0,      0, $u);
-}
-
-sub _derivCE_8 {
-    my $code = shift;
-    my $aaaa =  0xFF80 + ($code >> 15);
-    my $bbbb = ($code & 0x7FFF) | 0x8000;
-    return
-	pack(VCE_TEMPLATE, NON_VAR, $aaaa, 2, 1, $code),
-	pack(VCE_TEMPLATE, NON_VAR, $bbbb, 0, 0, $code);
-}
-
-sub _uideoCE_8 {
-    my $u = shift;
-    return pack(VCE_TEMPLATE, NON_VAR, $u, Min2Wt, Min3Wt, $u);
-}
-
-sub _isUIdeo {
-    my ($u, $uca_vers) = @_;
-    return(
-	(CJK_UidIni <= $u &&
-	    ($uca_vers >= 14 ? ( $u <= CJK_UidF41) : ($u <= CJK_UidFin)))
-		||
-	(CJK_ExtAIni <= $u && $u <= CJK_ExtAFin)
-		||
-	(CJK_ExtBIni <= $u && $u <= CJK_ExtBFin)
-    );
-}
-
-
-sub getWtHangulTerm {
-    my $self = shift;
-    return _varCE($self->{variable},
-	pack(VCE_TEMPLATE, NON_VAR, $self->{hangul_terminator}, 0,0,0));
-}
-
-
-##
-## "hhhh hhhh hhhh" to (dddd, dddd, dddd)
-##
-sub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g }
-
-#
-# $code *must* be in Hangul syllable.
-# Check it before you enter here.
-#
-sub _decompHangul {
-    my $code = shift;
-    my $si = $code - Hangul_SBase;
-    my $li = int( $si / Hangul_NCount);
-    my $vi = int(($si % Hangul_NCount) / Hangul_TCount);
-    my $ti =      $si % Hangul_TCount;
-    return (
-	Hangul_LBase + $li,
-	Hangul_VBase + $vi,
-	$ti ? (Hangul_TBase + $ti) : (),
-    );
-}
-
-sub _isIllegal {
-    my $code = shift;
-    return ! defined $code                      # removed
-	|| ($code < 0 || 0x10FFFF < $code)      # out of range
-	|| (($code & 0xFFFE) == 0xFFFE)         # ??FFF[EF] (cf. utf8.c)
-	|| (0xD800 <= $code && $code <= 0xDFFF) # unpaired surrogates
-	|| (0xFDD0 <= $code && $code <= 0xFDEF) # other non-characters
-    ;
-}
-
-# Hangul Syllable Type
-sub getHST {
-    my $u = shift;
-    return
-	Hangul_LIni <= $u && $u <= Hangul_LFin || $u == Hangul_LFill ? "L" :
-	Hangul_VIni <= $u && $u <= Hangul_VFin	     ? "V" :
-	Hangul_TIni <= $u && $u <= Hangul_TFin	     ? "T" :
-	Hangul_SIni <= $u && $u <= Hangul_SFin ?
-	    ($u - Hangul_SBase) % Hangul_TCount ? "LVT" : "LV" : "";
-}
-
-
 ##
 ## bool _nonIgnorAtLevel(arrayref weights, int level)
 ##
@@ -894,7 +776,7 @@ sub _eqArray($$$)
     my $lev = shift;
 
     for my $g (0..@$substr-1){
-	# Do the $g'th graphemes have the same number of AV weigths?
+	# Do the $g'th graphemes have the same number of AV weights?
 	return if @{ $source->[$g] } != @{ $substr->[$g] };
 
 	for my $w (0..@{ $substr->[$g] }-1) {
@@ -908,9 +790,9 @@ sub _eqArray($$$)
 
 ##
 ## (int position, int length)
-## int position = index(string, substring, position, [undoc'ed grobal])
+## int position = index(string, substring, position, [undoc'ed global])
 ##
-## With "grobal" (only for the list context),
+## With "global" (only for the list context),
 ##  returns list of arrayref[position, length].
 ##
 sub index
@@ -921,7 +803,7 @@ sub index
     my $subE = $self->splitEnt(shift);
     my $pos  = @_ ? shift : 0;
        $pos  = 0 if $pos < 0;
-    my $grob = shift;
+    my $glob = shift;
 
     my $lev  = $self->{level};
     my $v2i  = $self->{UCA_Version} >= 9 &&
@@ -929,7 +811,7 @@ sub index
 
     if (! @$subE) {
 	my $temp = $pos <= 0 ? 0 : $len <= $pos ? $len : $pos;
-	return $grob
+	return $glob
 	    ? map([$_, 0], $temp..$len)
 	    : wantarray ? ($temp,0) : $temp;
     }
@@ -961,9 +843,10 @@ sub index
 
 	if (@subWt && !$var && !$wt[0]) {
 	    push @{ $subWt[-1] }, \@wt if $to_be_pushed;
-	} else {
+	} elsif ($to_be_pushed) {
 	    push @subWt, [ \@wt ];
 	}
+	# else ===> skipped
     }
 
     my $count = 0;
@@ -1014,7 +897,7 @@ sub index
 			_eqArray(\@strWt, \@subWt, $lev)) {
 		my $temp = $iniPos[0] + $pos;
 
-		if ($grob) {
+		if ($glob) {
 		    push @g_ret, [$temp, $finPos[$#subWt] - $iniPos[0]];
 		    splice @strWt,  0, $#subWt;
 		    splice @iniPos, 0, $#subWt;
@@ -1032,7 +915,7 @@ sub index
 	}
     }
 
-    return $grob
+    return $glob
 	? @g_ret
 	: wantarray ? () : NOMATCHPOS;
 }
@@ -1130,8 +1013,10 @@ Unicode::Collate - Unicode Collation Algorithm
   #compare
   $result = $Collator->cmp($a, $b); # returns 1, 0, or -1.
 
-  # If %tailoring is false (i.e. empty),
-  # $Collator should do the default collation.
+B<Note:> Strings in C<@not_sorted>, C<$a> and C<$b> are interpreted
+according to Perl's Unicode support. See L<perlunicode>,
+L<perluniintro>, L<perlunitut>, L<perlunifaq>, L<utf8>.
+Otherwise you can use C<preprocess> or should decode them before.
 
 =head1 DESCRIPTION
 
@@ -1140,16 +1025,18 @@ This module is an implementation of Unicode Technical Standard #10
 
 =head2 Constructor and Tailoring
 
-The C<new> method returns a collator object.
+The C<new> method returns a collator object. If new() is called
+with no parameters, the collator should do the default collation.
 
    $Collator = Unicode::Collate->new(
       UCA_Version => $UCA_Version,
-      alternate => $alternate, # deprecated: use of 'variable' is recommended.
+      alternate => $alternate, # alias for 'variable'
       backwards => $levelNumber, # or \@levelNumbers
       entry => $element,
       hangul_terminator => $term_primary_weight,
       ignoreName => qr/$ignoreName/,
       ignoreChar => qr/$ignoreChar/,
+      ignore_level2 => $bool,
       katakana_before_hiragana => $bool,
       level => $collationLevel,
       normalization  => $normalization_form,
@@ -1157,6 +1044,8 @@ The C<new> method returns a collator object.
       overrideHangul => \&overrideHangul,
       preprocess => \&preprocess,
       rearrange => \@charList,
+      rewrite => \&rewrite,
+      suppress => \@charList,
       table => $filename,
       undefName => qr/$undefName/,
       undefChar => qr/$undefChar/,
@@ -1168,21 +1057,38 @@ The C<new> method returns a collator object.
 
 =item UCA_Version
 
-If the tracking version number of UCA is given,
-behavior of that tracking version is emulated on collating.
+If the revision (previously "tracking version") number of UCA is given,
+behavior of that revision is emulated on collating.
 If omitted, the return value of C<UCA_Version()> is used.
-C<UCA_Version()> should return the latest tracking version supported.
 
-The supported tracking version: 8, 9, 11, or 14.
+The following revisions are supported.  The default is 24.
 
      UCA       Unicode Standard         DUCET (@version)
-     ---------------------------------------------------
+   -------------------------------------------------------
       8              3.1                3.0.1 (3.0.1d9)
       9     3.1 with Corrigendum 3      3.1.1 (3.1.1)
      11              4.0                4.0.0 (4.0.0)
      14             4.1.0               4.1.0 (4.1.0)
+     16              5.0                5.0.0 (5.0.0)
+     18             5.1.0               5.1.0 (5.1.0)
+     20             5.2.0               5.2.0 (5.2.0)
+     22             6.0.0               6.0.0 (6.0.0)
+     24             6.1.0               6.1.0 (6.1.0)
 
-Note: Recent UTS #10 renames "Tracking Version" to "Revision."
+* Noncharacters (e.g. U+FFFF) are not ignored, and can be overridden
+since C<UCA_Version> 22.
+
+* Fully ignorable characters were ignored, and would not interrupt
+contractions with C<UCA_Version> 9 and 11.
+
+* Treatment of ignorables after variables and some behaviors
+were changed at C<UCA_Version> 9.
+
+* Characters regarded as CJK unified ideographs (cf. C<overrideCJK>)
+depend on C<UCA_Version>.
+
+* Many hangul jamo are assigned at C<UCA_Version> 20, that will affect
+C<hangul_terminator>.
 
 =item alternate
 
@@ -1198,7 +1104,8 @@ as an alias for C<variable>.
      backwards => $levelNumber or \@levelNumbers
 
 Weights in reverse order; ex. level 2 (diacritic ordering) in French.
-If omitted, forwards at all the levels.
+If omitted (or C<$levelNumber> is C<undef> or C<\@levelNumbers> is C<[]>),
+forwards at all the levels.
 
 =item entry
 
@@ -1206,7 +1113,7 @@ If omitted, forwards at all the levels.
 
 If the same character (or a sequence of characters) exists
 in the collation element table through C<table>,
-mapping to collation elements is overrided.
+mapping to collation elements is overridden.
 If it does not exist, the mapping is defined additionally.
 
     entry => <<'ENTRY', # for DUCET v4.0.0 (allkeys-4.0.0.txt)
@@ -1293,6 +1200,18 @@ will be ignored.
 E.g. when 'a' and 'e' are ignorable,
 'element' is equal to 'lament' (or 'lmnt').
 
+=item ignore_level2
+
+-- see 5.1 Parametric Tailoring, UTS #10.
+
+By default, case-sensitive comparison (that is level 3 difference)
+won't ignore accents (that is level 2 difference).
+
+If the parameter is made true, accents (and other primary ignorable
+characters) are ignored, even though cases are taken into account.
+
+B<NOTE>: C<level> should be 3 or greater.
+
 =item katakana_before_hiragana
 
 -- see 7.3.1 Tertiary Weight Table, UTS #10.
@@ -1353,7 +1272,7 @@ B<is not> equivalent to C<(normalization =E<gt> 'NFD')>.
 
 In the case of C<(normalization =E<gt> "prenormalized")>,
 any normalization is not performed, but
-non-contiguous contractions with combining characters are performed.
+discontiguous contractions with combining characters are performed.
 Therefore
 C<(normalization =E<gt> 'prenormalized', preprocess =E<gt> sub { NFD(shift) })>
 B<is> equivalent to C<(normalization =E<gt> 'NFD')>.
@@ -1367,15 +1286,26 @@ B<Unicode::Normalize> is required (see also B<CAVEAT>).
 
 -- see 7.1 Derived Collation Elements, UTS #10.
 
-By default, CJK Unified Ideographs are ordered in Unicode codepoint order
-but C<CJK Unified Ideographs> (if C<UCA_Version> is 8 to 11, its range is
-C<U+4E00..U+9FA5>; if C<UCA_Version> is 14, its range is C<U+4E00..U+9FBB>)
-are lesser than C<CJK Unified Ideographs Extension> (its range is
-C<U+3400..U+4DB5> and C<U+20000..U+2A6D6>).
+By default, CJK unified ideographs are ordered in Unicode codepoint
+order, but those in the CJK Unified Ideographs block are lesser than
+those in the CJK Unified Ideographs Extension A etc.
+
+    In the CJK Unified Ideographs block:
+    U+4E00..U+9FA5 if UCA_Version is 8, 9 or 11.
+    U+4E00..U+9FBB if UCA_Version is 14 or 16.
+    U+4E00..U+9FC3 if UCA_Version is 18.
+    U+4E00..U+9FCB if UCA_Version is 20 or 22.
+    U+4E00..U+9FCC if UCA_Version is 24.
 
-Through C<overrideCJK>, ordering of CJK Unified Ideographs can be overrided.
+    In the CJK Unified Ideographs Extension blocks:
+    Ext.A (U+3400..U+4DB5) and Ext.B (U+20000..U+2A6D6) in any UCA_Version.
+    Ext.C (U+2A700..U+2B734) if UCA_Version is 20 or greater.
+    Ext.D (U+2B740..U+2B81D) if UCA_Version is 22 or greater.
 
-ex. CJK Unified Ideographs in the JIS code point order.
+Through C<overrideCJK>, ordering of CJK unified ideographs (including
+extensions) can be overridden.
+
+ex. CJK unified ideographs in the JIS code point order.
 
   overrideCJK => sub {
       my $u = shift;             # get a Unicode codepoint
@@ -1385,43 +1315,67 @@ ex. CJK Unified Ideographs in the JIS code point order.
       [ $n, 0x20, 0x2, $u ];     # return the collation element
   },
 
-ex. ignores all CJK Unified Ideographs.
+The return value may be an arrayref of 1st to 4th weights as shown
+above. The return value may be an integer as the primary weight
+as shown below.  If C<undef> is returned, the default derived
+collation element will be used.
+
+  overrideCJK => sub {
+      my $u = shift;             # get a Unicode codepoint
+      my $b = pack('n', $u);     # to UTF-16BE
+      my $s = your_unicode_to_sjis_converter($b); # convert
+      my $n = unpack('n', $s);   # convert sjis to short
+      return $n;                 # return the primary weight
+  },
+
+The return value may be a list containing zero or more of
+an arrayref, an integer, or C<undef>.
+
+ex. ignores all CJK unified ideographs.
 
   overrideCJK => sub {()}, # CODEREF returning empty list
 
    # where ->eq("Pe\x{4E00}rl", "Perl") is true
-   # as U+4E00 is a CJK Unified Ideograph and to be ignorable.
+   # as U+4E00 is a CJK unified ideograph and to be ignorable.
 
 If C<undef> is passed explicitly as the value for this key,
-weights for CJK Unified Ideographs are treated as undefined.
-But assignment of weight for CJK Unified Ideographs
-in table or C<entry> is still valid.
+weights for CJK unified ideographs are treated as undefined.
+But assignment of weight for CJK unified ideographs
+in C<table> or C<entry> is still valid.
+
+B<Note:> In addition to them, 12 CJK compatibility ideographs (C<U+FA0E>,
+C<U+FA0F>, C<U+FA11>, C<U+FA13>, C<U+FA14>, C<U+FA1F>, C<U+FA21>, C<U+FA23>,
+C<U+FA24>, C<U+FA27>, C<U+FA28>, C<U+FA29>) are also treated as CJK unified
+ideographs. But they can't be overridden via C<overrideCJK> when you use
+DUCET, as the table includes weights for them. C<table> or C<entry> has
+priority over C<overrideCJK>.
 
 =item overrideHangul
 
 -- see 7.1 Derived Collation Elements, UTS #10.
 
-By default, Hangul Syllables are decomposed into Hangul Jamo,
+By default, Hangul syllables are decomposed into Hangul Jamo,
 even if C<(normalization =E<gt> undef)>.
-But the mapping of Hangul Syllables may be overrided.
+But the mapping of Hangul syllables may be overridden.
 
 This parameter works like C<overrideCJK>, so see there for examples.
 
-If you want to override the mapping of Hangul Syllables,
-NFD, NFKD, and FCD are not appropriate,
-since they will decompose Hangul Syllables before overriding.
+If you want to override the mapping of Hangul syllables,
+NFD and NFKD are not appropriate, since NFD and NFKD will decompose
+Hangul syllables before overriding. FCD may decompose Hangul syllables
+as the case may be.
 
 If C<undef> is passed explicitly as the value for this key,
-weight for Hangul Syllables is treated as undefined
+weight for Hangul syllables is treated as undefined
 without decomposition into Hangul Jamo.
-But definition of weight for Hangul Syllables
-in table or C<entry> is still valid.
+But definition of weight for Hangul syllables
+in C<table> or C<entry> is still valid.
 
 =item preprocess
 
 -- see 5.1 Preprocessing, UTS #10.
 
-If specified, the coderef is used to preprocess
+If specified, the coderef is used to preprocess each string
 before the formation of sort keys.
 
 ex. dropping English articles, such as "a" or "the".
@@ -1435,6 +1389,17 @@ Then, "the pen" is before "a pencil".
 
 C<preprocess> is performed before C<normalization> (if defined).
 
+ex. decoding strings in a legacy encoding such as shift-jis:
+
+    $sjis_collator = Unicode::Collate->new(
+        preprocess => \&your_shiftjis_to_unicode_decoder,
+    );
+    @result = $sjis_collator->sort(@shiftjis_strings);
+
+B<Note:> Strings returned from the coderef will be interpreted
+according to Perl's Unicode support. See L<perlunicode>,
+L<perluniintro>, L<perlunitut>, L<perlunifaq>, L<utf8>.
+
 =item rearrange
 
 -- see 3.1.3 Rearrangement, UTS #10.
@@ -1447,11 +1412,50 @@ If C<UCA_Version> is equal to or lesser than 11, default is:
 If you want to disallow any rearrangement, pass C<undef> or C<[]>
 (a reference to empty list) as the value for this key.
 
-If C<UCA_Version> is equal to 14, default is C<[]> (i.e. no rearrangement).
+If C<UCA_Version> is equal to or greater than 14, default is C<[]>
+(i.e. no rearrangement).
 
 B<According to the version 9 of UCA, this parameter shall not be used;
 but it is not warned at present.>
 
+=item rewrite
+
+If specified, the coderef is used to rewrite lines in C<table> or C<entry>.
+The coderef will get each line, and then should return a rewritten line
+according to the UCA file format.
+If the coderef returns an empty line, the line will be skipped.
+
+e.g. any primary ignorable characters into tertiary ignorable:
+
+    rewrite => sub {
+        my $line = shift;
+        $line =~ s/\[\.0000\..{4}\..{4}\./[.0000.0000.0000./g;
+        return $line;
+    },
+
+This example shows rewriting weights. C<rewrite> is allowed to
+affect code points, weights, and the name.
+
+B<NOTE>: C<table> is available to use another table file;
+preparing a modified table once would be more efficient than
+rewriting lines on reading an unmodified table every time.
+
+=item suppress
+
+-- see suppress contractions in 5.14.11 Special-Purpose Commands,
+UTS #35 (LDML).
+
+Contractions beginning with the specified characters are suppressed,
+even if those contractions are defined in C<table>.
+
+An example for Russian and some languages using the Cyrillic script:
+
+    suppress => [0x0400..0x0417, 0x041A..0x0437, 0x043A..0x045F],
+
+where 0x0400 stands for C<U+0400>, CYRILLIC CAPITAL LETTER IE WITH GRAVE.
+
+B<NOTE>: Contractions via C<entry> are not be suppressed.
+
 =item table
 
 -- see 3.2 Default Unicode Collation Element Table, UTS #10.
@@ -1466,6 +1470,12 @@ By default, F<allkeys.txt> (as the filename of DUCET) is used.
 If you will prepare your own table file, any name other than F<allkeys.txt>
 may be better to avoid namespace conflict.
 
+B<NOTE>: When XSUB is used, the DUCET is compiled on building this
+module, and it may save time at the run time.
+Explicit saying C<table =E<gt> 'allkeys.txt'> (or using another table),
+or using C<ignoreChar>, C<ignoreName>, C<undefChar>, C<undefName> or
+C<rewrite> will prevent this module from using the compiled DUCET.
+
 If C<undef> is passed explicitly as the value for this key,
 no file is read (but you can define collation elements via C<entry>).
 
@@ -1493,7 +1503,7 @@ specified as a comment (following C<#>) on each line.
 
 -- see 6.3.4 Reducing the Repertoire, UTS #10.
 
-Undefines the collation element as if it were unassigned in the table.
+Undefines the collation element as if it were unassigned in the C<table>.
 This reduces the size of the table.
 If an unassigned character appears in the string to be collated,
 the sort key is made from its codepoint
@@ -1529,9 +1539,9 @@ this parameter doesn't work validly.
 
 -- see 3.2.2 Variable Weighting, UTS #10.
 
-This key allows to variable weighting for variable collation elements,
+This key allows for variable weighting of variable collation elements,
 which are marked with an ASTERISK in the table
-(NOTE: Many punction marks and symbols are variable in F<allkeys.txt>).
+(NOTE: Many punctuation marks and symbols are variable in F<allkeys.txt>).
 
    variable => 'blanked', 'non-ignorable', 'shifted', or 'shift-trimmed'.
 
@@ -1619,17 +1629,19 @@ If C<UCA_Version> is 8, the output is slightly different.
 
 =head2 Methods for Searching
 
-B<DISCLAIMER:> If C<preprocess> or C<normalization> parameter is true
-for C<$Collator>, calling these methods (C<index>, C<match>, C<gmatch>,
-C<subst>, C<gsubst>) is croaked,
-as the position and the length might differ
-from those on the specified string.
-(And C<rearrange> and C<hangul_terminator> parameters are neglected.)
-
 The C<match>, C<gmatch>, C<subst>, C<gsubst> methods work
 like C<m//>, C<m//g>, C<s///>, C<s///g>, respectively,
 but they are not aware of any pattern, but only a literal substring.
 
+B<DISCLAIMER:> If C<preprocess> or C<normalization> parameter is true
+for C<$Collator>, calling these methods (C<index>, C<match>, C<gmatch>,
+C<subst>, C<gsubst>) is croaked, as the position and the length might
+differ from those on the specified string.
+
+C<rearrange> and C<hangul_terminator> parameters are neglected.
+C<katakana_before_hiragana> and C<upper_before_lower> don't affect
+matching and searching, as it doesn't matter whether greater or lesser.
+
 =over 4
 
 =item C<$position = $Collator-E<gt>index($string, $substring[, $position])>
@@ -1701,7 +1713,7 @@ returns an empty list.
 
 If C<$substring> matches a part of C<$string>,
 the first occurrence of the matching part is replaced by C<$replacement>
-(C<$string> is modified) and return C<$count> (always equals to C<1>).
+(C<$string> is modified) and C<$count> (always equals to C<1>) is returned.
 
 C<$replacement> can be a C<CODEREF>,
 taking the matching part as an argument,
@@ -1711,8 +1723,8 @@ and returning a string to replace the matching part
 =item C<$count = $Collator-E<gt>gsubst($string, $substring, $replacement)>
 
 If C<$substring> matches a part of C<$string>,
-all the occurrences of the matching part is replaced by C<$replacement>
-(C<$string> is modified) and return C<$count>.
+all the occurrences of the matching part are replaced by C<$replacement>
+(C<$string> is modified) and C<$count> is returned.
 
 C<$replacement> can be a C<CODEREF>,
 taking the matching part as an argument,
@@ -1723,12 +1735,29 @@ e.g.
 
   my $Collator = Unicode::Collate->new( normalization => undef, level => 1 );
                                      # (normalization => undef) is REQUIRED.
-  my $str = "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L...";
+  my $str = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l...";
   $Collator->gsubst($str, "camel", sub { "<b>$_[0]</b>" });
 
-  # now $str is "<b>Camel</b> donkey zebra <b>came\x{301}l</b> <b>CAMEL</b> horse <b>cAm\0E\0L</b>...";
+  # now $str is "<b>Camel</b> donkey zebra <b>came\x{301}l</b> <b>CAMEL</b> horse <b>cam\0e\0l</b>...";
   # i.e., all the camels are made bold-faced.
 
+   Examples: levels and ignore_level2 - what does camel match?
+  ---------------------------------------------------------------------------
+   level  ignore_level2  |  camel  Camel  came\x{301}l  c-a-m-e-l  cam\0e\0l
+  -----------------------|---------------------------------------------------
+     1        false      |   yes    yes      yes          yes        yes
+     2        false      |   yes    yes      no           yes        yes
+     3        false      |   yes    no       no           yes        yes
+     4        false      |   yes    no       no           no         yes
+  -----------------------|---------------------------------------------------
+     1        true       |   yes    yes      yes          yes        yes
+     2        true       |   yes    yes      yes          yes        yes
+     3        true       |   yes    no       yes          yes        yes
+     4        true       |   yes    no       yes          no         yes
+  ---------------------------------------------------------------------------
+   note: if variable => non-ignorable, camel doesn't match c-a-m-e-l
+         at any level.
+
 =back
 
 =head2 Other Methods
@@ -1737,7 +1766,9 @@ e.g.
 
 =item C<%old_tailoring = $Collator-E<gt>change(%new_tailoring)>
 
-Change the value of specified keys and returns the changed part.
+=item C<$modified_collator = $Collator-E<gt>change(%new_tailoring)>
+
+Changes the value of specified keys and returns the changed part.
 
     $Collator = Unicode::Collate->new(level => 4);
 
@@ -1772,11 +1803,13 @@ returns C<"unknown">.
 
 =item C<UCA_Version()>
 
-Returns the tracking version number of UTS #10 this module consults.
+Returns the revision number of UTS #10 this module consults,
+that should correspond with the DUCET incorporated.
 
 =item C<Base_Unicode_Version()>
 
-Returns the version number of UTS #10 this module consults.
+Returns the version number of UTS #10 this module consults,
+that should correspond with the DUCET incorporated.
 
 =back
 
@@ -1835,15 +1868,15 @@ B<Unicode::Normalize is required to try The Conformance Test.>
 =head1 AUTHOR, COPYRIGHT AND LICENSE
 
 The Unicode::Collate module for perl was written by SADAHIRO Tomoyuki,
-<SADAHIRO@cpan.org>. This module is Copyright(C) 2001-2005,
+<SADAHIRO@cpan.org>. This module is Copyright(C) 2001-2012,
 SADAHIRO Tomoyuki. Japan. All rights reserved.
 
 This module is free software; you can redistribute it and/or
 modify it under the same terms as Perl itself.
 
-The file Unicode/Collate/allkeys.txt was copied directly
-from L<http://www.unicode.org/Public/UCA/4.1.0/allkeys.txt>.
-This file is Copyright (c) 1991-2005 Unicode, Inc. All rights reserved.
+The file Unicode/Collate/allkeys.txt was copied verbatim
+from L<http://www.unicode.org/Public/UCA/6.1.0/allkeys.txt>.
+For this file, Copyright (c) 2001-2011 Unicode, Inc.
 Distributed under the Terms of Use in L<http://www.unicode.org/copyright.html>.
 
 =head1 SEE ALSO
@@ -1872,6 +1905,10 @@ L<http://www.unicode.org/Public/UNIDATA/HangulSyllableType.txt>
 
 L<http://www.unicode.org/reports/tr15/>
 
+=item Unicode Locale Data Markup Language (LDML) - UTS #35
+
+L<http://www.unicode.org/reports/tr35/>
+
 =back
 
 =cut
author	sthen <sthen@openbsd.org>	2013-03-25 20:06:16 +0000
committer	sthen <sthen@openbsd.org>	2013-03-25 20:06:16 +0000
commit	898184e3e61f9129feb5978fad5a8c6865f00b92 (patch)
tree	56f32aefc1eed60b534611007c7856f82697a205 /gnu/usr.bin/perl/cpan/Unicode-Collate/Collate.pm
parent	PGSHIFT -> PAGE_SHIFT (diff)
download	wireguard-openbsd-898184e3e61f9129feb5978fad5a8c6865f00b92.tar.xz wireguard-openbsd-898184e3e61f9129feb5978fad5a8c6865f00b92.zip