summaryrefslogtreecommitdiffstats
path: root/gnu/usr.bin/perl/cpan/Unicode-Normalize
diff options
context:
space:
mode:
authorsthen <sthen@openbsd.org>2013-03-25 20:06:16 +0000
committersthen <sthen@openbsd.org>2013-03-25 20:06:16 +0000
commit898184e3e61f9129feb5978fad5a8c6865f00b92 (patch)
tree56f32aefc1eed60b534611007c7856f82697a205 /gnu/usr.bin/perl/cpan/Unicode-Normalize
parentPGSHIFT -> PAGE_SHIFT (diff)
downloadwireguard-openbsd-898184e3e61f9129feb5978fad5a8c6865f00b92.tar.xz
wireguard-openbsd-898184e3e61f9129feb5978fad5a8c6865f00b92.zip
import perl 5.16.3 from CPAN - worked on by Andrew Fresh and myself
Diffstat (limited to 'gnu/usr.bin/perl/cpan/Unicode-Normalize')
-rw-r--r--gnu/usr.bin/perl/cpan/Unicode-Normalize/Changes46
-rw-r--r--gnu/usr.bin/perl/cpan/Unicode-Normalize/Makefile.PL4
-rw-r--r--gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.pm140
-rw-r--r--gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.xs188
-rw-r--r--gnu/usr.bin/perl/cpan/Unicode-Normalize/README5
-rw-r--r--gnu/usr.bin/perl/cpan/Unicode-Normalize/mkheader182
-rwxr-xr-xgnu/usr.bin/perl/cpan/Unicode-Normalize/t/fcdc.t28
-rwxr-xr-xgnu/usr.bin/perl/cpan/Unicode-Normalize/t/form.t16
-rwxr-xr-xgnu/usr.bin/perl/cpan/Unicode-Normalize/t/func.t60
-rwxr-xr-xgnu/usr.bin/perl/cpan/Unicode-Normalize/t/illegal.t15
-rwxr-xr-xgnu/usr.bin/perl/cpan/Unicode-Normalize/t/norm.t27
-rw-r--r--gnu/usr.bin/perl/cpan/Unicode-Normalize/t/partial1.t117
-rw-r--r--gnu/usr.bin/perl/cpan/Unicode-Normalize/t/partial2.t113
-rwxr-xr-xgnu/usr.bin/perl/cpan/Unicode-Normalize/t/proto.t29
-rwxr-xr-xgnu/usr.bin/perl/cpan/Unicode-Normalize/t/split.t85
-rwxr-xr-xgnu/usr.bin/perl/cpan/Unicode-Normalize/t/test.t50
-rwxr-xr-xgnu/usr.bin/perl/cpan/Unicode-Normalize/t/tie.t14
17 files changed, 856 insertions, 263 deletions
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/Changes b/gnu/usr.bin/perl/cpan/Unicode-Normalize/Changes
index e9cb3918a5e..88df63cb4d7 100644
--- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/Changes
+++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/Changes
@@ -1,12 +1,51 @@
Revision history for Perl extension Unicode::Normalize.
+1.14 Sat Mar 10 13:34:53 2012
+ - avoid "use Test".
+
+1.13 Mon Jul 25 21:07:49 2011
+ - tried fixing the tarball with world writable files.
+ ( http://www.perlmonks.org/?node_id=731935 )
+
+1.12 Mon May 16 23:36:07 2011
+ - removed Normalize/CompExcl.pl and coded Composition Exclusions;
+ how to load CompExcl.pl seems not good, but I'm not sure...
+
+1.11 Sun May 15 20:31:09 2011
+ - As perl 5.14.0 has removed unicore/CompositionExclusions.txt
+ from the installation, Normalize/CompExcl.pl in this distribution
+ is used instead. (see [rt.cpan.org #68106])
+
+1.10 Sun Jan 16 21:00:34 2011
+ - XSUB: reorder() and compose() treat with growing the string.
+ - XSUB: provision against UTF8_ALLOW_* flags to be undefined in future.
+ - doc: about perl 5.13.x and Unicode 6.0.0
+ - doc and comments: [perl #81876] Fix typos by Peter J. Acklam.
+
+1.07 Mon Sep 20 20:20:02 2010
+ - doc: about perl 5.12.x and Unicode 5.2.0
+ - test: prototype of normalize_partial() and cousins in proto.t.
+
+1.06 Thu Feb 11 16:19:54 2010
+ - mkheader/Pure Perl: fixed the internal _getHexArray() for perl 5.11.3
+ changes (Bug #53197).
+
+1.05 Mon Sep 28 21:43:17 2009
+ - normalize_partial() and NFX_partial(). { NFX =~ /^NFK?[CD]\z/ }
+ - added partial1.t for NFX_partial().
+ - added partial2.t for normalize_partial().
+
+1.04 Wed Sep 23 22:32:57 2009
+ - doc: splitOnLastStarter() since 0.24 is now documented.
+ - test: some new tests are added to split.t.
+
1.03 Sun Mar 29 12:56:23 2009
- mkheader: check if no composition needs growing the string.
- Makefile.PL: a tweak
1.02 Tue Jun 5 22:46:45 2007
- XSUB: mkheader, _U_stringify() - avoid unpack('C*') on unicode.
- - test: short.t removed - pure perl is not inapprotiate for test of
+ - test: short.t removed - pure perl is not appropriate for test of
unicode edge cases.
1.01 Tue Jun 13 22:01:53 2006
@@ -43,7 +82,7 @@ Revision history for Perl extension Unicode::Normalize.
- XSUB: even if string contains a malformed, "short" Unicode character,
decompose() and reorder() will be safe. Garbage will be no longer added.
- added null.t and short.t.
- - now truely added illegal.t (in 0.27, forgot to change MANIFEST).
+ - now truly added illegal.t (in 0.27, forgot to change MANIFEST).
0.27 Sun Nov 16 13:16:21 2003
- Illegal code points (surrogate and noncharacter) will be allowed
@@ -69,7 +108,8 @@ Revision history for Perl extension Unicode::Normalize.
script files, named "enableXS" and "disableXS".
(no longer <perl Makefile.PL xs> and <perl Makefile.PL noxs>.)
* simplified Makefile.PL.
- - added fcdc.t and split.t.
+ - added fcdc.t for FCD() and FCC().
+ - added split.t for splitOnLastStarter(): an undocumented function.
0.23 Sat Jun 28 20:38:10 2003
- bug fix: \0-terminate in compose() in XS.
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/Makefile.PL b/gnu/usr.bin/perl/cpan/Unicode-Normalize/Makefile.PL
index a04ca62b8f6..f0b94e74963 100644
--- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/Makefile.PL
+++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/Makefile.PL
@@ -12,7 +12,10 @@ if (-f "Normalize.xs") {
}
WriteMakefile(
+ 'AUTHOR' => 'SADAHIRO Tomoyuki <SADAHIRO@cpan.org>',
+ 'ABSTRACT' => 'Unicode Normalization Forms',
'INSTALLDIRS' => $] >= 5.007002 ? 'perl' : 'site',
+ 'LICENSE' => 'perl',
'NAME' => 'Unicode::Normalize',
'VERSION_FROM' => 'Normalize.pm', # finds $VERSION
'clean' => $clean,
@@ -25,7 +28,6 @@ WriteMakefile(
File::Copy => 0,
File::Spec => 0,
strict => 0,
- Test => 0,
warnings => 0,
},
);
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.pm b/gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.pm
index ad5ff82a836..c580f4852c6 100644
--- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.pm
+++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.pm
@@ -13,13 +13,9 @@ use Carp;
no warnings 'utf8';
-our $VERSION = '1.03';
+our $VERSION = '1.14';
our $PACKAGE = __PACKAGE__;
-require Exporter;
-require DynaLoader;
-
-our @ISA = qw(Exporter DynaLoader);
our @EXPORT = qw( NFC NFD NFKC NFKD );
our @EXPORT_OK = qw(
normalize decompose reorder compose
@@ -27,8 +23,8 @@ our @EXPORT_OK = qw(
getCanon getCompat getComposite getCombinClass
isExclusion isSingleton isNonStDecomp isComp2nd isComp_Ex
isNFD_NO isNFC_NO isNFC_MAYBE isNFKD_NO isNFKC_NO isNFKC_MAYBE
- FCD checkFCD FCC checkFCC composeContiguous
- splitOnLastStarter
+ FCD checkFCD FCC checkFCC composeContiguous splitOnLastStarter
+ normalize_partial NFC_partial NFD_partial NFKC_partial NFKD_partial
);
our %EXPORT_TAGS = (
all => [ @EXPORT, @EXPORT_OK ],
@@ -37,14 +33,8 @@ our %EXPORT_TAGS = (
fast => [ qw/FCD checkFCD FCC checkFCC composeContiguous/ ],
);
-######
-
-bootstrap Unicode::Normalize $VERSION;
-
-######
-
##
-## utilites for tests
+## utilities for tests
##
sub pack_U {
@@ -55,9 +45,18 @@ sub unpack_U {
return unpack('U*', shift(@_).pack('U*'));
}
+require Exporter;
+
+##### The above part is common to XS and PP #####
+
+our @ISA = qw(Exporter DynaLoader);
+require DynaLoader;
+bootstrap Unicode::Normalize $VERSION;
+
+##### The below part is common to XS and PP #####
##
-## normalization forms
+## normalize
##
sub FCD ($) {
@@ -83,9 +82,27 @@ sub normalize($$)
croak($PACKAGE."::normalize: invalid form name: $form");
}
+##
+## partial
+##
+
+sub normalize_partial ($$) {
+ if (exists $formNorm{$_[0]}) {
+ my $n = normalize($_[0], $_[1]);
+ my($p, $u) = splitOnLastStarter($n);
+ $_[1] = $u;
+ return $p;
+ }
+ croak($PACKAGE."::normalize_partial: invalid form name: $_[0]");
+}
+
+sub NFD_partial ($) { return normalize_partial('NFD', $_[0]) }
+sub NFC_partial ($) { return normalize_partial('NFC', $_[0]) }
+sub NFKD_partial($) { return normalize_partial('NFKD',$_[0]) }
+sub NFKC_partial($) { return normalize_partial('NFKC',$_[0]) }
##
-## quick check
+## check
##
our %formCheck = (
@@ -239,6 +256,82 @@ you can get its NFC/NFKC string, by saying
$NFC_string = compose($NFD_string);
$NFKC_string = compose($NFKD_string);
+=item C<($processed, $unprocessed) = splitOnLastStarter($normalized)>
+
+It returns two strings: the first one, C<$processed>, is a part
+before the last starter, and the second one, C<$unprocessed> is
+another part after the first part. A starter is a character having
+a combining class of zero (see UAX #15).
+
+Note that C<$processed> may be empty (when C<$normalized> contains no
+starter or starts with the last starter), and then C<$unprocessed>
+should be equal to the entire C<$normalized>.
+
+When you have a C<$normalized> string and an C<$unnormalized> string
+following it, a simple concatenation is wrong:
+
+ $concat = $normalized . normalize($form, $unnormalized); # wrong!
+
+Instead of it, do like this:
+
+ ($processed, $unprocessed) = splitOnLastStarter($normalized);
+ $concat = $processed . normalize($form, $unprocessed.$unnormalized);
+
+C<splitOnLastStarter()> should be called with a pre-normalized parameter
+C<$normalized>, that is in the same form as C<$form> you want.
+
+If you have an array of C<@string> that should be concatenated and then
+normalized, you can do like this:
+
+ my $result = "";
+ my $unproc = "";
+ foreach my $str (@string) {
+ $unproc .= $str;
+ my $n = normalize($form, $unproc);
+ my($p, $u) = splitOnLastStarter($n);
+ $result .= $p;
+ $unproc = $u;
+ }
+ $result .= $unproc;
+ # instead of normalize($form, join('', @string))
+
+=item C<$processed = normalize_partial($form, $unprocessed)>
+
+A wrapper for the combination of C<normalize()> and C<splitOnLastStarter()>.
+Note that C<$unprocessed> will be modified as a side-effect.
+
+If you have an array of C<@string> that should be concatenated and then
+normalized, you can do like this:
+
+ my $result = "";
+ my $unproc = "";
+ foreach my $str (@string) {
+ $unproc .= $str;
+ $result .= normalize_partial($form, $unproc);
+ }
+ $result .= $unproc;
+ # instead of normalize($form, join('', @string))
+
+=item C<$processed = NFD_partial($unprocessed)>
+
+It does like C<normalize_partial('NFD', $unprocessed)>.
+Note that C<$unprocessed> will be modified as a side-effect.
+
+=item C<$processed = NFC_partial($unprocessed)>
+
+It does like C<normalize_partial('NFC', $unprocessed)>.
+Note that C<$unprocessed> will be modified as a side-effect.
+
+=item C<$processed = NFKD_partial($unprocessed)>
+
+It does like C<normalize_partial('NFKD', $unprocessed)>.
+Note that C<$unprocessed> will be modified as a side-effect.
+
+=item C<$processed = NFKC_partial($unprocessed)>
+
+It does like C<normalize_partial('NFKC', $unprocessed)>.
+Note that C<$unprocessed> will be modified as a side-effect.
+
=back
=head2 Quick Check
@@ -321,15 +414,15 @@ while C<"B\N{COMBINING ACUTE ACCENT}"> is in NFC.
If you want to check exactly, compare the string with its NFC/NFKC/FCC.
if ($string eq NFC($string)) {
- # $string is exactly normalized in NFC;
+ # $string is exactly normalized in NFC;
} else {
- # $string is not normalized in NFC;
+ # $string is not normalized in NFC;
}
if ($string eq NFKC($string)) {
- # $string is exactly normalized in NFKC;
+ # $string is exactly normalized in NFKC;
} else {
- # $string is not normalized in NFKC;
+ # $string is not normalized in NFKC;
}
=head2 Character Data
@@ -454,7 +547,10 @@ normalization implemented by this module depends on your perl's version.
5.8.4-5.8.6 4.0.1 (normalization is same as 4.0.0)
5.8.7-5.8.8 4.1.0
5.10.0 5.0.0
- 5.8.9 5.1.0
+ 5.8.9, 5.10.1 5.1.0
+ 5.12.0-5.12.3 5.2.0
+ 5.14.0 6.0.0
+ 5.16.0 (to be) 6.1.0
=item Correction of decomposition mapping
@@ -482,7 +578,7 @@ lower than 4.1.0.
SADAHIRO Tomoyuki <SADAHIRO@cpan.org>
-Copyright(C) 2001-2007, SADAHIRO Tomoyuki. Japan. All rights reserved.
+Copyright(C) 2001-2012, SADAHIRO Tomoyuki. Japan. All rights reserved.
This module is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.xs b/gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.xs
index f4bbca7907a..b760dff0b3b 100644
--- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.xs
+++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.xs
@@ -17,32 +17,43 @@
/* Perl 5.6.1 ? */
#ifndef utf8n_to_uvuni
-#define utf8n_to_uvuni utf8_to_uv
+#define utf8n_to_uvuni utf8_to_uv
#endif /* utf8n_to_uvuni */
/* UTF8_ALLOW_BOM is used before Perl 5.8.0 */
-#ifdef UTF8_ALLOW_BOM
-#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|UTF8_ALLOW_FFFF)
-#else
-#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_FFFF)
-#endif
+#ifndef UTF8_ALLOW_BOM
+#define UTF8_ALLOW_BOM (0)
+#endif /* UTF8_ALLOW_BOM */
+
+#ifndef UTF8_ALLOW_SURROGATE
+#define UTF8_ALLOW_SURROGATE (0)
+#endif /* UTF8_ALLOW_SURROGATE */
+
+#ifndef UTF8_ALLOW_FE_FF
+#define UTF8_ALLOW_FE_FF (0)
+#endif /* UTF8_ALLOW_FE_FF */
+
+#ifndef UTF8_ALLOW_FFFF
+#define UTF8_ALLOW_FFFF (0)
+#endif /* UTF8_ALLOW_FFFF */
+
+#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|UTF8_ALLOW_FE_FF|UTF8_ALLOW_FFFF)
+
+/* check if the string buffer is enough before uvuni_to_utf8(). */
+/* dstart, d, and dlen should be defined outside before. */
+#define Renew_d_if_not_enough_to(need) STRLEN curlen = d - dstart; \
+ if (dlen < curlen + (need)) { \
+ dlen += (need); \
+ Renew(dstart, dlen+1, U8); \
+ d = dstart + curlen; \
+ }
-/* if utf8n_to_uvuni() sets retlen to 0 (?) */
+/* if utf8n_to_uvuni() sets retlen to 0 (if broken?) */
#define ErrRetlenIsZero "panic (Unicode::Normalize %s): zero-length character"
/* utf8_hop() hops back before start. Maybe broken UTF-8 */
#define ErrHopBeforeStart "panic (Unicode::Normalize): hopping before start"
-/* It should never happen as there is no instance in UTF-8 and UTF-EBCDIC.
- If Unicode would add a new composition of A + B to C
- where bytes::length(A) + bytes::length(B) < bytes::length(C),
- this code should be fixed.
- In this case, mkheader will prevent Unicode::Normalize from building. */
-#define ErrLongerThanSrc "panic (Unicode::Normalize %s): longer than source"
-
-/* uvuni_to_utf8 wants UTF8_MAXBYTES free bytes available */
-#define ErrTargetNotEnough "panic (Unicode::Normalize %s): target not enough"
-
/* At present, char > 0x10ffff are unaffected without complaint, right? */
#define VALID_UTF_MAX (0x10ffff)
#define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv))
@@ -216,13 +227,7 @@ U8* pv_utf8_decompose(U8* s, STRLEN slen, U8** dp, STRLEN dlen, bool iscompat)
p += retlen;
if (Hangul_IsS(uv)) {
- STRLEN cur = d - dstart;
-
- if (dlen < cur + UTF8_MAXLEN * 3) {
- dlen += UTF8_MAXLEN * 3;
- Renew(dstart, dlen+1, U8);
- d = dstart + cur;
- }
+ Renew_d_if_not_enough_to(UTF8_MAXLEN * 3)
d = pv_cat_decompHangul(d, uv);
}
else {
@@ -230,23 +235,12 @@ U8* pv_utf8_decompose(U8* s, STRLEN slen, U8** dp, STRLEN dlen, bool iscompat)
if (r) {
STRLEN len = (STRLEN)strlen((char *)r);
- STRLEN cur = d - dstart;
- if (dlen < cur + len) {
- dlen += len;
- Renew(dstart, dlen+1, U8);
- d = dstart + cur;
- }
+ Renew_d_if_not_enough_to(len)
while (len--)
*d++ = *r++;
}
else {
- STRLEN cur = d - dstart;
-
- if (dlen < cur + UTF8_MAXLEN) {
- dlen += UTF8_MAXLEN;
- Renew(dstart, dlen+1, U8);
- d = dstart + cur;
- }
+ Renew_d_if_not_enough_to(UTF8_MAXLEN)
d = uvuni_to_utf8(d, uv);
}
}
@@ -256,11 +250,12 @@ U8* pv_utf8_decompose(U8* s, STRLEN slen, U8** dp, STRLEN dlen, bool iscompat)
}
static
-U8* pv_utf8_reorder(U8* s, STRLEN slen, U8* d, STRLEN dlen)
+U8* pv_utf8_reorder(U8* s, STRLEN slen, U8** dp, STRLEN dlen)
{
U8* p = s;
U8* e = s + slen;
- U8* dend = d + dlen;
+ U8* dstart = *dp;
+ U8* d = dstart;
UNF_cc seq_ary[CC_SEQ_SIZE];
UNF_cc* seq_ptr = seq_ary; /* use array at the beginning */
@@ -268,10 +263,6 @@ U8* pv_utf8_reorder(U8* s, STRLEN slen, U8* d, STRLEN dlen)
STRLEN seq_max = CC_SEQ_SIZE;
STRLEN cc_pos = 0;
- if (dlen < slen || dlen < slen + UTF8_MAXLEN)
- croak(ErrTargetNotEnough, "reorder");
- dend -= UTF8_MAXLEN; /* safety */
-
while (p < e) {
U8 curCC;
STRLEN retlen;
@@ -306,6 +297,7 @@ U8* pv_utf8_reorder(U8* s, STRLEN slen, U8* d, STRLEN dlen)
continue;
}
+ /* output */
if (cc_pos) {
STRLEN i;
@@ -313,30 +305,30 @@ U8* pv_utf8_reorder(U8* s, STRLEN slen, U8* d, STRLEN dlen)
qsort((void*)seq_ptr, cc_pos, sizeof(UNF_cc), compare_cc);
for (i = 0; i < cc_pos; i++) {
+ Renew_d_if_not_enough_to(UTF8_MAXLEN)
d = uvuni_to_utf8(d, seq_ptr[i].uv);
- if (dend < d) /* real end is dend + UTF8_MAXLEN */
- croak(ErrLongerThanSrc, "reorder");
}
cc_pos = 0;
}
if (curCC == 0) {
+ Renew_d_if_not_enough_to(UTF8_MAXLEN)
d = uvuni_to_utf8(d, uv);
- if (dend < d) /* real end is dend + UTF8_MAXLEN */
- croak(ErrLongerThanSrc, "reorder");
}
}
if (seq_ext)
Safefree(seq_ext);
+ *dp = dstart;
return d;
}
static
-U8* pv_utf8_compose(U8* s, STRLEN slen, U8* d, STRLEN dlen, bool iscontig)
+U8* pv_utf8_compose(U8* s, STRLEN slen, U8** dp, STRLEN dlen, bool iscontig)
{
U8* p = s;
U8* e = s + slen;
- U8* dend = d + dlen;
+ U8* dstart = *dp;
+ U8* d = dstart;
UV uvS = 0; /* code point of the starter */
bool valid_uvS = FALSE; /* if FALSE, uvS isn't initialized yet */
@@ -348,10 +340,6 @@ U8* pv_utf8_compose(U8* s, STRLEN slen, U8* d, STRLEN dlen, bool iscontig)
STRLEN seq_max = CC_SEQ_SIZE;
STRLEN cc_pos = 0;
- if (dlen < slen || dlen < slen + UTF8_MAXLEN)
- croak(ErrTargetNotEnough, "compose");
- dend -= UTF8_MAXLEN; /* safety */
-
while (p < e) {
U8 curCC;
STRLEN retlen;
@@ -370,9 +358,8 @@ U8* pv_utf8_compose(U8* s, STRLEN slen, U8* d, STRLEN dlen, bool iscontig)
continue;
}
else {
+ Renew_d_if_not_enough_to(UTF8_MAXLEN)
d = uvuni_to_utf8(d, uv);
- if (dend < d) /* real end is dend + UTF8_MAXLEN */
- croak(ErrLongerThanSrc, "compose");
continue;
}
}
@@ -380,9 +367,9 @@ U8* pv_utf8_compose(U8* s, STRLEN slen, U8* d, STRLEN dlen, bool iscontig)
bool composed;
/* blocked */
- if (iscontig && cc_pos || /* discontiguous combination */
- curCC != 0 && preCC == curCC || /* blocked by same CC */
- preCC > curCC) /* blocked by higher CC: revised D2 */
+ if ((iscontig && cc_pos) || /* discontiguous combination */
+ (curCC != 0 && preCC == curCC) || /* blocked by same CC */
+ (preCC > curCC)) /* blocked by higher CC: revised D2 */
composed = FALSE;
/* not blocked:
@@ -428,17 +415,18 @@ U8* pv_utf8_compose(U8* s, STRLEN slen, U8* d, STRLEN dlen, bool iscontig)
}
}
- d = uvuni_to_utf8(d, uvS); /* starter (composed or not) */
- if (dend < d) /* real end is dend + UTF8_MAXLEN */
- croak(ErrLongerThanSrc, "compose");
+ /* output */
+ {
+ Renew_d_if_not_enough_to(UTF8_MAXLEN)
+ d = uvuni_to_utf8(d, uvS); /* starter (composed or not) */
+ }
if (cc_pos) {
STRLEN i;
for (i = 0; i < cc_pos; i++) {
+ Renew_d_if_not_enough_to(UTF8_MAXLEN)
d = uvuni_to_utf8(d, seq_ptr[i]);
- if (dend < d) /* real end is dend + UTF8_MAXLEN */
- croak(ErrLongerThanSrc, "compose");
}
cc_pos = 0;
}
@@ -447,6 +435,7 @@ U8* pv_utf8_compose(U8* s, STRLEN slen, U8* d, STRLEN dlen, bool iscontig)
}
if (seq_ext)
Safefree(seq_ext);
+ *dp = dstart;
return d;
}
@@ -474,6 +463,7 @@ decompose(src, compat = &PL_sv_no)
OUTPUT:
RETVAL
+
SV*
reorder(src)
SV * src
@@ -485,16 +475,17 @@ reorder(src)
CODE:
s = (U8*)sv_2pvunicode(src,&slen);
dst = newSVpvn("", 0);
- dlen = slen + UTF8_MAXLEN;
- d = (U8*)SvGROW(dst,dlen+1);
+ dlen = slen;
+ New(0, d, dlen+1, U8);
+ dend = pv_utf8_reorder(s, slen, &d, dlen);
+ sv_setpvn(dst, (char *)d, dend - d);
SvUTF8_on(dst);
- dend = pv_utf8_reorder(s, slen, d, dlen);
- *dend = '\0';
- SvCUR_set(dst, dend - d);
+ Safefree(d);
RETVAL = dst;
OUTPUT:
RETVAL
+
SV*
compose(src)
SV * src
@@ -508,16 +499,17 @@ compose(src)
CODE:
s = (U8*)sv_2pvunicode(src,&slen);
dst = newSVpvn("", 0);
- dlen = slen + UTF8_MAXLEN;
- d = (U8*)SvGROW(dst,dlen+1);
+ dlen = slen;
+ New(0, d, dlen+1, U8);
+ dend = pv_utf8_compose(s, slen, &d, dlen, (bool)ix);
+ sv_setpvn(dst, (char *)d, dend - d);
SvUTF8_on(dst);
- dend = pv_utf8_compose(s, slen, d, dlen, (bool)ix);
- *dend = '\0';
- SvCUR_set(dst, dend - d);
+ Safefree(d);
RETVAL = dst;
OUTPUT:
RETVAL
+
SV*
NFD(src)
SV * src
@@ -529,29 +521,34 @@ NFD(src)
U8 *s, *t, *tend, *d, *dend;
STRLEN slen, tlen, dlen;
CODE:
- /* decompose */
s = (U8*)sv_2pvunicode(src,&slen);
+
+ /* decompose */
tlen = slen;
New(0, t, tlen+1, U8);
- tend = pv_utf8_decompose(s, slen, &t, tlen, (bool)ix);
+ tend = pv_utf8_decompose(s, slen, &t, tlen, (bool)(ix==1));
*tend = '\0';
- tlen = tend - t; /* no longer know real tlen */
+ tlen = tend - t; /* no longer know real size of t */
/* reorder */
- dst = newSVpvn("", 0);
- dlen = tlen + UTF8_MAXLEN;
- d = (U8*)SvGROW(dst,dlen+1);
- SvUTF8_on(dst);
- dend = pv_utf8_reorder(t, tlen, d, dlen);
+ dlen = tlen;
+ New(0, d, dlen+1, U8);
+ dend = pv_utf8_reorder(t, tlen, &d, dlen);
*dend = '\0';
- SvCUR_set(dst, dend - d);
+ dlen = dend - d; /* no longer know real size of d */
/* return */
+ dst = newSVpvn("", 0);
+ sv_setpvn(dst, (char *)d, dlen);
+ SvUTF8_on(dst);
+
Safefree(t);
+ Safefree(d);
RETVAL = dst;
OUTPUT:
RETVAL
+
SV*
NFC(src)
SV * src
@@ -564,37 +561,42 @@ NFC(src)
U8 *s, *t, *tend, *u, *uend, *d, *dend;
STRLEN slen, tlen, ulen, dlen;
CODE:
- /* decompose */
s = (U8*)sv_2pvunicode(src,&slen);
+
+ /* decompose */
tlen = slen;
New(0, t, tlen+1, U8);
tend = pv_utf8_decompose(s, slen, &t, tlen, (bool)(ix==1));
*tend = '\0';
- tlen = tend - t; /* no longer know real tlen */
+ tlen = tend - t; /* no longer know real size of t */
/* reorder */
- ulen = tlen + UTF8_MAXLEN;
+ ulen = tlen;
New(0, u, ulen+1, U8);
- uend = pv_utf8_reorder(t, tlen, u, ulen);
+ uend = pv_utf8_reorder(t, tlen, &u, ulen);
*uend = '\0';
- ulen = uend - u;
+ ulen = uend - u; /* no longer know real size of u */
/* compose */
- dst = newSVpvn("", 0);
- dlen = ulen + UTF8_MAXLEN;
- d = (U8*)SvGROW(dst,dlen+1);
- SvUTF8_on(dst);
- dend = pv_utf8_compose(u, ulen, d, dlen, (bool)(ix==2));
+ dlen = ulen;
+ New(0, d, dlen+1, U8);
+ dend = pv_utf8_compose(u, ulen, &d, dlen, (bool)(ix==2));
*dend = '\0';
- SvCUR_set(dst, dend - d);
+ dlen = dend - d; /* no longer know real size of d */
/* return */
+ dst = newSVpvn("", 0);
+ sv_setpvn(dst, (char *)d, dlen);
+ SvUTF8_on(dst);
+
Safefree(t);
Safefree(u);
+ Safefree(d);
RETVAL = dst;
OUTPUT:
RETVAL
+
SV*
checkNFD(src)
SV * src
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/README b/gnu/usr.bin/perl/cpan/Unicode-Normalize/README
index 8a5390cc789..958df4e1bd8 100644
--- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/README
+++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/README
@@ -1,4 +1,4 @@
-Unicode/Normalize version 1.03
+Unicode/Normalize version 1.14
===================================
Unicode::Normalize - Unicode Normalization Forms
@@ -62,7 +62,6 @@ which are included in recent perl core distributions.
- unicore/CombiningClass.pl (or unicode/CombiningClass.pl)
- unicore/Decomposition.pl (or unicode/Decomposition.pl)
-- unicore/CompositionExclusions.txt (or unicode/CompExcl.txt)
NOTES
@@ -83,7 +82,7 @@ COPYRIGHT AND LICENSE
SADAHIRO Tomoyuki <SADAHIRO@cpan.org>
-Copyright(C) 2001-2007, SADAHIRO Tomoyuki. Japan. All rights reserved.
+Copyright(C) 2001-2012, SADAHIRO Tomoyuki. Japan. All rights reserved.
This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/mkheader b/gnu/usr.bin/perl/cpan/Unicode-Normalize/mkheader
index b3e3c3153cb..c694b7e9ebf 100644
--- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/mkheader
+++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/mkheader
@@ -9,7 +9,6 @@
# Input files:
# unicore/CombiningClass.pl (or unicode/CombiningClass.pl)
# unicore/Decomposition.pl (or unicode/Decomposition.pl)
-# unicore/CompositionExclusions.txt (or unicode/CompExcl.txt)
#
# Output files:
# unfcan.h
@@ -32,30 +31,48 @@ BEGIN {
our $PACKAGE = 'Unicode::Normalize, mkheader';
-our $Combin = do "unicore/CombiningClass.pl"
- || do "unicode/CombiningClass.pl"
- || croak "$PACKAGE: CombiningClass.pl not found";
+our $prefix = "UNF_";
+our $structname = "${prefix}complist";
-our $Decomp = do "unicore/Decomposition.pl"
- || do "unicode/Decomposition.pl"
- || croak "$PACKAGE: Decomposition.pl not found";
+sub pack_U {
+ return pack('U*', @_);
+}
+
+# %Canon and %Compat will be ($codepoint => $hexstring) after _U_stringify()
+our %Comp1st; # $codepoint => $listname : may be composed with a next char.
+our %CompList; # $listname,$2nd => $codepoint : composite
+
+##### The below part is common to mkheader and PP #####
our %Combin; # $codepoint => $number : combination class
our %Canon; # $codepoint => \@codepoints : canonical decomp.
our %Compat; # $codepoint => \@codepoints : compat. decomp.
-# after _U_stringify(), ($codepoint => $hexstring) for %Canon and %Compat
+our %Compos; # $1st,$2nd => $codepoint : composite
our %Exclus; # $codepoint => 1 : composition exclusions
our %Single; # $codepoint => 1 : singletons
our %NonStD; # $codepoint => 1 : non-starter decompositions
-
-our %Comp1st; # $codepoint => $listname : may be composed with a next char.
our %Comp2nd; # $codepoint => 1 : may be composed with a prev char.
-our %CompList; # $listname,$2nd => $codepoint : composite
-our $prefix = "UNF_";
-our $structname = "${prefix}complist";
+# from core Unicode database
+our $Combin = do "unicore/CombiningClass.pl"
+ || do "unicode/CombiningClass.pl"
+ || croak "$PACKAGE: CombiningClass.pl not found";
+our $Decomp = do "unicore/Decomposition.pl"
+ || do "unicode/Decomposition.pl"
+ || croak "$PACKAGE: Decomposition.pl not found";
-########## definition of Hangul constants ##########
+# CompositionExclusions.txt since Unicode 3.2.0
+our @CompEx = qw(
+ 0958 0959 095A 095B 095C 095D 095E 095F 09DC 09DD 09DF 0A33 0A36
+ 0A59 0A5A 0A5B 0A5E 0B5C 0B5D 0F43 0F4D 0F52 0F57 0F5C 0F69 0F76
+ 0F78 0F93 0F9D 0FA2 0FA7 0FAC 0FB9 FB1D FB1F FB2A FB2B FB2C FB2D
+ FB2E FB2F FB30 FB31 FB32 FB33 FB34 FB35 FB36 FB38 FB39 FB3A FB3B
+ FB3C FB3E FB40 FB41 FB43 FB44 FB46 FB47 FB48 FB49 FB4A FB4B FB4C
+ FB4D FB4E 2ADC 1D15E 1D15F 1D160 1D161 1D162 1D163 1D164 1D1BB
+ 1D1BC 1D1BD 1D1BE 1D1BF 1D1C0
+);
+
+# definition of Hangul constants
use constant SBase => 0xAC00;
use constant SFinal => 0xD7A3; # SBase -1 + SCount
use constant SCount => 11172; # LCount * NCount
@@ -71,67 +88,23 @@ use constant TFinal => 0x11C2;
use constant TCount => 28;
sub decomposeHangul {
- my $SIndex = $_[0] - SBase;
- my $LIndex = int( $SIndex / NCount);
- my $VIndex = int(($SIndex % NCount) / TCount);
- my $TIndex = $SIndex % TCount;
+ my $sindex = $_[0] - SBase;
+ my $lindex = int( $sindex / NCount);
+ my $vindex = int(($sindex % NCount) / TCount);
+ my $tindex = $sindex % TCount;
my @ret = (
- LBase + $LIndex,
- VBase + $VIndex,
- $TIndex ? (TBase + $TIndex) : (),
+ LBase + $lindex,
+ VBase + $vindex,
+ $tindex ? (TBase + $tindex) : (),
);
- return @ret;
-}
-
-########## length of a character ##########
-
-sub utf8len {
- my $uv = shift;
- return $uv < 0x80 ? 1 :
- $uv < 0x800 ? 2 :
- $uv < 0x10000 ? 3 :
- $uv < 0x110000 ? 4 :
- croak "$PACKAGE: illegal char in the composite. codepoint max is 0x10ffff.";
+ return wantarray ? @ret : pack_U(@ret);
}
-sub utfelen {
- my $uv = shift;
- return $uv < 0xA0 ? 1 :
- $uv < 0x400 ? 2 :
- $uv < 0x4000 ? 3 :
- $uv < 0x40000 ? 4 :
- $uv < 0x110000 ? 5 :
- croak "$PACKAGE: illegal char in the composite. codepoint max is 0x10ffff.";
-}
-
-my $errExpand = "$PACKAGE: Composition to U+%04X (from U+%04X and U+%04X) " .
- "needs growing the string in %s! Quit. Please inform the author...";
-
-########## getting full decomposion ##########
-{
- my($f, $fh);
- foreach my $d (@INC) {
- $f = File::Spec->catfile($d, "unicore", "CompositionExclusions.txt");
- last if open($fh, $f);
- $f = File::Spec->catfile($d, "unicode", "CompExcl.txt");
- last if open($fh, $f);
- $f = undef;
- }
- croak "$PACKAGE: neither unicore/CompositionExclusions.txt "
- . "nor unicode/CompExcl.txt is found in @INC" unless defined $f;
+########## getting full decomposition ##########
- while (<$fh>) {
- next if /^#/ or /^$/;
- s/#.*//;
- $Exclus{ hex($1) } = 1 if /([0-9A-Fa-f]+)/;
- }
- close $fh;
-}
-
-##
## converts string "hhhh hhhh hhhh" to a numeric list
-##
-sub _getHexArray { map hex, $_[0] =~ /([0-9A-Fa-f]+)/g }
+## (hex digits separated by spaces)
+sub _getHexArray { map hex, $_[0] =~ /\G *([0-9A-Fa-f]+)/g }
while ($Combin =~ /(.+)/g) {
my @tab = split /\t/, $1;
@@ -147,43 +120,38 @@ while ($Decomp =~ /(.+)/g) {
my @tab = split /\t/, $1;
my $compat = $tab[2] =~ s/<[^>]+>//;
my $dec = [ _getHexArray($tab[2]) ]; # decomposition
- my $ini = hex($tab[0]);
+ my $ini = hex($tab[0]); # initial decomposable character
my $end = $tab[1] eq '' ? $ini : hex($tab[1]);
# ($ini .. $end) is the range of decomposable characters.
- my $listname =
- @$dec == 2 ? sprintf("${structname}_%06x", $dec->[0]) : 'USELESS';
- # %04x is bad since it'd place _3046 after _1d157.
-
foreach my $u ($ini .. $end) {
$Compat{$u} = $dec;
+ $Canon{$u} = $dec if ! $compat;
+ }
+}
- if (! $compat) {
- $Canon{$u} = $dec;
-
- if (@$dec == 2) {
- if (utf8len($dec->[0]) + utf8len($dec->[1]) < utf8len($u)) {
- croak sprintf $errExpand, $u, $dec->[0], $dec->[1],
- "utf-8";
- }
- if (utfelen($dec->[0]) + utfelen($dec->[1]) < utfelen($u)) {
- croak sprintf $errExpand, $u, $dec->[0], $dec->[1],
- "utf-ebcdic";
- }
-
- if ($Combin{ $dec->[0] }) {
- $NonStD{$u} = 1;
- } else {
- $CompList{ $listname }{ $dec->[1] } = $u;
- $Comp1st{ $dec->[0] } = $listname;
- $Comp2nd{ $dec->[1] } = 1 if ! $Exclus{$u};
- }
- } elsif (@$dec == 1) {
- $Single{$u} = 1;
- } else {
- croak("Weird Canonical Decomposition of U+$tab[0]");
- }
+for my $s (@CompEx) {
+ my $u = hex $s;
+ next if !$Canon{$u}; # not assigned
+ next if $u == 0xFB1D && !$Canon{0x1D15E}; # 3.0.1 before Corrigendum #2
+ $Exclus{$u} = 1;
+}
+
+foreach my $u (keys %Canon) {
+ my $dec = $Canon{$u};
+
+ if (@$dec == 2) {
+ if ($Combin{ $dec->[0] }) {
+ $NonStD{$u} = 1;
+ } else {
+ $Compos{ $dec->[0] }{ $dec->[1] } = $u;
+ $Comp2nd{ $dec->[1] } = 1 if ! $Exclus{$u};
}
+ } elsif (@$dec == 1) {
+ $Single{$u} = 1;
+ } else {
+ my $h = sprintf '%04X', $u;
+ croak("Weird Canonical Decomposition of U+$h");
}
}
@@ -222,8 +190,18 @@ foreach my $key (keys %Compat) {
$Compat{$key} = [ getCompatList($key) ];
}
-sub _pack_U {
- return pack('U*', @_);
+##### The above part is common to mkheader and PP #####
+
+foreach my $comp1st (keys %Compos) {
+ my $listname = sprintf("${structname}_%06x", $comp1st);
+ # %04x is bad since it'd place _3046 after _1d157.
+ $Comp1st{$comp1st} = $listname;
+ my $rh1st = $Compos{$comp1st};
+
+ foreach my $comp2nd (keys %$rh1st) {
+ my $uc = $rh1st->{$comp2nd};
+ $CompList{$listname}{$comp2nd} = $uc;
+ }
}
sub split_into_char {
@@ -239,7 +217,7 @@ sub split_into_char {
sub _U_stringify {
sprintf '"%s"', join '',
- map sprintf("\\x%02x", $_), split_into_char(_pack_U(@_));
+ map sprintf("\\x%02x", $_), split_into_char(pack_U(@_));
}
foreach my $hash (\%Canon, \%Compat) {
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/fcdc.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/fcdc.t
index 1cc0db181ce..e62c4ea18ce 100755
--- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/fcdc.t
+++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/fcdc.t
@@ -16,12 +16,22 @@ BEGIN {
#########################
-use Test;
use strict;
use warnings;
-BEGIN { plan tests => 70 };
+BEGIN { $| = 1; print "1..70\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
use Unicode::Normalize qw(:all);
-ok(1); # If we made it this far, we're ok.
+
+ok(1);
sub _pack_U { Unicode::Normalize::pack_U(@_) }
sub hexU { _pack_U map hex, split ' ', shift }
@@ -39,6 +49,8 @@ ok(normalize('FCC', ""), "");
ok(normalize('FCC', "A"), "A");
ok(normalize('FCD', "A"), "A");
+# 9
+
# if checkFCD is YES, the return value from FCD should be same as the original
ok(FCD(hexU("00C5")), hexU("00C5")); # A with ring above
ok(FCD(hexU("0041 030A")), hexU("0041 030A")); # A+ring
@@ -52,6 +64,8 @@ ok(normalize('FCD', hexU("0041 0327 030A")), hexU("0041 0327 030A"));
ok(normalize('FCD', hexU("AC01 1100 1161")), hexU("AC01 1100 1161"));
ok(normalize('FCD', hexU("212B F900")), hexU("212B F900"));
+# 19
+
# if checkFCD is MAYBE or NO, FCD returns NFD (this behavior isn't documented)
ok(FCD(hexU("00C5 0327")), hexU("0041 0327 030A"));
ok(FCD(hexU("0041 030A 0327")), hexU("0041 0327 030A"));
@@ -63,6 +77,8 @@ ok(normalize('FCD', hexU("0041 030A 0327")), hexU("0041 0327 030A"));
ok(normalize('FCD', hexU("00C5 0327")), NFD(hexU("00C5 0327")));
ok(normalize('FCD', hexU("0041 030A 0327")), NFD(hexU("0041 030A 0327")));
+# 27
+
ok(answer(checkFCD('')), 'YES');
ok(answer(checkFCD('A')), 'YES');
ok(answer(checkFCD("\x{030A}")), 'YES'); # 030A;COMBINING RING ABOVE
@@ -83,6 +99,8 @@ ok(answer(checkFCC(hexU("1EA7 05AE 0315 0062"))), "NO");
ok(answer(check('FCD', hexU("1EA7 05AE 0315 0062"))), "NO");
ok(answer(check('FCC', hexU("1EA7 05AE 0315 0062"))), "NO");
+# 45
+
ok(FCC(hexU("00C5 0327")), hexU("0041 0327 030A"));
ok(FCC(hexU("0045 0304 0300")), "\x{1E14}");
ok(FCC("\x{1100}\x{1161}\x{1100}\x{1173}\x{11AF}"), "\x{AC00}\x{AE00}");
@@ -97,6 +115,8 @@ ok(FCC("\x{1100}\x{1161}\x{0300}"), "\x{AC00}\x{0300}");
ok(FCC("\x{0B47}\x{300}\x{0B3E}\x{327}"), "\x{0B47}\x{300}\x{0B3E}\x{327}");
ok(FCC("\x{1100}\x{300}\x{1161}\x{327}"), "\x{1100}\x{300}\x{1161}\x{327}");
+# 57
+
ok(answer(checkFCC('')), 'YES');
ok(answer(checkFCC('A')), 'YES');
ok(answer(checkFCC("\x{030A}")), 'MAYBE'); # 030A;COMBINING RING ABOVE
@@ -111,3 +131,5 @@ ok(answer(checkFCC("\x{212B}\x{F900}")), 'NO'); # compat
ok(answer(checkFCC("\x{212B}\x{0327}")), 'NO'); # compat
ok(answer(checkFCC("\x{0327}\x{212B}")), 'NO'); # compat
+# 70
+
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/form.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/form.t
index 27cd177596f..c9b424906d1 100755
--- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/form.t
+++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/form.t
@@ -16,12 +16,22 @@ BEGIN {
#########################
-use Test;
use strict;
use warnings;
-BEGIN { plan tests => 37 };
+BEGIN { $| = 1; print "1..37\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
use Unicode::Normalize qw(:all);
-ok(1); # If we made it this far, we're ok.
+
+ok(1);
sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" }
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/func.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/func.t
index 76ced03ea1a..d8d8d952e0a 100755
--- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/func.t
+++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/func.t
@@ -16,12 +16,22 @@ BEGIN {
#########################
-use Test;
use strict;
use warnings;
-BEGIN { plan tests => 211 };
+BEGIN { $| = 1; print "1..217\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
use Unicode::Normalize qw(:all);
-ok(1); # If we made it this far, we're ok.
+
+ok(1);
sub _pack_U { Unicode::Normalize::pack_U(@_) }
sub hexU { _pack_U map hex, split ' ', shift }
@@ -49,6 +59,8 @@ ok(getCanon(0x212C), undef);
ok(getCanon(0x3243), undef);
ok(getCanon(0xFA2D), _pack_U(0x9DB4));
+# 20
+
ok(getCompat( 0), undef);
ok(getCompat(0x29), undef);
ok(getCompat(0x41), undef);
@@ -64,6 +76,8 @@ ok(getCompat(0xAC00), _pack_U(0x1100, 0x1161));
ok(getCompat(0xAE00), _pack_U(0x1100, 0x1173, 0x11AF));
ok(getCompat(0xFA2D), _pack_U(0x9DB4));
+# 34
+
ok(getComposite( 0, 0), undef);
ok(getComposite( 0, 0x29), undef);
ok(getComposite(0x29, 0), undef);
@@ -84,6 +98,8 @@ ok(getComposite(0xAC00, 0x11A7), undef);
ok(getComposite(0xAC00, 0x11A8), 0xAC01);
ok(getComposite(0xADF8, 0x11AF), 0xAE00);
+# 53
+
sub uprops {
my $uv = shift;
my $r = "";
@@ -120,6 +136,8 @@ ok(uprops(0xF900), 'xSnFbDmCKyG'); # CJK COMPATIBILITY IDEOGRAPH-F900
ok(uprops(0xFB4E), 'XsnFbDmCKyG'); # HEBREW LETTER PE WITH RAFE
ok(uprops(0xFF71), 'xsnfbdmcKyG'); # HALFWIDTH KATAKANA LETTER A
+# 71
+
ok(decompose(""), "");
ok(decompose("A"), "A");
ok(decompose("", 1), "");
@@ -138,6 +156,8 @@ my $sDec = "\x{FA19}";
ok(decompose($sDec), "\x{795E}");
ok($sDec, "\x{FA19}");
+# 83
+
ok(reorder(""), "");
ok(reorder("A"), "A");
ok(reorder(hexU("0041 0300 0315 0313 031b 0061")),
@@ -150,6 +170,8 @@ my $sReord = "\x{3000}\x{300}\x{31b}";
ok(reorder($sReord), "\x{3000}\x{31b}\x{300}");
ok($sReord, "\x{3000}\x{300}\x{31b}");
+# 89
+
ok(compose(""), "");
ok(compose("A"), "A");
ok(compose(hexU("0061 0300")), hexU("00E0"));
@@ -165,6 +187,8 @@ my $sCom = "\x{304B}\x{3099}";
ok(compose($sCom), "\x{304C}");
ok($sCom, "\x{304B}\x{3099}");
+# 100
+
ok(composeContiguous(""), "");
ok(composeContiguous("A"), "A");
ok(composeContiguous(hexU("0061 0300")), hexU("00E0"));
@@ -180,6 +204,8 @@ my $sCtg = "\x{30DB}\x{309A}";
ok(composeContiguous($sCtg), "\x{30DD}");
ok($sCtg, "\x{30DB}\x{309A}");
+# 111
+
sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" }
ok(answer(checkNFD("")), "YES");
@@ -220,6 +246,8 @@ ok(answer(checkNFKC(hexU("0041 0327 030A"))), "MAYBE"); # A+cedilla+ring
ok(answer(checkNFKC(hexU("0041 030A 0327"))), "NO"); # A+ring+cedilla
ok(answer(check("NFKC", hexU("20 C1 212B 300"))), "NO");
+# 145
+
"012ABC" =~ /(\d+)(\w+)/;
ok("012" eq NFC $1 && "ABC" eq NFC $2);
@@ -230,6 +258,8 @@ ok(normalize('NFC', $1), "012");
ok(normalize('NFC', $2), "ABC");
# s/^NF// in normalize() must not prevent using $1, $&, etc.
+# 150
+
# a string with initial zero should be treated like a number
# LATIN CAPITAL LETTER A WITH GRAVE
@@ -276,6 +306,8 @@ ok(getCanon("044032"), _pack_U(0x1100, 0x1161));
ok(getCompat("044032"), _pack_U(0x1100, 0x1161));
ok(getComposite("04352", "04449"), 0xAC00);
+# 182
+
# string with 22 combining characters: (0x300..0x315)
my $str_cc22 = _pack_U(0x3041, 0x300..0x315, 0x3042);
ok(decompose($str_cc22), $str_cc22);
@@ -289,6 +321,8 @@ ok(NFKC($str_cc22), $str_cc22);
ok(FCD($str_cc22), $str_cc22);
ok(FCC($str_cc22), $str_cc22);
+# 192
+
# string with 40 combining characters of the same class: (0x300..0x313)x2
my $str_cc40 = _pack_U(0x3041, 0x300..0x313, 0x300..0x313, 0x3042);
ok(decompose($str_cc40), $str_cc40);
@@ -302,6 +336,8 @@ ok(NFKC($str_cc40), $str_cc40);
ok(FCD($str_cc40), $str_cc40);
ok(FCC($str_cc40), $str_cc40);
+# 202
+
my $precomp = hexU("304C 304E 3050 3052 3054");
my $combseq = hexU("304B 3099 304D 3099 304F 3099 3051 3099 3053 3099");
ok(decompose($precomp x 5), $combseq x 5);
@@ -319,4 +355,22 @@ ok(decompose($precomp . $notcomp), $combseq . $notcomp);
ok(decompose($precomp . $notcomp x 5), $combseq . $notcomp x 5);
ok(decompose($precomp . $notcomp x10), $combseq . $notcomp x10);
+# 211
+
+my $preUnicode3_1 = !defined getCanon(0x1D15E);
+my $preUnicode3_2 = !defined getCanon(0x2ADC);
+
+# HEBREW LETTER YOD WITH HIRIQ
+ok($preUnicode3_1 xor isExclusion(0xFB1D));
+ok($preUnicode3_1 xor isComp_Ex (0xFB1D));
+
+# MUSICAL SYMBOL HALF NOTE
+ok($preUnicode3_1 xor isExclusion(0x1D15E));
+ok($preUnicode3_1 xor isComp_Ex (0x1D15E));
+
+# FORKING
+ok($preUnicode3_2 xor isExclusion(0x2ADC));
+ok($preUnicode3_2 xor isComp_Ex (0x2ADC));
+
+# 217
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/illegal.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/illegal.t
index 976e5097a2c..bcd9517f6d7 100755
--- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/illegal.t
+++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/illegal.t
@@ -35,16 +35,25 @@ BEGIN {
}
}
-use Test;
use strict;
use warnings;
-BEGIN { plan tests => 112 };
+BEGIN { $| = 1; print "1..113\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
+ok(1);
#########################
no warnings qw(utf8);
-# To avoid warning in Test.pm, EXPR in ok(EXPR) must be boolean.
for my $u (0xD800, 0xDFFF, 0xFDD0, 0xFDEF, 0xFEFF, 0xFFFE, 0xFFFF,
0x1FFFF, 0x10FFFF, 0x110000, 0x7FFFFFFF)
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/norm.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/norm.t
index 5d93747965a..1442c30000c 100755
--- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/norm.t
+++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/norm.t
@@ -16,12 +16,22 @@ BEGIN {
#########################
-use Test;
use strict;
use warnings;
-BEGIN { plan tests => 64 };
+BEGIN { $| = 1; print "1..64\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
use Unicode::Normalize qw(normalize);
-ok(1); # If we made it this far, we're ok.
+
+ok(1);
sub _pack_U { Unicode::Normalize::pack_U(@_) }
sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
@@ -48,6 +58,8 @@ ok(normalize('NFC', "A"), "A");
ok(normalize('NFKD',"A"), "A");
ok(normalize('NFKC',"A"), "A");
+# 17
+
# don't modify the source
my $sNFD = "\x{FA19}";
ok(normalize('NFD', $sNFD), "\x{795E}");
@@ -65,6 +77,8 @@ my $sNFKC = "\x{FA26}";
ok(normalize('NFKC', $sNFKC), "\x{90FD}");
ok($sNFKC, "\x{FA26}");
+# 25
+
sub hexNFC {
join " ", map sprintf("%04X", $_),
_unpack_U normalize 'C', _pack_U map hex, split ' ', shift;
@@ -100,14 +114,14 @@ ok(hexNFC("AC00 11A9"), "AC02");
ok(hexNFC("AC00 11C2"), "AC1B");
ok(hexNFC("AC00 11C3"), "AC00 11C3");
+# 47
+
# Test Cases from Public Review Issue #29: Normalization Issue
# cf. http://www.unicode.org/review/pr-29.html
ok(hexNFC("0B47 0300 0B3E"), "0B47 0300 0B3E");
ok(hexNFC("1100 0300 1161"), "1100 0300 1161");
-
ok(hexNFC("0B47 0B3E 0300"), "0B4B 0300");
ok(hexNFC("1100 1161 0300"), "AC00 0300");
-
ok(hexNFC("0B47 0300 0B3E 0327"), "0B47 0300 0B3E 0327");
ok(hexNFC("1100 0300 1161 0327"), "1100 0300 1161 0327");
@@ -123,3 +137,6 @@ ok(hexNFC("0315 0061 0300"), "0315 00E0");
ok(hexNFC("0000 0327 0061 0300"), "0000 0327 00E0");
ok(hexNFC("0000 0301 0061 0300"), "0000 0301 00E0");
ok(hexNFC("0000 0315 0061 0300"), "0000 0315 00E0");
+
+# 64
+
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/partial1.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/partial1.t
new file mode 100644
index 00000000000..56f2ca44274
--- /dev/null
+++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/partial1.t
@@ -0,0 +1,117 @@
+
+BEGIN {
+ unless ("A" eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Normalize " .
+ "cannot stringify a Unicode code point\n";
+ exit 0;
+ }
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+BEGIN {
+ unless (5.006001 <= $]) {
+ print "1..0 # skipped: Perl 5.6.1 or later".
+ " needed for this test\n";
+ exit;
+ }
+}
+
+#########################
+
+use strict;
+use warnings;
+BEGIN { $| = 1; print "1..26\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
+use Unicode::Normalize qw(:all);
+
+ok(1);
+
+sub _pack_U { Unicode::Normalize::pack_U(@_) }
+sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
+
+#########################
+
+sub arraynorm {
+ my $form = shift;
+ my @string = @_;
+ my $result = "";
+ my $unproc = "";
+ foreach my $str (@string) {
+ $unproc .= $str;
+ $result .= $form eq 'NFC' ? NFC_partial ($unproc) :
+ $form eq 'NFD' ? NFD_partial ($unproc) :
+ $form eq 'NFKC' ? NFKC_partial($unproc) :
+ $form eq 'NFKD' ? NFKD_partial($unproc) :
+ undef;
+ }
+ $result .= $unproc;
+ return $result;
+}
+
+my $strD = "\x{3C9}\x{301}\x{1100}\x{1161}\x{11A8}\x{1100}\x{1161}\x{11AA}";
+my $strC = "\x{3CE}\x{AC01}\x{AC03}";
+my @str1 = (substr($strD,0,3), substr($strD,3,4), substr($strD,7));
+my @str2 = (substr($strD,0,1), substr($strD,1,3), substr($strD,4));
+ok($strC eq NFC($strD));
+ok($strD eq join('', @str1));
+ok($strC eq arraynorm('NFC', @str1));
+ok($strD eq join('', @str2));
+ok($strC eq arraynorm('NFC', @str2));
+
+my @strX = ("\x{300}\x{AC00}", "\x{11A8}");
+my $strX = "\x{300}\x{AC01}";
+ok($strX eq NFC(join('', @strX)));
+ok($strX eq arraynorm('NFC', @strX));
+ok($strX eq NFKC(join('', @strX)));
+ok($strX eq arraynorm('NFKC', @strX));
+
+my @strY = ("\x{304B}\x{0308}", "\x{0323}\x{3099}");
+my $strY = ("\x{304C}\x{0323}\x{0308}");
+ok($strY eq NFC(join('', @strY)));
+ok($strY eq arraynorm('NFC', @strY));
+ok($strY eq NFKC(join('', @strY)));
+ok($strY eq arraynorm('NFKC', @strY));
+
+my @strZ = ("\x{304B}\x{0308}", "\x{0323}", "\x{3099}");
+my $strZ = ("\x{304B}\x{3099}\x{0323}\x{0308}");
+ok($strZ eq NFD(join('', @strZ)));
+ok($strZ eq arraynorm('NFD', @strZ));
+ok($strZ eq NFKD(join('', @strZ)));
+ok($strZ eq arraynorm('NFKD', @strZ));
+
+# 18
+
+# must modify the source
+my $sNFD = "\x{FA19}";
+ok(NFD_partial($sNFD), "");
+ok($sNFD, "\x{795E}");
+
+my $sNFC = "\x{FA1B}";
+ok(NFC_partial($sNFC), "");
+ok($sNFC, "\x{798F}");
+
+my $sNFKD = "\x{FA1E}";
+ok(NFKD_partial($sNFKD), "");
+ok($sNFKD, "\x{7FBD}");
+
+my $sNFKC = "\x{FA26}";
+ok(NFKC_partial($sNFKC), "");
+ok($sNFKC, "\x{90FD}");
+
+# 26
+
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/partial2.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/partial2.t
new file mode 100644
index 00000000000..4d824a44181
--- /dev/null
+++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/partial2.t
@@ -0,0 +1,113 @@
+
+BEGIN {
+ unless ("A" eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Normalize " .
+ "cannot stringify a Unicode code point\n";
+ exit 0;
+ }
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+BEGIN {
+ unless (5.006001 <= $]) {
+ print "1..0 # skipped: Perl 5.6.1 or later".
+ " needed for this test\n";
+ exit;
+ }
+}
+
+#########################
+
+use strict;
+use warnings;
+BEGIN { $| = 1; print "1..26\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
+use Unicode::Normalize qw(:all);
+
+ok(1);
+
+sub _pack_U { Unicode::Normalize::pack_U(@_) }
+sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
+
+#########################
+
+sub arraynorm {
+ my $form = shift;
+ my @string = @_;
+ my $result = "";
+ my $unproc = "";
+ foreach my $str (@string) {
+ $unproc .= $str;
+ $result .= normalize_partial($form, $unproc);
+ }
+ $result .= $unproc;
+ return $result;
+}
+
+my $strD = "\x{3C9}\x{301}\x{1100}\x{1161}\x{11A8}\x{1100}\x{1161}\x{11AA}";
+my $strC = "\x{3CE}\x{AC01}\x{AC03}";
+my @str1 = (substr($strD,0,3), substr($strD,3,4), substr($strD,7));
+my @str2 = (substr($strD,0,1), substr($strD,1,3), substr($strD,4));
+ok($strC eq NFC($strD));
+ok($strD eq join('', @str1));
+ok($strC eq arraynorm('NFC', @str1));
+ok($strD eq join('', @str2));
+ok($strC eq arraynorm('NFC', @str2));
+
+my @strX = ("\x{300}\x{AC00}", "\x{11A8}");
+my $strX = "\x{300}\x{AC01}";
+ok($strX eq NFC(join('', @strX)));
+ok($strX eq arraynorm('NFC', @strX));
+ok($strX eq NFKC(join('', @strX)));
+ok($strX eq arraynorm('NFKC', @strX));
+
+my @strY = ("\x{304B}\x{0308}", "\x{0323}\x{3099}");
+my $strY = ("\x{304C}\x{0323}\x{0308}");
+ok($strY eq NFC(join('', @strY)));
+ok($strY eq arraynorm('NFC', @strY));
+ok($strY eq NFKC(join('', @strY)));
+ok($strY eq arraynorm('NFKC', @strY));
+
+my @strZ = ("\x{304B}\x{0308}", "\x{0323}", "\x{3099}");
+my $strZ = ("\x{304B}\x{3099}\x{0323}\x{0308}");
+ok($strZ eq NFD(join('', @strZ)));
+ok($strZ eq arraynorm('NFD', @strZ));
+ok($strZ eq NFKD(join('', @strZ)));
+ok($strZ eq arraynorm('NFKD', @strZ));
+
+# 18
+
+# must modify the source
+my $sNFD = "\x{FA19}";
+ok(normalize_partial('NFD', $sNFD), "");
+ok($sNFD, "\x{795E}");
+
+my $sNFC = "\x{FA1B}";
+ok(normalize_partial('NFC', $sNFC), "");
+ok($sNFC, "\x{798F}");
+
+my $sNFKD = "\x{FA1E}";
+ok(normalize_partial('NFKD', $sNFKD), "");
+ok($sNFKD, "\x{7FBD}");
+
+my $sNFKC = "\x{FA26}";
+ok(normalize_partial('NFKC', $sNFKC), "");
+ok($sNFKC, "\x{90FD}");
+
+# 26
+
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/proto.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/proto.t
index 3c4298d849a..714018a68e4 100755
--- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/proto.t
+++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/proto.t
@@ -16,12 +16,22 @@ BEGIN {
#########################
-use Test;
use strict;
use warnings;
-BEGIN { plan tests => 42 };
+BEGIN { $| = 1; print "1..48\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
use Unicode::Normalize qw(:all);
-ok(1); # If we made it this far, we're ok.
+
+ok(1);
#########################
@@ -35,6 +45,8 @@ ok(FCD "\x{30A}\x{327}" eq "\x{327}\x{30A}");
ok(FCC "\x{30A}\x{327}" eq "\x{327}\x{30A}");
ok(reorder "\x{30A}\x{327}" eq "\x{327}\x{30A}");
+# 9
+
ok(prototype \&normalize,'$$');
ok(prototype \&NFD, '$');
ok(prototype \&NFC, '$');
@@ -56,6 +68,8 @@ ok(prototype \&reorder, '$');
ok(prototype \&compose, '$');
ok(prototype \&composeContiguous, '$');
+# 27
+
ok(prototype \&getCanon, '$');
ok(prototype \&getCompat, '$');
ok(prototype \&getComposite, '$$');
@@ -65,11 +79,18 @@ ok(prototype \&isSingleton, '$');
ok(prototype \&isNonStDecomp, '$');
ok(prototype \&isComp2nd, '$');
ok(prototype \&isComp_Ex, '$');
-
ok(prototype \&isNFD_NO, '$');
ok(prototype \&isNFC_NO, '$');
ok(prototype \&isNFC_MAYBE, '$');
ok(prototype \&isNFKD_NO, '$');
ok(prototype \&isNFKC_NO, '$');
ok(prototype \&isNFKC_MAYBE, '$');
+ok(prototype \&splitOnLastStarter, undef);
+ok(prototype \&normalize_partial, '$$');
+ok(prototype \&NFD_partial, '$');
+ok(prototype \&NFC_partial, '$');
+ok(prototype \&NFKD_partial, '$');
+ok(prototype \&NFKC_partial, '$');
+
+# 48
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/split.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/split.t
index 6bf7ff66b1f..fe579cdd0d6 100755
--- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/split.t
+++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/split.t
@@ -24,12 +24,22 @@ BEGIN {
#########################
-use Test;
use strict;
use warnings;
-BEGIN { plan tests => 14 };
+BEGIN { $| = 1; print "1..34\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
use Unicode::Normalize qw(:all);
-ok(1); # If we made it this far, we're ok.
+
+ok(1);
sub _pack_U { Unicode::Normalize::pack_U(@_) }
sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
@@ -40,14 +50,6 @@ our $proc; # before the last starter
our $unproc; # the last starter and after
# If string has no starter, entire string is set to $unproc.
-# When you have $normalized string and $unnormalized string following,
-# a simple concatenation
-# C<$concat = $normalized . normalize($form, $unnormalized)>
-# is wrong. Instead of it, like this:
-#
-# ($processed, $unprocessed) = splitOnLastStarter($normalized);
-# $concat = $processed . normalize($form, $unprocessed.$unnormalized);
-
($proc, $unproc) = splitOnLastStarter("");
ok($proc, "");
ok($unproc, "");
@@ -79,3 +81,64 @@ ok(NFC($ka_grave.$dakuten) eq $ga_grave);
ok(NFC($ka_grave).NFC($dakuten) ne $ga_grave);
ok($concat eq $ga_grave);
+# 14
+
+sub arraynorm {
+ my $form = shift;
+ my @string = @_;
+ my $result = "";
+ my $unproc = "";
+ foreach my $str (@string) {
+ $unproc .= $str;
+ my $n = normalize($form, $unproc);
+ my($p, $u) = splitOnLastStarter($n);
+ $result .= $p;
+ $unproc = $u;
+ }
+ $result .= $unproc;
+ return $result;
+}
+
+my $strD = "\x{3C9}\x{301}\x{1100}\x{1161}\x{11A8}\x{1100}\x{1161}\x{11AA}";
+my $strC = "\x{3CE}\x{AC01}\x{AC03}";
+my @str1 = (substr($strD,0,3), substr($strD,3,4), substr($strD,7));
+my @str2 = (substr($strD,0,1), substr($strD,1,3), substr($strD,4));
+ok($strC eq NFC($strD));
+ok($strD eq join('', @str1));
+ok($strC eq arraynorm('NFC', @str1));
+ok($strD eq join('', @str2));
+ok($strC eq arraynorm('NFC', @str2));
+
+my @strX = ("\x{300}\x{AC00}", "\x{11A8}");
+my $strX = "\x{300}\x{AC01}";
+ok($strX eq NFC(join('', @strX)));
+ok($strX eq arraynorm('NFC', @strX));
+ok($strX eq NFKC(join('', @strX)));
+ok($strX eq arraynorm('NFKC', @strX));
+
+my @strY = ("\x{304B}\x{0308}", "\x{0323}\x{3099}");
+my $strY = ("\x{304C}\x{0323}\x{0308}");
+ok($strY eq NFC(join('', @strY)));
+ok($strY eq arraynorm('NFC', @strY));
+ok($strY eq NFKC(join('', @strY)));
+ok($strY eq arraynorm('NFKC', @strY));
+
+my @strZ = ("\x{304B}\x{0308}", "\x{0323}", "\x{3099}");
+my $strZ = ("\x{304B}\x{3099}\x{0323}\x{0308}");
+ok($strZ eq NFD(join('', @strZ)));
+ok($strZ eq arraynorm('NFD', @strZ));
+ok($strZ eq NFKD(join('', @strZ)));
+ok($strZ eq arraynorm('NFKD', @strZ));
+
+# 31
+
+# don't modify the source
+
+my $source = "ABC";
+($proc, $unproc) = splitOnLastStarter($source);
+ok($proc, "AB");
+ok($unproc, "C");
+ok($source, "ABC");
+
+# 34
+
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/test.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/test.t
index e48e16f1b9b..f69c695f106 100755
--- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/test.t
+++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/test.t
@@ -16,12 +16,22 @@ BEGIN {
#########################
-use Test;
use strict;
use warnings;
-BEGIN { plan tests => 58 };
+BEGIN { $| = 1; print "1..72\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
use Unicode::Normalize;
-ok(1); # If we made it this far, we're ok.
+
+ok(1);
sub _pack_U { Unicode::Normalize::pack_U(@_) }
sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
@@ -38,6 +48,8 @@ ok(NFC("A"), "A");
ok(NFKD("A"), "A");
ok(NFKC("A"), "A");
+# 9
+
# don't modify the source
my $sNFD = "\x{FA19}";
ok(NFD($sNFD), "\x{795E}");
@@ -55,6 +67,7 @@ my $sNFKC = "\x{FA26}";
ok(NFKC($sNFKC), "\x{90FD}");
ok($sNFKC, "\x{FA26}");
+# 17
sub hexNFC {
join " ", map sprintf("%04X", $_),
@@ -91,6 +104,8 @@ ok(hexNFC("AC00 11A9"), "AC02");
ok(hexNFC("AC00 11C2"), "AC1B");
ok(hexNFC("AC00 11C3"), "AC00 11C3");
+# 39
+
# Test Cases from Public Review Issue #29: Normalization Issue
# cf. http://www.unicode.org/review/pr-29.html
ok(hexNFC("0B47 0300 0B3E"), "0B47 0300 0B3E");
@@ -113,13 +128,38 @@ ok(hexNFC("0000 0327 0061 0300"), "0000 0327 00E0");
ok(hexNFC("0000 0301 0061 0300"), "0000 0301 00E0");
ok(hexNFC("0000 0315 0061 0300"), "0000 0315 00E0");
-# NFC() should be unary.
+# 56
+
+# NFC() and NFKC() should be unary.
my $str11 = _pack_U(0x41, 0x0302, 0x0301, 0x62);
my $str12 = _pack_U(0x1EA4, 0x62);
ok(NFC $str11 eq $str12);
+ok(NFKC $str11 eq $str12);
-# NFD() should be unary.
+# NFD() and NFKD() should be unary.
my $str21 = _pack_U(0xE0, 0xAC00);
my $str22 = _pack_U(0x61, 0x0300, 0x1100, 0x1161);
ok(NFD $str21 eq $str22);
+ok(NFKD $str21 eq $str22);
+
+# 60
+
+## Bug #53197: NFKC("\x{2000}") produces...
+
+ok(NFKC("\x{2002}") eq ' ');
+ok(NFKD("\x{2002}") eq ' ');
+ok(NFKC("\x{2000}") eq ' ');
+ok(NFKD("\x{2000}") eq ' ');
+
+ok(NFKC("\x{210C}") eq 'H');
+ok(NFKD("\x{210C}") eq 'H');
+ok(NFKC("\x{210D}") eq 'H');
+ok(NFKD("\x{210D}") eq 'H');
+
+ok(NFC("\x{F907}") eq "\x{9F9C}");
+ok(NFD("\x{F907}") eq "\x{9F9C}");
+ok(NFKC("\x{F907}") eq "\x{9F9C}");
+ok(NFKD("\x{F907}") eq "\x{9F9C}");
+
+# 72
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/tie.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/tie.t
index c7214917266..be1712a5050 100755
--- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/tie.t
+++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/tie.t
@@ -27,10 +27,20 @@ BEGIN {
}
}
-use Test;
use strict;
use warnings;
-BEGIN { plan tests => 16 };
+BEGIN { $| = 1; print "1..17\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
+ok(1);
package tiescalar;
sub TIESCALAR {