diff options
author | 2013-03-25 20:06:16 +0000 | |
---|---|---|
committer | 2013-03-25 20:06:16 +0000 | |
commit | 898184e3e61f9129feb5978fad5a8c6865f00b92 (patch) | |
tree | 56f32aefc1eed60b534611007c7856f82697a205 /gnu/usr.bin/perl/cpan/Unicode-Normalize | |
parent | PGSHIFT -> PAGE_SHIFT (diff) | |
download | wireguard-openbsd-898184e3e61f9129feb5978fad5a8c6865f00b92.tar.xz wireguard-openbsd-898184e3e61f9129feb5978fad5a8c6865f00b92.zip |
import perl 5.16.3 from CPAN - worked on by Andrew Fresh and myself
Diffstat (limited to 'gnu/usr.bin/perl/cpan/Unicode-Normalize')
17 files changed, 856 insertions, 263 deletions
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/Changes b/gnu/usr.bin/perl/cpan/Unicode-Normalize/Changes index e9cb3918a5e..88df63cb4d7 100644 --- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/Changes +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/Changes @@ -1,12 +1,51 @@ Revision history for Perl extension Unicode::Normalize. +1.14 Sat Mar 10 13:34:53 2012 + - avoid "use Test". + +1.13 Mon Jul 25 21:07:49 2011 + - tried fixing the tarball with world writable files. + ( http://www.perlmonks.org/?node_id=731935 ) + +1.12 Mon May 16 23:36:07 2011 + - removed Normalize/CompExcl.pl and coded Composition Exclusions; + how to load CompExcl.pl seems not good, but I'm not sure... + +1.11 Sun May 15 20:31:09 2011 + - As perl 5.14.0 has removed unicore/CompositionExclusions.txt + from the installation, Normalize/CompExcl.pl in this distribution + is used instead. (see [rt.cpan.org #68106]) + +1.10 Sun Jan 16 21:00:34 2011 + - XSUB: reorder() and compose() treat with growing the string. + - XSUB: provision against UTF8_ALLOW_* flags to be undefined in future. + - doc: about perl 5.13.x and Unicode 6.0.0 + - doc and comments: [perl #81876] Fix typos by Peter J. Acklam. + +1.07 Mon Sep 20 20:20:02 2010 + - doc: about perl 5.12.x and Unicode 5.2.0 + - test: prototype of normalize_partial() and cousins in proto.t. + +1.06 Thu Feb 11 16:19:54 2010 + - mkheader/Pure Perl: fixed the internal _getHexArray() for perl 5.11.3 + changes (Bug #53197). + +1.05 Mon Sep 28 21:43:17 2009 + - normalize_partial() and NFX_partial(). { NFX =~ /^NFK?[CD]\z/ } + - added partial1.t for NFX_partial(). + - added partial2.t for normalize_partial(). + +1.04 Wed Sep 23 22:32:57 2009 + - doc: splitOnLastStarter() since 0.24 is now documented. + - test: some new tests are added to split.t. + 1.03 Sun Mar 29 12:56:23 2009 - mkheader: check if no composition needs growing the string. - Makefile.PL: a tweak 1.02 Tue Jun 5 22:46:45 2007 - XSUB: mkheader, _U_stringify() - avoid unpack('C*') on unicode. - - test: short.t removed - pure perl is not inapprotiate for test of + - test: short.t removed - pure perl is not appropriate for test of unicode edge cases. 1.01 Tue Jun 13 22:01:53 2006 @@ -43,7 +82,7 @@ Revision history for Perl extension Unicode::Normalize. - XSUB: even if string contains a malformed, "short" Unicode character, decompose() and reorder() will be safe. Garbage will be no longer added. - added null.t and short.t. - - now truely added illegal.t (in 0.27, forgot to change MANIFEST). + - now truly added illegal.t (in 0.27, forgot to change MANIFEST). 0.27 Sun Nov 16 13:16:21 2003 - Illegal code points (surrogate and noncharacter) will be allowed @@ -69,7 +108,8 @@ Revision history for Perl extension Unicode::Normalize. script files, named "enableXS" and "disableXS". (no longer <perl Makefile.PL xs> and <perl Makefile.PL noxs>.) * simplified Makefile.PL. - - added fcdc.t and split.t. + - added fcdc.t for FCD() and FCC(). + - added split.t for splitOnLastStarter(): an undocumented function. 0.23 Sat Jun 28 20:38:10 2003 - bug fix: \0-terminate in compose() in XS. diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/Makefile.PL b/gnu/usr.bin/perl/cpan/Unicode-Normalize/Makefile.PL index a04ca62b8f6..f0b94e74963 100644 --- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/Makefile.PL +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/Makefile.PL @@ -12,7 +12,10 @@ if (-f "Normalize.xs") { } WriteMakefile( + 'AUTHOR' => 'SADAHIRO Tomoyuki <SADAHIRO@cpan.org>', + 'ABSTRACT' => 'Unicode Normalization Forms', 'INSTALLDIRS' => $] >= 5.007002 ? 'perl' : 'site', + 'LICENSE' => 'perl', 'NAME' => 'Unicode::Normalize', 'VERSION_FROM' => 'Normalize.pm', # finds $VERSION 'clean' => $clean, @@ -25,7 +28,6 @@ WriteMakefile( File::Copy => 0, File::Spec => 0, strict => 0, - Test => 0, warnings => 0, }, ); diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.pm b/gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.pm index ad5ff82a836..c580f4852c6 100644 --- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.pm +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.pm @@ -13,13 +13,9 @@ use Carp; no warnings 'utf8'; -our $VERSION = '1.03'; +our $VERSION = '1.14'; our $PACKAGE = __PACKAGE__; -require Exporter; -require DynaLoader; - -our @ISA = qw(Exporter DynaLoader); our @EXPORT = qw( NFC NFD NFKC NFKD ); our @EXPORT_OK = qw( normalize decompose reorder compose @@ -27,8 +23,8 @@ our @EXPORT_OK = qw( getCanon getCompat getComposite getCombinClass isExclusion isSingleton isNonStDecomp isComp2nd isComp_Ex isNFD_NO isNFC_NO isNFC_MAYBE isNFKD_NO isNFKC_NO isNFKC_MAYBE - FCD checkFCD FCC checkFCC composeContiguous - splitOnLastStarter + FCD checkFCD FCC checkFCC composeContiguous splitOnLastStarter + normalize_partial NFC_partial NFD_partial NFKC_partial NFKD_partial ); our %EXPORT_TAGS = ( all => [ @EXPORT, @EXPORT_OK ], @@ -37,14 +33,8 @@ our %EXPORT_TAGS = ( fast => [ qw/FCD checkFCD FCC checkFCC composeContiguous/ ], ); -###### - -bootstrap Unicode::Normalize $VERSION; - -###### - ## -## utilites for tests +## utilities for tests ## sub pack_U { @@ -55,9 +45,18 @@ sub unpack_U { return unpack('U*', shift(@_).pack('U*')); } +require Exporter; + +##### The above part is common to XS and PP ##### + +our @ISA = qw(Exporter DynaLoader); +require DynaLoader; +bootstrap Unicode::Normalize $VERSION; + +##### The below part is common to XS and PP ##### ## -## normalization forms +## normalize ## sub FCD ($) { @@ -83,9 +82,27 @@ sub normalize($$) croak($PACKAGE."::normalize: invalid form name: $form"); } +## +## partial +## + +sub normalize_partial ($$) { + if (exists $formNorm{$_[0]}) { + my $n = normalize($_[0], $_[1]); + my($p, $u) = splitOnLastStarter($n); + $_[1] = $u; + return $p; + } + croak($PACKAGE."::normalize_partial: invalid form name: $_[0]"); +} + +sub NFD_partial ($) { return normalize_partial('NFD', $_[0]) } +sub NFC_partial ($) { return normalize_partial('NFC', $_[0]) } +sub NFKD_partial($) { return normalize_partial('NFKD',$_[0]) } +sub NFKC_partial($) { return normalize_partial('NFKC',$_[0]) } ## -## quick check +## check ## our %formCheck = ( @@ -239,6 +256,82 @@ you can get its NFC/NFKC string, by saying $NFC_string = compose($NFD_string); $NFKC_string = compose($NFKD_string); +=item C<($processed, $unprocessed) = splitOnLastStarter($normalized)> + +It returns two strings: the first one, C<$processed>, is a part +before the last starter, and the second one, C<$unprocessed> is +another part after the first part. A starter is a character having +a combining class of zero (see UAX #15). + +Note that C<$processed> may be empty (when C<$normalized> contains no +starter or starts with the last starter), and then C<$unprocessed> +should be equal to the entire C<$normalized>. + +When you have a C<$normalized> string and an C<$unnormalized> string +following it, a simple concatenation is wrong: + + $concat = $normalized . normalize($form, $unnormalized); # wrong! + +Instead of it, do like this: + + ($processed, $unprocessed) = splitOnLastStarter($normalized); + $concat = $processed . normalize($form, $unprocessed.$unnormalized); + +C<splitOnLastStarter()> should be called with a pre-normalized parameter +C<$normalized>, that is in the same form as C<$form> you want. + +If you have an array of C<@string> that should be concatenated and then +normalized, you can do like this: + + my $result = ""; + my $unproc = ""; + foreach my $str (@string) { + $unproc .= $str; + my $n = normalize($form, $unproc); + my($p, $u) = splitOnLastStarter($n); + $result .= $p; + $unproc = $u; + } + $result .= $unproc; + # instead of normalize($form, join('', @string)) + +=item C<$processed = normalize_partial($form, $unprocessed)> + +A wrapper for the combination of C<normalize()> and C<splitOnLastStarter()>. +Note that C<$unprocessed> will be modified as a side-effect. + +If you have an array of C<@string> that should be concatenated and then +normalized, you can do like this: + + my $result = ""; + my $unproc = ""; + foreach my $str (@string) { + $unproc .= $str; + $result .= normalize_partial($form, $unproc); + } + $result .= $unproc; + # instead of normalize($form, join('', @string)) + +=item C<$processed = NFD_partial($unprocessed)> + +It does like C<normalize_partial('NFD', $unprocessed)>. +Note that C<$unprocessed> will be modified as a side-effect. + +=item C<$processed = NFC_partial($unprocessed)> + +It does like C<normalize_partial('NFC', $unprocessed)>. +Note that C<$unprocessed> will be modified as a side-effect. + +=item C<$processed = NFKD_partial($unprocessed)> + +It does like C<normalize_partial('NFKD', $unprocessed)>. +Note that C<$unprocessed> will be modified as a side-effect. + +=item C<$processed = NFKC_partial($unprocessed)> + +It does like C<normalize_partial('NFKC', $unprocessed)>. +Note that C<$unprocessed> will be modified as a side-effect. + =back =head2 Quick Check @@ -321,15 +414,15 @@ while C<"B\N{COMBINING ACUTE ACCENT}"> is in NFC. If you want to check exactly, compare the string with its NFC/NFKC/FCC. if ($string eq NFC($string)) { - # $string is exactly normalized in NFC; + # $string is exactly normalized in NFC; } else { - # $string is not normalized in NFC; + # $string is not normalized in NFC; } if ($string eq NFKC($string)) { - # $string is exactly normalized in NFKC; + # $string is exactly normalized in NFKC; } else { - # $string is not normalized in NFKC; + # $string is not normalized in NFKC; } =head2 Character Data @@ -454,7 +547,10 @@ normalization implemented by this module depends on your perl's version. 5.8.4-5.8.6 4.0.1 (normalization is same as 4.0.0) 5.8.7-5.8.8 4.1.0 5.10.0 5.0.0 - 5.8.9 5.1.0 + 5.8.9, 5.10.1 5.1.0 + 5.12.0-5.12.3 5.2.0 + 5.14.0 6.0.0 + 5.16.0 (to be) 6.1.0 =item Correction of decomposition mapping @@ -482,7 +578,7 @@ lower than 4.1.0. SADAHIRO Tomoyuki <SADAHIRO@cpan.org> -Copyright(C) 2001-2007, SADAHIRO Tomoyuki. Japan. All rights reserved. +Copyright(C) 2001-2012, SADAHIRO Tomoyuki. Japan. All rights reserved. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.xs b/gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.xs index f4bbca7907a..b760dff0b3b 100644 --- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.xs +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.xs @@ -17,32 +17,43 @@ /* Perl 5.6.1 ? */ #ifndef utf8n_to_uvuni -#define utf8n_to_uvuni utf8_to_uv +#define utf8n_to_uvuni utf8_to_uv #endif /* utf8n_to_uvuni */ /* UTF8_ALLOW_BOM is used before Perl 5.8.0 */ -#ifdef UTF8_ALLOW_BOM -#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|UTF8_ALLOW_FFFF) -#else -#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_FFFF) -#endif +#ifndef UTF8_ALLOW_BOM +#define UTF8_ALLOW_BOM (0) +#endif /* UTF8_ALLOW_BOM */ + +#ifndef UTF8_ALLOW_SURROGATE +#define UTF8_ALLOW_SURROGATE (0) +#endif /* UTF8_ALLOW_SURROGATE */ + +#ifndef UTF8_ALLOW_FE_FF +#define UTF8_ALLOW_FE_FF (0) +#endif /* UTF8_ALLOW_FE_FF */ + +#ifndef UTF8_ALLOW_FFFF +#define UTF8_ALLOW_FFFF (0) +#endif /* UTF8_ALLOW_FFFF */ + +#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|UTF8_ALLOW_FE_FF|UTF8_ALLOW_FFFF) + +/* check if the string buffer is enough before uvuni_to_utf8(). */ +/* dstart, d, and dlen should be defined outside before. */ +#define Renew_d_if_not_enough_to(need) STRLEN curlen = d - dstart; \ + if (dlen < curlen + (need)) { \ + dlen += (need); \ + Renew(dstart, dlen+1, U8); \ + d = dstart + curlen; \ + } -/* if utf8n_to_uvuni() sets retlen to 0 (?) */ +/* if utf8n_to_uvuni() sets retlen to 0 (if broken?) */ #define ErrRetlenIsZero "panic (Unicode::Normalize %s): zero-length character" /* utf8_hop() hops back before start. Maybe broken UTF-8 */ #define ErrHopBeforeStart "panic (Unicode::Normalize): hopping before start" -/* It should never happen as there is no instance in UTF-8 and UTF-EBCDIC. - If Unicode would add a new composition of A + B to C - where bytes::length(A) + bytes::length(B) < bytes::length(C), - this code should be fixed. - In this case, mkheader will prevent Unicode::Normalize from building. */ -#define ErrLongerThanSrc "panic (Unicode::Normalize %s): longer than source" - -/* uvuni_to_utf8 wants UTF8_MAXBYTES free bytes available */ -#define ErrTargetNotEnough "panic (Unicode::Normalize %s): target not enough" - /* At present, char > 0x10ffff are unaffected without complaint, right? */ #define VALID_UTF_MAX (0x10ffff) #define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv)) @@ -216,13 +227,7 @@ U8* pv_utf8_decompose(U8* s, STRLEN slen, U8** dp, STRLEN dlen, bool iscompat) p += retlen; if (Hangul_IsS(uv)) { - STRLEN cur = d - dstart; - - if (dlen < cur + UTF8_MAXLEN * 3) { - dlen += UTF8_MAXLEN * 3; - Renew(dstart, dlen+1, U8); - d = dstart + cur; - } + Renew_d_if_not_enough_to(UTF8_MAXLEN * 3) d = pv_cat_decompHangul(d, uv); } else { @@ -230,23 +235,12 @@ U8* pv_utf8_decompose(U8* s, STRLEN slen, U8** dp, STRLEN dlen, bool iscompat) if (r) { STRLEN len = (STRLEN)strlen((char *)r); - STRLEN cur = d - dstart; - if (dlen < cur + len) { - dlen += len; - Renew(dstart, dlen+1, U8); - d = dstart + cur; - } + Renew_d_if_not_enough_to(len) while (len--) *d++ = *r++; } else { - STRLEN cur = d - dstart; - - if (dlen < cur + UTF8_MAXLEN) { - dlen += UTF8_MAXLEN; - Renew(dstart, dlen+1, U8); - d = dstart + cur; - } + Renew_d_if_not_enough_to(UTF8_MAXLEN) d = uvuni_to_utf8(d, uv); } } @@ -256,11 +250,12 @@ U8* pv_utf8_decompose(U8* s, STRLEN slen, U8** dp, STRLEN dlen, bool iscompat) } static -U8* pv_utf8_reorder(U8* s, STRLEN slen, U8* d, STRLEN dlen) +U8* pv_utf8_reorder(U8* s, STRLEN slen, U8** dp, STRLEN dlen) { U8* p = s; U8* e = s + slen; - U8* dend = d + dlen; + U8* dstart = *dp; + U8* d = dstart; UNF_cc seq_ary[CC_SEQ_SIZE]; UNF_cc* seq_ptr = seq_ary; /* use array at the beginning */ @@ -268,10 +263,6 @@ U8* pv_utf8_reorder(U8* s, STRLEN slen, U8* d, STRLEN dlen) STRLEN seq_max = CC_SEQ_SIZE; STRLEN cc_pos = 0; - if (dlen < slen || dlen < slen + UTF8_MAXLEN) - croak(ErrTargetNotEnough, "reorder"); - dend -= UTF8_MAXLEN; /* safety */ - while (p < e) { U8 curCC; STRLEN retlen; @@ -306,6 +297,7 @@ U8* pv_utf8_reorder(U8* s, STRLEN slen, U8* d, STRLEN dlen) continue; } + /* output */ if (cc_pos) { STRLEN i; @@ -313,30 +305,30 @@ U8* pv_utf8_reorder(U8* s, STRLEN slen, U8* d, STRLEN dlen) qsort((void*)seq_ptr, cc_pos, sizeof(UNF_cc), compare_cc); for (i = 0; i < cc_pos; i++) { + Renew_d_if_not_enough_to(UTF8_MAXLEN) d = uvuni_to_utf8(d, seq_ptr[i].uv); - if (dend < d) /* real end is dend + UTF8_MAXLEN */ - croak(ErrLongerThanSrc, "reorder"); } cc_pos = 0; } if (curCC == 0) { + Renew_d_if_not_enough_to(UTF8_MAXLEN) d = uvuni_to_utf8(d, uv); - if (dend < d) /* real end is dend + UTF8_MAXLEN */ - croak(ErrLongerThanSrc, "reorder"); } } if (seq_ext) Safefree(seq_ext); + *dp = dstart; return d; } static -U8* pv_utf8_compose(U8* s, STRLEN slen, U8* d, STRLEN dlen, bool iscontig) +U8* pv_utf8_compose(U8* s, STRLEN slen, U8** dp, STRLEN dlen, bool iscontig) { U8* p = s; U8* e = s + slen; - U8* dend = d + dlen; + U8* dstart = *dp; + U8* d = dstart; UV uvS = 0; /* code point of the starter */ bool valid_uvS = FALSE; /* if FALSE, uvS isn't initialized yet */ @@ -348,10 +340,6 @@ U8* pv_utf8_compose(U8* s, STRLEN slen, U8* d, STRLEN dlen, bool iscontig) STRLEN seq_max = CC_SEQ_SIZE; STRLEN cc_pos = 0; - if (dlen < slen || dlen < slen + UTF8_MAXLEN) - croak(ErrTargetNotEnough, "compose"); - dend -= UTF8_MAXLEN; /* safety */ - while (p < e) { U8 curCC; STRLEN retlen; @@ -370,9 +358,8 @@ U8* pv_utf8_compose(U8* s, STRLEN slen, U8* d, STRLEN dlen, bool iscontig) continue; } else { + Renew_d_if_not_enough_to(UTF8_MAXLEN) d = uvuni_to_utf8(d, uv); - if (dend < d) /* real end is dend + UTF8_MAXLEN */ - croak(ErrLongerThanSrc, "compose"); continue; } } @@ -380,9 +367,9 @@ U8* pv_utf8_compose(U8* s, STRLEN slen, U8* d, STRLEN dlen, bool iscontig) bool composed; /* blocked */ - if (iscontig && cc_pos || /* discontiguous combination */ - curCC != 0 && preCC == curCC || /* blocked by same CC */ - preCC > curCC) /* blocked by higher CC: revised D2 */ + if ((iscontig && cc_pos) || /* discontiguous combination */ + (curCC != 0 && preCC == curCC) || /* blocked by same CC */ + (preCC > curCC)) /* blocked by higher CC: revised D2 */ composed = FALSE; /* not blocked: @@ -428,17 +415,18 @@ U8* pv_utf8_compose(U8* s, STRLEN slen, U8* d, STRLEN dlen, bool iscontig) } } - d = uvuni_to_utf8(d, uvS); /* starter (composed or not) */ - if (dend < d) /* real end is dend + UTF8_MAXLEN */ - croak(ErrLongerThanSrc, "compose"); + /* output */ + { + Renew_d_if_not_enough_to(UTF8_MAXLEN) + d = uvuni_to_utf8(d, uvS); /* starter (composed or not) */ + } if (cc_pos) { STRLEN i; for (i = 0; i < cc_pos; i++) { + Renew_d_if_not_enough_to(UTF8_MAXLEN) d = uvuni_to_utf8(d, seq_ptr[i]); - if (dend < d) /* real end is dend + UTF8_MAXLEN */ - croak(ErrLongerThanSrc, "compose"); } cc_pos = 0; } @@ -447,6 +435,7 @@ U8* pv_utf8_compose(U8* s, STRLEN slen, U8* d, STRLEN dlen, bool iscontig) } if (seq_ext) Safefree(seq_ext); + *dp = dstart; return d; } @@ -474,6 +463,7 @@ decompose(src, compat = &PL_sv_no) OUTPUT: RETVAL + SV* reorder(src) SV * src @@ -485,16 +475,17 @@ reorder(src) CODE: s = (U8*)sv_2pvunicode(src,&slen); dst = newSVpvn("", 0); - dlen = slen + UTF8_MAXLEN; - d = (U8*)SvGROW(dst,dlen+1); + dlen = slen; + New(0, d, dlen+1, U8); + dend = pv_utf8_reorder(s, slen, &d, dlen); + sv_setpvn(dst, (char *)d, dend - d); SvUTF8_on(dst); - dend = pv_utf8_reorder(s, slen, d, dlen); - *dend = '\0'; - SvCUR_set(dst, dend - d); + Safefree(d); RETVAL = dst; OUTPUT: RETVAL + SV* compose(src) SV * src @@ -508,16 +499,17 @@ compose(src) CODE: s = (U8*)sv_2pvunicode(src,&slen); dst = newSVpvn("", 0); - dlen = slen + UTF8_MAXLEN; - d = (U8*)SvGROW(dst,dlen+1); + dlen = slen; + New(0, d, dlen+1, U8); + dend = pv_utf8_compose(s, slen, &d, dlen, (bool)ix); + sv_setpvn(dst, (char *)d, dend - d); SvUTF8_on(dst); - dend = pv_utf8_compose(s, slen, d, dlen, (bool)ix); - *dend = '\0'; - SvCUR_set(dst, dend - d); + Safefree(d); RETVAL = dst; OUTPUT: RETVAL + SV* NFD(src) SV * src @@ -529,29 +521,34 @@ NFD(src) U8 *s, *t, *tend, *d, *dend; STRLEN slen, tlen, dlen; CODE: - /* decompose */ s = (U8*)sv_2pvunicode(src,&slen); + + /* decompose */ tlen = slen; New(0, t, tlen+1, U8); - tend = pv_utf8_decompose(s, slen, &t, tlen, (bool)ix); + tend = pv_utf8_decompose(s, slen, &t, tlen, (bool)(ix==1)); *tend = '\0'; - tlen = tend - t; /* no longer know real tlen */ + tlen = tend - t; /* no longer know real size of t */ /* reorder */ - dst = newSVpvn("", 0); - dlen = tlen + UTF8_MAXLEN; - d = (U8*)SvGROW(dst,dlen+1); - SvUTF8_on(dst); - dend = pv_utf8_reorder(t, tlen, d, dlen); + dlen = tlen; + New(0, d, dlen+1, U8); + dend = pv_utf8_reorder(t, tlen, &d, dlen); *dend = '\0'; - SvCUR_set(dst, dend - d); + dlen = dend - d; /* no longer know real size of d */ /* return */ + dst = newSVpvn("", 0); + sv_setpvn(dst, (char *)d, dlen); + SvUTF8_on(dst); + Safefree(t); + Safefree(d); RETVAL = dst; OUTPUT: RETVAL + SV* NFC(src) SV * src @@ -564,37 +561,42 @@ NFC(src) U8 *s, *t, *tend, *u, *uend, *d, *dend; STRLEN slen, tlen, ulen, dlen; CODE: - /* decompose */ s = (U8*)sv_2pvunicode(src,&slen); + + /* decompose */ tlen = slen; New(0, t, tlen+1, U8); tend = pv_utf8_decompose(s, slen, &t, tlen, (bool)(ix==1)); *tend = '\0'; - tlen = tend - t; /* no longer know real tlen */ + tlen = tend - t; /* no longer know real size of t */ /* reorder */ - ulen = tlen + UTF8_MAXLEN; + ulen = tlen; New(0, u, ulen+1, U8); - uend = pv_utf8_reorder(t, tlen, u, ulen); + uend = pv_utf8_reorder(t, tlen, &u, ulen); *uend = '\0'; - ulen = uend - u; + ulen = uend - u; /* no longer know real size of u */ /* compose */ - dst = newSVpvn("", 0); - dlen = ulen + UTF8_MAXLEN; - d = (U8*)SvGROW(dst,dlen+1); - SvUTF8_on(dst); - dend = pv_utf8_compose(u, ulen, d, dlen, (bool)(ix==2)); + dlen = ulen; + New(0, d, dlen+1, U8); + dend = pv_utf8_compose(u, ulen, &d, dlen, (bool)(ix==2)); *dend = '\0'; - SvCUR_set(dst, dend - d); + dlen = dend - d; /* no longer know real size of d */ /* return */ + dst = newSVpvn("", 0); + sv_setpvn(dst, (char *)d, dlen); + SvUTF8_on(dst); + Safefree(t); Safefree(u); + Safefree(d); RETVAL = dst; OUTPUT: RETVAL + SV* checkNFD(src) SV * src diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/README b/gnu/usr.bin/perl/cpan/Unicode-Normalize/README index 8a5390cc789..958df4e1bd8 100644 --- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/README +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/README @@ -1,4 +1,4 @@ -Unicode/Normalize version 1.03 +Unicode/Normalize version 1.14 =================================== Unicode::Normalize - Unicode Normalization Forms @@ -62,7 +62,6 @@ which are included in recent perl core distributions. - unicore/CombiningClass.pl (or unicode/CombiningClass.pl) - unicore/Decomposition.pl (or unicode/Decomposition.pl) -- unicore/CompositionExclusions.txt (or unicode/CompExcl.txt) NOTES @@ -83,7 +82,7 @@ COPYRIGHT AND LICENSE SADAHIRO Tomoyuki <SADAHIRO@cpan.org> -Copyright(C) 2001-2007, SADAHIRO Tomoyuki. Japan. All rights reserved. +Copyright(C) 2001-2012, SADAHIRO Tomoyuki. Japan. All rights reserved. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/mkheader b/gnu/usr.bin/perl/cpan/Unicode-Normalize/mkheader index b3e3c3153cb..c694b7e9ebf 100644 --- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/mkheader +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/mkheader @@ -9,7 +9,6 @@ # Input files: # unicore/CombiningClass.pl (or unicode/CombiningClass.pl) # unicore/Decomposition.pl (or unicode/Decomposition.pl) -# unicore/CompositionExclusions.txt (or unicode/CompExcl.txt) # # Output files: # unfcan.h @@ -32,30 +31,48 @@ BEGIN { our $PACKAGE = 'Unicode::Normalize, mkheader'; -our $Combin = do "unicore/CombiningClass.pl" - || do "unicode/CombiningClass.pl" - || croak "$PACKAGE: CombiningClass.pl not found"; +our $prefix = "UNF_"; +our $structname = "${prefix}complist"; -our $Decomp = do "unicore/Decomposition.pl" - || do "unicode/Decomposition.pl" - || croak "$PACKAGE: Decomposition.pl not found"; +sub pack_U { + return pack('U*', @_); +} + +# %Canon and %Compat will be ($codepoint => $hexstring) after _U_stringify() +our %Comp1st; # $codepoint => $listname : may be composed with a next char. +our %CompList; # $listname,$2nd => $codepoint : composite + +##### The below part is common to mkheader and PP ##### our %Combin; # $codepoint => $number : combination class our %Canon; # $codepoint => \@codepoints : canonical decomp. our %Compat; # $codepoint => \@codepoints : compat. decomp. -# after _U_stringify(), ($codepoint => $hexstring) for %Canon and %Compat +our %Compos; # $1st,$2nd => $codepoint : composite our %Exclus; # $codepoint => 1 : composition exclusions our %Single; # $codepoint => 1 : singletons our %NonStD; # $codepoint => 1 : non-starter decompositions - -our %Comp1st; # $codepoint => $listname : may be composed with a next char. our %Comp2nd; # $codepoint => 1 : may be composed with a prev char. -our %CompList; # $listname,$2nd => $codepoint : composite -our $prefix = "UNF_"; -our $structname = "${prefix}complist"; +# from core Unicode database +our $Combin = do "unicore/CombiningClass.pl" + || do "unicode/CombiningClass.pl" + || croak "$PACKAGE: CombiningClass.pl not found"; +our $Decomp = do "unicore/Decomposition.pl" + || do "unicode/Decomposition.pl" + || croak "$PACKAGE: Decomposition.pl not found"; -########## definition of Hangul constants ########## +# CompositionExclusions.txt since Unicode 3.2.0 +our @CompEx = qw( + 0958 0959 095A 095B 095C 095D 095E 095F 09DC 09DD 09DF 0A33 0A36 + 0A59 0A5A 0A5B 0A5E 0B5C 0B5D 0F43 0F4D 0F52 0F57 0F5C 0F69 0F76 + 0F78 0F93 0F9D 0FA2 0FA7 0FAC 0FB9 FB1D FB1F FB2A FB2B FB2C FB2D + FB2E FB2F FB30 FB31 FB32 FB33 FB34 FB35 FB36 FB38 FB39 FB3A FB3B + FB3C FB3E FB40 FB41 FB43 FB44 FB46 FB47 FB48 FB49 FB4A FB4B FB4C + FB4D FB4E 2ADC 1D15E 1D15F 1D160 1D161 1D162 1D163 1D164 1D1BB + 1D1BC 1D1BD 1D1BE 1D1BF 1D1C0 +); + +# definition of Hangul constants use constant SBase => 0xAC00; use constant SFinal => 0xD7A3; # SBase -1 + SCount use constant SCount => 11172; # LCount * NCount @@ -71,67 +88,23 @@ use constant TFinal => 0x11C2; use constant TCount => 28; sub decomposeHangul { - my $SIndex = $_[0] - SBase; - my $LIndex = int( $SIndex / NCount); - my $VIndex = int(($SIndex % NCount) / TCount); - my $TIndex = $SIndex % TCount; + my $sindex = $_[0] - SBase; + my $lindex = int( $sindex / NCount); + my $vindex = int(($sindex % NCount) / TCount); + my $tindex = $sindex % TCount; my @ret = ( - LBase + $LIndex, - VBase + $VIndex, - $TIndex ? (TBase + $TIndex) : (), + LBase + $lindex, + VBase + $vindex, + $tindex ? (TBase + $tindex) : (), ); - return @ret; -} - -########## length of a character ########## - -sub utf8len { - my $uv = shift; - return $uv < 0x80 ? 1 : - $uv < 0x800 ? 2 : - $uv < 0x10000 ? 3 : - $uv < 0x110000 ? 4 : - croak "$PACKAGE: illegal char in the composite. codepoint max is 0x10ffff."; + return wantarray ? @ret : pack_U(@ret); } -sub utfelen { - my $uv = shift; - return $uv < 0xA0 ? 1 : - $uv < 0x400 ? 2 : - $uv < 0x4000 ? 3 : - $uv < 0x40000 ? 4 : - $uv < 0x110000 ? 5 : - croak "$PACKAGE: illegal char in the composite. codepoint max is 0x10ffff."; -} - -my $errExpand = "$PACKAGE: Composition to U+%04X (from U+%04X and U+%04X) " . - "needs growing the string in %s! Quit. Please inform the author..."; - -########## getting full decomposion ########## -{ - my($f, $fh); - foreach my $d (@INC) { - $f = File::Spec->catfile($d, "unicore", "CompositionExclusions.txt"); - last if open($fh, $f); - $f = File::Spec->catfile($d, "unicode", "CompExcl.txt"); - last if open($fh, $f); - $f = undef; - } - croak "$PACKAGE: neither unicore/CompositionExclusions.txt " - . "nor unicode/CompExcl.txt is found in @INC" unless defined $f; +########## getting full decomposition ########## - while (<$fh>) { - next if /^#/ or /^$/; - s/#.*//; - $Exclus{ hex($1) } = 1 if /([0-9A-Fa-f]+)/; - } - close $fh; -} - -## ## converts string "hhhh hhhh hhhh" to a numeric list -## -sub _getHexArray { map hex, $_[0] =~ /([0-9A-Fa-f]+)/g } +## (hex digits separated by spaces) +sub _getHexArray { map hex, $_[0] =~ /\G *([0-9A-Fa-f]+)/g } while ($Combin =~ /(.+)/g) { my @tab = split /\t/, $1; @@ -147,43 +120,38 @@ while ($Decomp =~ /(.+)/g) { my @tab = split /\t/, $1; my $compat = $tab[2] =~ s/<[^>]+>//; my $dec = [ _getHexArray($tab[2]) ]; # decomposition - my $ini = hex($tab[0]); + my $ini = hex($tab[0]); # initial decomposable character my $end = $tab[1] eq '' ? $ini : hex($tab[1]); # ($ini .. $end) is the range of decomposable characters. - my $listname = - @$dec == 2 ? sprintf("${structname}_%06x", $dec->[0]) : 'USELESS'; - # %04x is bad since it'd place _3046 after _1d157. - foreach my $u ($ini .. $end) { $Compat{$u} = $dec; + $Canon{$u} = $dec if ! $compat; + } +} - if (! $compat) { - $Canon{$u} = $dec; - - if (@$dec == 2) { - if (utf8len($dec->[0]) + utf8len($dec->[1]) < utf8len($u)) { - croak sprintf $errExpand, $u, $dec->[0], $dec->[1], - "utf-8"; - } - if (utfelen($dec->[0]) + utfelen($dec->[1]) < utfelen($u)) { - croak sprintf $errExpand, $u, $dec->[0], $dec->[1], - "utf-ebcdic"; - } - - if ($Combin{ $dec->[0] }) { - $NonStD{$u} = 1; - } else { - $CompList{ $listname }{ $dec->[1] } = $u; - $Comp1st{ $dec->[0] } = $listname; - $Comp2nd{ $dec->[1] } = 1 if ! $Exclus{$u}; - } - } elsif (@$dec == 1) { - $Single{$u} = 1; - } else { - croak("Weird Canonical Decomposition of U+$tab[0]"); - } +for my $s (@CompEx) { + my $u = hex $s; + next if !$Canon{$u}; # not assigned + next if $u == 0xFB1D && !$Canon{0x1D15E}; # 3.0.1 before Corrigendum #2 + $Exclus{$u} = 1; +} + +foreach my $u (keys %Canon) { + my $dec = $Canon{$u}; + + if (@$dec == 2) { + if ($Combin{ $dec->[0] }) { + $NonStD{$u} = 1; + } else { + $Compos{ $dec->[0] }{ $dec->[1] } = $u; + $Comp2nd{ $dec->[1] } = 1 if ! $Exclus{$u}; } + } elsif (@$dec == 1) { + $Single{$u} = 1; + } else { + my $h = sprintf '%04X', $u; + croak("Weird Canonical Decomposition of U+$h"); } } @@ -222,8 +190,18 @@ foreach my $key (keys %Compat) { $Compat{$key} = [ getCompatList($key) ]; } -sub _pack_U { - return pack('U*', @_); +##### The above part is common to mkheader and PP ##### + +foreach my $comp1st (keys %Compos) { + my $listname = sprintf("${structname}_%06x", $comp1st); + # %04x is bad since it'd place _3046 after _1d157. + $Comp1st{$comp1st} = $listname; + my $rh1st = $Compos{$comp1st}; + + foreach my $comp2nd (keys %$rh1st) { + my $uc = $rh1st->{$comp2nd}; + $CompList{$listname}{$comp2nd} = $uc; + } } sub split_into_char { @@ -239,7 +217,7 @@ sub split_into_char { sub _U_stringify { sprintf '"%s"', join '', - map sprintf("\\x%02x", $_), split_into_char(_pack_U(@_)); + map sprintf("\\x%02x", $_), split_into_char(pack_U(@_)); } foreach my $hash (\%Canon, \%Compat) { diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/fcdc.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/fcdc.t index 1cc0db181ce..e62c4ea18ce 100755 --- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/fcdc.t +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/fcdc.t @@ -16,12 +16,22 @@ BEGIN { ######################### -use Test; use strict; use warnings; -BEGIN { plan tests => 70 }; +BEGIN { $| = 1; print "1..70\n"; } +my $count = 0; +sub ok ($;$) { + my $p = my $r = shift; + if (@_) { + my $x = shift; + $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; + } + print $p ? "ok" : "not ok", ' ', ++$count, "\n"; +} + use Unicode::Normalize qw(:all); -ok(1); # If we made it this far, we're ok. + +ok(1); sub _pack_U { Unicode::Normalize::pack_U(@_) } sub hexU { _pack_U map hex, split ' ', shift } @@ -39,6 +49,8 @@ ok(normalize('FCC', ""), ""); ok(normalize('FCC', "A"), "A"); ok(normalize('FCD', "A"), "A"); +# 9 + # if checkFCD is YES, the return value from FCD should be same as the original ok(FCD(hexU("00C5")), hexU("00C5")); # A with ring above ok(FCD(hexU("0041 030A")), hexU("0041 030A")); # A+ring @@ -52,6 +64,8 @@ ok(normalize('FCD', hexU("0041 0327 030A")), hexU("0041 0327 030A")); ok(normalize('FCD', hexU("AC01 1100 1161")), hexU("AC01 1100 1161")); ok(normalize('FCD', hexU("212B F900")), hexU("212B F900")); +# 19 + # if checkFCD is MAYBE or NO, FCD returns NFD (this behavior isn't documented) ok(FCD(hexU("00C5 0327")), hexU("0041 0327 030A")); ok(FCD(hexU("0041 030A 0327")), hexU("0041 0327 030A")); @@ -63,6 +77,8 @@ ok(normalize('FCD', hexU("0041 030A 0327")), hexU("0041 0327 030A")); ok(normalize('FCD', hexU("00C5 0327")), NFD(hexU("00C5 0327"))); ok(normalize('FCD', hexU("0041 030A 0327")), NFD(hexU("0041 030A 0327"))); +# 27 + ok(answer(checkFCD('')), 'YES'); ok(answer(checkFCD('A')), 'YES'); ok(answer(checkFCD("\x{030A}")), 'YES'); # 030A;COMBINING RING ABOVE @@ -83,6 +99,8 @@ ok(answer(checkFCC(hexU("1EA7 05AE 0315 0062"))), "NO"); ok(answer(check('FCD', hexU("1EA7 05AE 0315 0062"))), "NO"); ok(answer(check('FCC', hexU("1EA7 05AE 0315 0062"))), "NO"); +# 45 + ok(FCC(hexU("00C5 0327")), hexU("0041 0327 030A")); ok(FCC(hexU("0045 0304 0300")), "\x{1E14}"); ok(FCC("\x{1100}\x{1161}\x{1100}\x{1173}\x{11AF}"), "\x{AC00}\x{AE00}"); @@ -97,6 +115,8 @@ ok(FCC("\x{1100}\x{1161}\x{0300}"), "\x{AC00}\x{0300}"); ok(FCC("\x{0B47}\x{300}\x{0B3E}\x{327}"), "\x{0B47}\x{300}\x{0B3E}\x{327}"); ok(FCC("\x{1100}\x{300}\x{1161}\x{327}"), "\x{1100}\x{300}\x{1161}\x{327}"); +# 57 + ok(answer(checkFCC('')), 'YES'); ok(answer(checkFCC('A')), 'YES'); ok(answer(checkFCC("\x{030A}")), 'MAYBE'); # 030A;COMBINING RING ABOVE @@ -111,3 +131,5 @@ ok(answer(checkFCC("\x{212B}\x{F900}")), 'NO'); # compat ok(answer(checkFCC("\x{212B}\x{0327}")), 'NO'); # compat ok(answer(checkFCC("\x{0327}\x{212B}")), 'NO'); # compat +# 70 + diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/form.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/form.t index 27cd177596f..c9b424906d1 100755 --- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/form.t +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/form.t @@ -16,12 +16,22 @@ BEGIN { ######################### -use Test; use strict; use warnings; -BEGIN { plan tests => 37 }; +BEGIN { $| = 1; print "1..37\n"; } +my $count = 0; +sub ok ($;$) { + my $p = my $r = shift; + if (@_) { + my $x = shift; + $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; + } + print $p ? "ok" : "not ok", ' ', ++$count, "\n"; +} + use Unicode::Normalize qw(:all); -ok(1); # If we made it this far, we're ok. + +ok(1); sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" } diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/func.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/func.t index 76ced03ea1a..d8d8d952e0a 100755 --- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/func.t +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/func.t @@ -16,12 +16,22 @@ BEGIN { ######################### -use Test; use strict; use warnings; -BEGIN { plan tests => 211 }; +BEGIN { $| = 1; print "1..217\n"; } +my $count = 0; +sub ok ($;$) { + my $p = my $r = shift; + if (@_) { + my $x = shift; + $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; + } + print $p ? "ok" : "not ok", ' ', ++$count, "\n"; +} + use Unicode::Normalize qw(:all); -ok(1); # If we made it this far, we're ok. + +ok(1); sub _pack_U { Unicode::Normalize::pack_U(@_) } sub hexU { _pack_U map hex, split ' ', shift } @@ -49,6 +59,8 @@ ok(getCanon(0x212C), undef); ok(getCanon(0x3243), undef); ok(getCanon(0xFA2D), _pack_U(0x9DB4)); +# 20 + ok(getCompat( 0), undef); ok(getCompat(0x29), undef); ok(getCompat(0x41), undef); @@ -64,6 +76,8 @@ ok(getCompat(0xAC00), _pack_U(0x1100, 0x1161)); ok(getCompat(0xAE00), _pack_U(0x1100, 0x1173, 0x11AF)); ok(getCompat(0xFA2D), _pack_U(0x9DB4)); +# 34 + ok(getComposite( 0, 0), undef); ok(getComposite( 0, 0x29), undef); ok(getComposite(0x29, 0), undef); @@ -84,6 +98,8 @@ ok(getComposite(0xAC00, 0x11A7), undef); ok(getComposite(0xAC00, 0x11A8), 0xAC01); ok(getComposite(0xADF8, 0x11AF), 0xAE00); +# 53 + sub uprops { my $uv = shift; my $r = ""; @@ -120,6 +136,8 @@ ok(uprops(0xF900), 'xSnFbDmCKyG'); # CJK COMPATIBILITY IDEOGRAPH-F900 ok(uprops(0xFB4E), 'XsnFbDmCKyG'); # HEBREW LETTER PE WITH RAFE ok(uprops(0xFF71), 'xsnfbdmcKyG'); # HALFWIDTH KATAKANA LETTER A +# 71 + ok(decompose(""), ""); ok(decompose("A"), "A"); ok(decompose("", 1), ""); @@ -138,6 +156,8 @@ my $sDec = "\x{FA19}"; ok(decompose($sDec), "\x{795E}"); ok($sDec, "\x{FA19}"); +# 83 + ok(reorder(""), ""); ok(reorder("A"), "A"); ok(reorder(hexU("0041 0300 0315 0313 031b 0061")), @@ -150,6 +170,8 @@ my $sReord = "\x{3000}\x{300}\x{31b}"; ok(reorder($sReord), "\x{3000}\x{31b}\x{300}"); ok($sReord, "\x{3000}\x{300}\x{31b}"); +# 89 + ok(compose(""), ""); ok(compose("A"), "A"); ok(compose(hexU("0061 0300")), hexU("00E0")); @@ -165,6 +187,8 @@ my $sCom = "\x{304B}\x{3099}"; ok(compose($sCom), "\x{304C}"); ok($sCom, "\x{304B}\x{3099}"); +# 100 + ok(composeContiguous(""), ""); ok(composeContiguous("A"), "A"); ok(composeContiguous(hexU("0061 0300")), hexU("00E0")); @@ -180,6 +204,8 @@ my $sCtg = "\x{30DB}\x{309A}"; ok(composeContiguous($sCtg), "\x{30DD}"); ok($sCtg, "\x{30DB}\x{309A}"); +# 111 + sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" } ok(answer(checkNFD("")), "YES"); @@ -220,6 +246,8 @@ ok(answer(checkNFKC(hexU("0041 0327 030A"))), "MAYBE"); # A+cedilla+ring ok(answer(checkNFKC(hexU("0041 030A 0327"))), "NO"); # A+ring+cedilla ok(answer(check("NFKC", hexU("20 C1 212B 300"))), "NO"); +# 145 + "012ABC" =~ /(\d+)(\w+)/; ok("012" eq NFC $1 && "ABC" eq NFC $2); @@ -230,6 +258,8 @@ ok(normalize('NFC', $1), "012"); ok(normalize('NFC', $2), "ABC"); # s/^NF// in normalize() must not prevent using $1, $&, etc. +# 150 + # a string with initial zero should be treated like a number # LATIN CAPITAL LETTER A WITH GRAVE @@ -276,6 +306,8 @@ ok(getCanon("044032"), _pack_U(0x1100, 0x1161)); ok(getCompat("044032"), _pack_U(0x1100, 0x1161)); ok(getComposite("04352", "04449"), 0xAC00); +# 182 + # string with 22 combining characters: (0x300..0x315) my $str_cc22 = _pack_U(0x3041, 0x300..0x315, 0x3042); ok(decompose($str_cc22), $str_cc22); @@ -289,6 +321,8 @@ ok(NFKC($str_cc22), $str_cc22); ok(FCD($str_cc22), $str_cc22); ok(FCC($str_cc22), $str_cc22); +# 192 + # string with 40 combining characters of the same class: (0x300..0x313)x2 my $str_cc40 = _pack_U(0x3041, 0x300..0x313, 0x300..0x313, 0x3042); ok(decompose($str_cc40), $str_cc40); @@ -302,6 +336,8 @@ ok(NFKC($str_cc40), $str_cc40); ok(FCD($str_cc40), $str_cc40); ok(FCC($str_cc40), $str_cc40); +# 202 + my $precomp = hexU("304C 304E 3050 3052 3054"); my $combseq = hexU("304B 3099 304D 3099 304F 3099 3051 3099 3053 3099"); ok(decompose($precomp x 5), $combseq x 5); @@ -319,4 +355,22 @@ ok(decompose($precomp . $notcomp), $combseq . $notcomp); ok(decompose($precomp . $notcomp x 5), $combseq . $notcomp x 5); ok(decompose($precomp . $notcomp x10), $combseq . $notcomp x10); +# 211 + +my $preUnicode3_1 = !defined getCanon(0x1D15E); +my $preUnicode3_2 = !defined getCanon(0x2ADC); + +# HEBREW LETTER YOD WITH HIRIQ +ok($preUnicode3_1 xor isExclusion(0xFB1D)); +ok($preUnicode3_1 xor isComp_Ex (0xFB1D)); + +# MUSICAL SYMBOL HALF NOTE +ok($preUnicode3_1 xor isExclusion(0x1D15E)); +ok($preUnicode3_1 xor isComp_Ex (0x1D15E)); + +# FORKING +ok($preUnicode3_2 xor isExclusion(0x2ADC)); +ok($preUnicode3_2 xor isComp_Ex (0x2ADC)); + +# 217 diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/illegal.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/illegal.t index 976e5097a2c..bcd9517f6d7 100755 --- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/illegal.t +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/illegal.t @@ -35,16 +35,25 @@ BEGIN { } } -use Test; use strict; use warnings; -BEGIN { plan tests => 112 }; +BEGIN { $| = 1; print "1..113\n"; } +my $count = 0; +sub ok ($;$) { + my $p = my $r = shift; + if (@_) { + my $x = shift; + $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; + } + print $p ? "ok" : "not ok", ' ', ++$count, "\n"; +} + +ok(1); ######################### no warnings qw(utf8); -# To avoid warning in Test.pm, EXPR in ok(EXPR) must be boolean. for my $u (0xD800, 0xDFFF, 0xFDD0, 0xFDEF, 0xFEFF, 0xFFFE, 0xFFFF, 0x1FFFF, 0x10FFFF, 0x110000, 0x7FFFFFFF) diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/norm.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/norm.t index 5d93747965a..1442c30000c 100755 --- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/norm.t +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/norm.t @@ -16,12 +16,22 @@ BEGIN { ######################### -use Test; use strict; use warnings; -BEGIN { plan tests => 64 }; +BEGIN { $| = 1; print "1..64\n"; } +my $count = 0; +sub ok ($;$) { + my $p = my $r = shift; + if (@_) { + my $x = shift; + $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; + } + print $p ? "ok" : "not ok", ' ', ++$count, "\n"; +} + use Unicode::Normalize qw(normalize); -ok(1); # If we made it this far, we're ok. + +ok(1); sub _pack_U { Unicode::Normalize::pack_U(@_) } sub _unpack_U { Unicode::Normalize::unpack_U(@_) } @@ -48,6 +58,8 @@ ok(normalize('NFC', "A"), "A"); ok(normalize('NFKD',"A"), "A"); ok(normalize('NFKC',"A"), "A"); +# 17 + # don't modify the source my $sNFD = "\x{FA19}"; ok(normalize('NFD', $sNFD), "\x{795E}"); @@ -65,6 +77,8 @@ my $sNFKC = "\x{FA26}"; ok(normalize('NFKC', $sNFKC), "\x{90FD}"); ok($sNFKC, "\x{FA26}"); +# 25 + sub hexNFC { join " ", map sprintf("%04X", $_), _unpack_U normalize 'C', _pack_U map hex, split ' ', shift; @@ -100,14 +114,14 @@ ok(hexNFC("AC00 11A9"), "AC02"); ok(hexNFC("AC00 11C2"), "AC1B"); ok(hexNFC("AC00 11C3"), "AC00 11C3"); +# 47 + # Test Cases from Public Review Issue #29: Normalization Issue # cf. http://www.unicode.org/review/pr-29.html ok(hexNFC("0B47 0300 0B3E"), "0B47 0300 0B3E"); ok(hexNFC("1100 0300 1161"), "1100 0300 1161"); - ok(hexNFC("0B47 0B3E 0300"), "0B4B 0300"); ok(hexNFC("1100 1161 0300"), "AC00 0300"); - ok(hexNFC("0B47 0300 0B3E 0327"), "0B47 0300 0B3E 0327"); ok(hexNFC("1100 0300 1161 0327"), "1100 0300 1161 0327"); @@ -123,3 +137,6 @@ ok(hexNFC("0315 0061 0300"), "0315 00E0"); ok(hexNFC("0000 0327 0061 0300"), "0000 0327 00E0"); ok(hexNFC("0000 0301 0061 0300"), "0000 0301 00E0"); ok(hexNFC("0000 0315 0061 0300"), "0000 0315 00E0"); + +# 64 + diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/partial1.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/partial1.t new file mode 100644 index 00000000000..56f2ca44274 --- /dev/null +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/partial1.t @@ -0,0 +1,117 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; + exit 0; + } +} + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +BEGIN { + unless (5.006001 <= $]) { + print "1..0 # skipped: Perl 5.6.1 or later". + " needed for this test\n"; + exit; + } +} + +######################### + +use strict; +use warnings; +BEGIN { $| = 1; print "1..26\n"; } +my $count = 0; +sub ok ($;$) { + my $p = my $r = shift; + if (@_) { + my $x = shift; + $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; + } + print $p ? "ok" : "not ok", ' ', ++$count, "\n"; +} + +use Unicode::Normalize qw(:all); + +ok(1); + +sub _pack_U { Unicode::Normalize::pack_U(@_) } +sub _unpack_U { Unicode::Normalize::unpack_U(@_) } + +######################### + +sub arraynorm { + my $form = shift; + my @string = @_; + my $result = ""; + my $unproc = ""; + foreach my $str (@string) { + $unproc .= $str; + $result .= $form eq 'NFC' ? NFC_partial ($unproc) : + $form eq 'NFD' ? NFD_partial ($unproc) : + $form eq 'NFKC' ? NFKC_partial($unproc) : + $form eq 'NFKD' ? NFKD_partial($unproc) : + undef; + } + $result .= $unproc; + return $result; +} + +my $strD = "\x{3C9}\x{301}\x{1100}\x{1161}\x{11A8}\x{1100}\x{1161}\x{11AA}"; +my $strC = "\x{3CE}\x{AC01}\x{AC03}"; +my @str1 = (substr($strD,0,3), substr($strD,3,4), substr($strD,7)); +my @str2 = (substr($strD,0,1), substr($strD,1,3), substr($strD,4)); +ok($strC eq NFC($strD)); +ok($strD eq join('', @str1)); +ok($strC eq arraynorm('NFC', @str1)); +ok($strD eq join('', @str2)); +ok($strC eq arraynorm('NFC', @str2)); + +my @strX = ("\x{300}\x{AC00}", "\x{11A8}"); +my $strX = "\x{300}\x{AC01}"; +ok($strX eq NFC(join('', @strX))); +ok($strX eq arraynorm('NFC', @strX)); +ok($strX eq NFKC(join('', @strX))); +ok($strX eq arraynorm('NFKC', @strX)); + +my @strY = ("\x{304B}\x{0308}", "\x{0323}\x{3099}"); +my $strY = ("\x{304C}\x{0323}\x{0308}"); +ok($strY eq NFC(join('', @strY))); +ok($strY eq arraynorm('NFC', @strY)); +ok($strY eq NFKC(join('', @strY))); +ok($strY eq arraynorm('NFKC', @strY)); + +my @strZ = ("\x{304B}\x{0308}", "\x{0323}", "\x{3099}"); +my $strZ = ("\x{304B}\x{3099}\x{0323}\x{0308}"); +ok($strZ eq NFD(join('', @strZ))); +ok($strZ eq arraynorm('NFD', @strZ)); +ok($strZ eq NFKD(join('', @strZ))); +ok($strZ eq arraynorm('NFKD', @strZ)); + +# 18 + +# must modify the source +my $sNFD = "\x{FA19}"; +ok(NFD_partial($sNFD), ""); +ok($sNFD, "\x{795E}"); + +my $sNFC = "\x{FA1B}"; +ok(NFC_partial($sNFC), ""); +ok($sNFC, "\x{798F}"); + +my $sNFKD = "\x{FA1E}"; +ok(NFKD_partial($sNFKD), ""); +ok($sNFKD, "\x{7FBD}"); + +my $sNFKC = "\x{FA26}"; +ok(NFKC_partial($sNFKC), ""); +ok($sNFKC, "\x{90FD}"); + +# 26 + diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/partial2.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/partial2.t new file mode 100644 index 00000000000..4d824a44181 --- /dev/null +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/partial2.t @@ -0,0 +1,113 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; + exit 0; + } +} + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +BEGIN { + unless (5.006001 <= $]) { + print "1..0 # skipped: Perl 5.6.1 or later". + " needed for this test\n"; + exit; + } +} + +######################### + +use strict; +use warnings; +BEGIN { $| = 1; print "1..26\n"; } +my $count = 0; +sub ok ($;$) { + my $p = my $r = shift; + if (@_) { + my $x = shift; + $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; + } + print $p ? "ok" : "not ok", ' ', ++$count, "\n"; +} + +use Unicode::Normalize qw(:all); + +ok(1); + +sub _pack_U { Unicode::Normalize::pack_U(@_) } +sub _unpack_U { Unicode::Normalize::unpack_U(@_) } + +######################### + +sub arraynorm { + my $form = shift; + my @string = @_; + my $result = ""; + my $unproc = ""; + foreach my $str (@string) { + $unproc .= $str; + $result .= normalize_partial($form, $unproc); + } + $result .= $unproc; + return $result; +} + +my $strD = "\x{3C9}\x{301}\x{1100}\x{1161}\x{11A8}\x{1100}\x{1161}\x{11AA}"; +my $strC = "\x{3CE}\x{AC01}\x{AC03}"; +my @str1 = (substr($strD,0,3), substr($strD,3,4), substr($strD,7)); +my @str2 = (substr($strD,0,1), substr($strD,1,3), substr($strD,4)); +ok($strC eq NFC($strD)); +ok($strD eq join('', @str1)); +ok($strC eq arraynorm('NFC', @str1)); +ok($strD eq join('', @str2)); +ok($strC eq arraynorm('NFC', @str2)); + +my @strX = ("\x{300}\x{AC00}", "\x{11A8}"); +my $strX = "\x{300}\x{AC01}"; +ok($strX eq NFC(join('', @strX))); +ok($strX eq arraynorm('NFC', @strX)); +ok($strX eq NFKC(join('', @strX))); +ok($strX eq arraynorm('NFKC', @strX)); + +my @strY = ("\x{304B}\x{0308}", "\x{0323}\x{3099}"); +my $strY = ("\x{304C}\x{0323}\x{0308}"); +ok($strY eq NFC(join('', @strY))); +ok($strY eq arraynorm('NFC', @strY)); +ok($strY eq NFKC(join('', @strY))); +ok($strY eq arraynorm('NFKC', @strY)); + +my @strZ = ("\x{304B}\x{0308}", "\x{0323}", "\x{3099}"); +my $strZ = ("\x{304B}\x{3099}\x{0323}\x{0308}"); +ok($strZ eq NFD(join('', @strZ))); +ok($strZ eq arraynorm('NFD', @strZ)); +ok($strZ eq NFKD(join('', @strZ))); +ok($strZ eq arraynorm('NFKD', @strZ)); + +# 18 + +# must modify the source +my $sNFD = "\x{FA19}"; +ok(normalize_partial('NFD', $sNFD), ""); +ok($sNFD, "\x{795E}"); + +my $sNFC = "\x{FA1B}"; +ok(normalize_partial('NFC', $sNFC), ""); +ok($sNFC, "\x{798F}"); + +my $sNFKD = "\x{FA1E}"; +ok(normalize_partial('NFKD', $sNFKD), ""); +ok($sNFKD, "\x{7FBD}"); + +my $sNFKC = "\x{FA26}"; +ok(normalize_partial('NFKC', $sNFKC), ""); +ok($sNFKC, "\x{90FD}"); + +# 26 + diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/proto.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/proto.t index 3c4298d849a..714018a68e4 100755 --- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/proto.t +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/proto.t @@ -16,12 +16,22 @@ BEGIN { ######################### -use Test; use strict; use warnings; -BEGIN { plan tests => 42 }; +BEGIN { $| = 1; print "1..48\n"; } +my $count = 0; +sub ok ($;$) { + my $p = my $r = shift; + if (@_) { + my $x = shift; + $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; + } + print $p ? "ok" : "not ok", ' ', ++$count, "\n"; +} + use Unicode::Normalize qw(:all); -ok(1); # If we made it this far, we're ok. + +ok(1); ######################### @@ -35,6 +45,8 @@ ok(FCD "\x{30A}\x{327}" eq "\x{327}\x{30A}"); ok(FCC "\x{30A}\x{327}" eq "\x{327}\x{30A}"); ok(reorder "\x{30A}\x{327}" eq "\x{327}\x{30A}"); +# 9 + ok(prototype \&normalize,'$$'); ok(prototype \&NFD, '$'); ok(prototype \&NFC, '$'); @@ -56,6 +68,8 @@ ok(prototype \&reorder, '$'); ok(prototype \&compose, '$'); ok(prototype \&composeContiguous, '$'); +# 27 + ok(prototype \&getCanon, '$'); ok(prototype \&getCompat, '$'); ok(prototype \&getComposite, '$$'); @@ -65,11 +79,18 @@ ok(prototype \&isSingleton, '$'); ok(prototype \&isNonStDecomp, '$'); ok(prototype \&isComp2nd, '$'); ok(prototype \&isComp_Ex, '$'); - ok(prototype \&isNFD_NO, '$'); ok(prototype \&isNFC_NO, '$'); ok(prototype \&isNFC_MAYBE, '$'); ok(prototype \&isNFKD_NO, '$'); ok(prototype \&isNFKC_NO, '$'); ok(prototype \&isNFKC_MAYBE, '$'); +ok(prototype \&splitOnLastStarter, undef); +ok(prototype \&normalize_partial, '$$'); +ok(prototype \&NFD_partial, '$'); +ok(prototype \&NFC_partial, '$'); +ok(prototype \&NFKD_partial, '$'); +ok(prototype \&NFKC_partial, '$'); + +# 48 diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/split.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/split.t index 6bf7ff66b1f..fe579cdd0d6 100755 --- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/split.t +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/split.t @@ -24,12 +24,22 @@ BEGIN { ######################### -use Test; use strict; use warnings; -BEGIN { plan tests => 14 }; +BEGIN { $| = 1; print "1..34\n"; } +my $count = 0; +sub ok ($;$) { + my $p = my $r = shift; + if (@_) { + my $x = shift; + $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; + } + print $p ? "ok" : "not ok", ' ', ++$count, "\n"; +} + use Unicode::Normalize qw(:all); -ok(1); # If we made it this far, we're ok. + +ok(1); sub _pack_U { Unicode::Normalize::pack_U(@_) } sub _unpack_U { Unicode::Normalize::unpack_U(@_) } @@ -40,14 +50,6 @@ our $proc; # before the last starter our $unproc; # the last starter and after # If string has no starter, entire string is set to $unproc. -# When you have $normalized string and $unnormalized string following, -# a simple concatenation -# C<$concat = $normalized . normalize($form, $unnormalized)> -# is wrong. Instead of it, like this: -# -# ($processed, $unprocessed) = splitOnLastStarter($normalized); -# $concat = $processed . normalize($form, $unprocessed.$unnormalized); - ($proc, $unproc) = splitOnLastStarter(""); ok($proc, ""); ok($unproc, ""); @@ -79,3 +81,64 @@ ok(NFC($ka_grave.$dakuten) eq $ga_grave); ok(NFC($ka_grave).NFC($dakuten) ne $ga_grave); ok($concat eq $ga_grave); +# 14 + +sub arraynorm { + my $form = shift; + my @string = @_; + my $result = ""; + my $unproc = ""; + foreach my $str (@string) { + $unproc .= $str; + my $n = normalize($form, $unproc); + my($p, $u) = splitOnLastStarter($n); + $result .= $p; + $unproc = $u; + } + $result .= $unproc; + return $result; +} + +my $strD = "\x{3C9}\x{301}\x{1100}\x{1161}\x{11A8}\x{1100}\x{1161}\x{11AA}"; +my $strC = "\x{3CE}\x{AC01}\x{AC03}"; +my @str1 = (substr($strD,0,3), substr($strD,3,4), substr($strD,7)); +my @str2 = (substr($strD,0,1), substr($strD,1,3), substr($strD,4)); +ok($strC eq NFC($strD)); +ok($strD eq join('', @str1)); +ok($strC eq arraynorm('NFC', @str1)); +ok($strD eq join('', @str2)); +ok($strC eq arraynorm('NFC', @str2)); + +my @strX = ("\x{300}\x{AC00}", "\x{11A8}"); +my $strX = "\x{300}\x{AC01}"; +ok($strX eq NFC(join('', @strX))); +ok($strX eq arraynorm('NFC', @strX)); +ok($strX eq NFKC(join('', @strX))); +ok($strX eq arraynorm('NFKC', @strX)); + +my @strY = ("\x{304B}\x{0308}", "\x{0323}\x{3099}"); +my $strY = ("\x{304C}\x{0323}\x{0308}"); +ok($strY eq NFC(join('', @strY))); +ok($strY eq arraynorm('NFC', @strY)); +ok($strY eq NFKC(join('', @strY))); +ok($strY eq arraynorm('NFKC', @strY)); + +my @strZ = ("\x{304B}\x{0308}", "\x{0323}", "\x{3099}"); +my $strZ = ("\x{304B}\x{3099}\x{0323}\x{0308}"); +ok($strZ eq NFD(join('', @strZ))); +ok($strZ eq arraynorm('NFD', @strZ)); +ok($strZ eq NFKD(join('', @strZ))); +ok($strZ eq arraynorm('NFKD', @strZ)); + +# 31 + +# don't modify the source + +my $source = "ABC"; +($proc, $unproc) = splitOnLastStarter($source); +ok($proc, "AB"); +ok($unproc, "C"); +ok($source, "ABC"); + +# 34 + diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/test.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/test.t index e48e16f1b9b..f69c695f106 100755 --- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/test.t +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/test.t @@ -16,12 +16,22 @@ BEGIN { ######################### -use Test; use strict; use warnings; -BEGIN { plan tests => 58 }; +BEGIN { $| = 1; print "1..72\n"; } +my $count = 0; +sub ok ($;$) { + my $p = my $r = shift; + if (@_) { + my $x = shift; + $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; + } + print $p ? "ok" : "not ok", ' ', ++$count, "\n"; +} + use Unicode::Normalize; -ok(1); # If we made it this far, we're ok. + +ok(1); sub _pack_U { Unicode::Normalize::pack_U(@_) } sub _unpack_U { Unicode::Normalize::unpack_U(@_) } @@ -38,6 +48,8 @@ ok(NFC("A"), "A"); ok(NFKD("A"), "A"); ok(NFKC("A"), "A"); +# 9 + # don't modify the source my $sNFD = "\x{FA19}"; ok(NFD($sNFD), "\x{795E}"); @@ -55,6 +67,7 @@ my $sNFKC = "\x{FA26}"; ok(NFKC($sNFKC), "\x{90FD}"); ok($sNFKC, "\x{FA26}"); +# 17 sub hexNFC { join " ", map sprintf("%04X", $_), @@ -91,6 +104,8 @@ ok(hexNFC("AC00 11A9"), "AC02"); ok(hexNFC("AC00 11C2"), "AC1B"); ok(hexNFC("AC00 11C3"), "AC00 11C3"); +# 39 + # Test Cases from Public Review Issue #29: Normalization Issue # cf. http://www.unicode.org/review/pr-29.html ok(hexNFC("0B47 0300 0B3E"), "0B47 0300 0B3E"); @@ -113,13 +128,38 @@ ok(hexNFC("0000 0327 0061 0300"), "0000 0327 00E0"); ok(hexNFC("0000 0301 0061 0300"), "0000 0301 00E0"); ok(hexNFC("0000 0315 0061 0300"), "0000 0315 00E0"); -# NFC() should be unary. +# 56 + +# NFC() and NFKC() should be unary. my $str11 = _pack_U(0x41, 0x0302, 0x0301, 0x62); my $str12 = _pack_U(0x1EA4, 0x62); ok(NFC $str11 eq $str12); +ok(NFKC $str11 eq $str12); -# NFD() should be unary. +# NFD() and NFKD() should be unary. my $str21 = _pack_U(0xE0, 0xAC00); my $str22 = _pack_U(0x61, 0x0300, 0x1100, 0x1161); ok(NFD $str21 eq $str22); +ok(NFKD $str21 eq $str22); + +# 60 + +## Bug #53197: NFKC("\x{2000}") produces... + +ok(NFKC("\x{2002}") eq ' '); +ok(NFKD("\x{2002}") eq ' '); +ok(NFKC("\x{2000}") eq ' '); +ok(NFKD("\x{2000}") eq ' '); + +ok(NFKC("\x{210C}") eq 'H'); +ok(NFKD("\x{210C}") eq 'H'); +ok(NFKC("\x{210D}") eq 'H'); +ok(NFKD("\x{210D}") eq 'H'); + +ok(NFC("\x{F907}") eq "\x{9F9C}"); +ok(NFD("\x{F907}") eq "\x{9F9C}"); +ok(NFKC("\x{F907}") eq "\x{9F9C}"); +ok(NFKD("\x{F907}") eq "\x{9F9C}"); + +# 72 diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/tie.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/tie.t index c7214917266..be1712a5050 100755 --- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/tie.t +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/tie.t @@ -27,10 +27,20 @@ BEGIN { } } -use Test; use strict; use warnings; -BEGIN { plan tests => 16 }; +BEGIN { $| = 1; print "1..17\n"; } +my $count = 0; +sub ok ($;$) { + my $p = my $r = shift; + if (@_) { + my $x = shift; + $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; + } + print $p ? "ok" : "not ok", ' ', ++$count, "\n"; +} + +ok(1); package tiescalar; sub TIESCALAR { |