diff options
author | 2010-09-24 14:48:16 +0000 | |
---|---|---|
committer | 2010-09-24 14:48:16 +0000 | |
commit | b39c515898423c8d899e35282f4b395f7cad3298 (patch) | |
tree | 1112fe0668df8904d89127dcb37234a401c97c42 /gnu/usr.bin/perl/cpan/Unicode-Normalize/t | |
parent | use a better description for the vnd(4) entry on this file. (diff) | |
download | wireguard-openbsd-b39c515898423c8d899e35282f4b395f7cad3298.tar.xz wireguard-openbsd-b39c515898423c8d899e35282f4b395f7cad3298.zip |
Perl 5.12.2 from CPAN
Diffstat (limited to 'gnu/usr.bin/perl/cpan/Unicode-Normalize/t')
-rwxr-xr-x | gnu/usr.bin/perl/cpan/Unicode-Normalize/t/fcdc.t | 113 | ||||
-rwxr-xr-x | gnu/usr.bin/perl/cpan/Unicode-Normalize/t/form.t | 71 | ||||
-rwxr-xr-x | gnu/usr.bin/perl/cpan/Unicode-Normalize/t/func.t | 322 | ||||
-rwxr-xr-x | gnu/usr.bin/perl/cpan/Unicode-Normalize/t/illegal.t | 73 | ||||
-rwxr-xr-x | gnu/usr.bin/perl/cpan/Unicode-Normalize/t/norm.t | 125 | ||||
-rwxr-xr-x | gnu/usr.bin/perl/cpan/Unicode-Normalize/t/null.t | 97 | ||||
-rwxr-xr-x | gnu/usr.bin/perl/cpan/Unicode-Normalize/t/proto.t | 75 | ||||
-rwxr-xr-x | gnu/usr.bin/perl/cpan/Unicode-Normalize/t/split.t | 81 | ||||
-rwxr-xr-x | gnu/usr.bin/perl/cpan/Unicode-Normalize/t/test.t | 125 | ||||
-rwxr-xr-x | gnu/usr.bin/perl/cpan/Unicode-Normalize/t/tie.t | 69 |
10 files changed, 1151 insertions, 0 deletions
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/fcdc.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/fcdc.t new file mode 100755 index 00000000000..1cc0db181ce --- /dev/null +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/fcdc.t @@ -0,0 +1,113 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; + exit 0; + } +} + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +######################### + +use Test; +use strict; +use warnings; +BEGIN { plan tests => 70 }; +use Unicode::Normalize qw(:all); +ok(1); # If we made it this far, we're ok. + +sub _pack_U { Unicode::Normalize::pack_U(@_) } +sub hexU { _pack_U map hex, split ' ', shift } +sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" } + +######################### + +ok(FCD(''), ""); +ok(FCC(''), ""); +ok(FCD('A'), "A"); +ok(FCC('A'), "A"); + +ok(normalize('FCD', ""), ""); +ok(normalize('FCC', ""), ""); +ok(normalize('FCC', "A"), "A"); +ok(normalize('FCD', "A"), "A"); + +# if checkFCD is YES, the return value from FCD should be same as the original +ok(FCD(hexU("00C5")), hexU("00C5")); # A with ring above +ok(FCD(hexU("0041 030A")), hexU("0041 030A")); # A+ring +ok(FCD(hexU("0041 0327 030A")), hexU("0041 0327 030A")); # A+cedilla+ring +ok(FCD(hexU("AC01 1100 1161")), hexU("AC01 1100 1161")); # hangul +ok(FCD(hexU("212B F900")), hexU("212B F900")); # compat + +ok(normalize('FCD', hexU("00C5")), hexU("00C5")); +ok(normalize('FCD', hexU("0041 030A")), hexU("0041 030A")); +ok(normalize('FCD', hexU("0041 0327 030A")), hexU("0041 0327 030A")); +ok(normalize('FCD', hexU("AC01 1100 1161")), hexU("AC01 1100 1161")); +ok(normalize('FCD', hexU("212B F900")), hexU("212B F900")); + +# if checkFCD is MAYBE or NO, FCD returns NFD (this behavior isn't documented) +ok(FCD(hexU("00C5 0327")), hexU("0041 0327 030A")); +ok(FCD(hexU("0041 030A 0327")), hexU("0041 0327 030A")); +ok(FCD(hexU("00C5 0327")), NFD(hexU("00C5 0327"))); +ok(FCD(hexU("0041 030A 0327")), NFD(hexU("0041 030A 0327"))); + +ok(normalize('FCD', hexU("00C5 0327")), hexU("0041 0327 030A")); +ok(normalize('FCD', hexU("0041 030A 0327")), hexU("0041 0327 030A")); +ok(normalize('FCD', hexU("00C5 0327")), NFD(hexU("00C5 0327"))); +ok(normalize('FCD', hexU("0041 030A 0327")), NFD(hexU("0041 030A 0327"))); + +ok(answer(checkFCD('')), 'YES'); +ok(answer(checkFCD('A')), 'YES'); +ok(answer(checkFCD("\x{030A}")), 'YES'); # 030A;COMBINING RING ABOVE +ok(answer(checkFCD("\x{0327}")), 'YES'); # 0327;COMBINING CEDILLA +ok(answer(checkFCD(_pack_U(0x00C5))), 'YES'); # A with ring above +ok(answer(checkFCD(hexU("0041 030A"))), 'YES'); # A+ring +ok(answer(checkFCD(hexU("0041 0327 030A"))), 'YES'); # A+cedilla+ring +ok(answer(checkFCD(hexU("0041 030A 0327"))), 'NO'); # A+ring+cedilla +ok(answer(checkFCD(hexU("00C5 0327"))), 'NO'); # A-ring+cedilla +ok(answer(checkNFC(hexU("00C5 0327"))), 'MAYBE'); # NFC: A-ring+cedilla +ok(answer(check("FCD", hexU("00C5 0327"))), 'NO'); +ok(answer(check("NFC", hexU("00C5 0327"))), 'MAYBE'); +ok(answer(checkFCD("\x{AC01}\x{1100}\x{1161}")), 'YES'); # hangul +ok(answer(checkFCD("\x{212B}\x{F900}")), 'YES'); # compat + +ok(answer(checkFCD(hexU("1EA7 05AE 0315 0062"))), "NO"); +ok(answer(checkFCC(hexU("1EA7 05AE 0315 0062"))), "NO"); +ok(answer(check('FCD', hexU("1EA7 05AE 0315 0062"))), "NO"); +ok(answer(check('FCC', hexU("1EA7 05AE 0315 0062"))), "NO"); + +ok(FCC(hexU("00C5 0327")), hexU("0041 0327 030A")); +ok(FCC(hexU("0045 0304 0300")), "\x{1E14}"); +ok(FCC("\x{1100}\x{1161}\x{1100}\x{1173}\x{11AF}"), "\x{AC00}\x{AE00}"); +ok(normalize('FCC', hexU("00C5 0327")), hexU("0041 0327 030A")); +ok(normalize('FCC', hexU("0045 0304 0300")), "\x{1E14}"); +ok(normalize('FCC', hexU("1100 1161 1100 1173 11AF")), "\x{AC00}\x{AE00}"); + +ok(FCC("\x{0B47}\x{0300}\x{0B3E}"), "\x{0B47}\x{0300}\x{0B3E}"); +ok(FCC("\x{1100}\x{0300}\x{1161}"), "\x{1100}\x{0300}\x{1161}"); +ok(FCC("\x{0B47}\x{0B3E}\x{0300}"), "\x{0B4B}\x{0300}"); +ok(FCC("\x{1100}\x{1161}\x{0300}"), "\x{AC00}\x{0300}"); +ok(FCC("\x{0B47}\x{300}\x{0B3E}\x{327}"), "\x{0B47}\x{300}\x{0B3E}\x{327}"); +ok(FCC("\x{1100}\x{300}\x{1161}\x{327}"), "\x{1100}\x{300}\x{1161}\x{327}"); + +ok(answer(checkFCC('')), 'YES'); +ok(answer(checkFCC('A')), 'YES'); +ok(answer(checkFCC("\x{030A}")), 'MAYBE'); # 030A;COMBINING RING ABOVE +ok(answer(checkFCC("\x{0327}")), 'MAYBE'); # 0327;COMBINING CEDILLA +ok(answer(checkFCC(hexU("00C5"))), 'YES'); # A with ring above +ok(answer(checkFCC(hexU("0041 030A"))), 'MAYBE'); # A+ring +ok(answer(checkFCC(hexU("0041 0327 030A"))), 'MAYBE'); # A+cedilla+ring +ok(answer(checkFCC(hexU("0041 030A 0327"))), 'NO'); # A+ring+cedilla +ok(answer(checkFCC(hexU("00C5 0327"))), 'NO'); # A-ring+cedilla +ok(answer(checkFCC("\x{AC01}\x{1100}\x{1161}")), 'MAYBE'); # hangul +ok(answer(checkFCC("\x{212B}\x{F900}")), 'NO'); # compat +ok(answer(checkFCC("\x{212B}\x{0327}")), 'NO'); # compat +ok(answer(checkFCC("\x{0327}\x{212B}")), 'NO'); # compat + diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/form.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/form.t new file mode 100755 index 00000000000..27cd177596f --- /dev/null +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/form.t @@ -0,0 +1,71 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; + exit 0; + } +} + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +######################### + +use Test; +use strict; +use warnings; +BEGIN { plan tests => 37 }; +use Unicode::Normalize qw(:all); +ok(1); # If we made it this far, we're ok. + +sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" } + +######################### + +ok(NFD ("\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{FF76}"); +ok(NFC ("\x{304C}\x{FF76}"), "\x{304C}\x{FF76}"); +ok(NFKD("\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{30AB}"); +ok(NFKC("\x{304C}\x{FF76}"), "\x{304C}\x{30AB}"); + +ok(answer(checkNFD ("\x{304C}")), "NO"); +ok(answer(checkNFC ("\x{304C}")), "YES"); +ok(answer(checkNFKD("\x{304C}")), "NO"); +ok(answer(checkNFKC("\x{304C}")), "YES"); +ok(answer(checkNFD ("\x{FF76}")), "YES"); +ok(answer(checkNFC ("\x{FF76}")), "YES"); +ok(answer(checkNFKD("\x{FF76}")), "NO"); +ok(answer(checkNFKC("\x{FF76}")), "NO"); + +ok(normalize('D', "\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{FF76}"); +ok(normalize('C', "\x{304C}\x{FF76}"), "\x{304C}\x{FF76}"); +ok(normalize('KD',"\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{30AB}"); +ok(normalize('KC',"\x{304C}\x{FF76}"), "\x{304C}\x{30AB}"); + +ok(answer(check('D', "\x{304C}")), "NO"); +ok(answer(check('C', "\x{304C}")), "YES"); +ok(answer(check('KD',"\x{304C}")), "NO"); +ok(answer(check('KC',"\x{304C}")), "YES"); +ok(answer(check('D' ,"\x{FF76}")), "YES"); +ok(answer(check('C' ,"\x{FF76}")), "YES"); +ok(answer(check('KD',"\x{FF76}")), "NO"); +ok(answer(check('KC',"\x{FF76}")), "NO"); + +ok(normalize('NFD', "\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{FF76}"); +ok(normalize('NFC', "\x{304C}\x{FF76}"), "\x{304C}\x{FF76}"); +ok(normalize('NFKD',"\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{30AB}"); +ok(normalize('NFKC',"\x{304C}\x{FF76}"), "\x{304C}\x{30AB}"); + +ok(answer(check('NFD', "\x{304C}")), "NO"); +ok(answer(check('NFC', "\x{304C}")), "YES"); +ok(answer(check('NFKD',"\x{304C}")), "NO"); +ok(answer(check('NFKC',"\x{304C}")), "YES"); +ok(answer(check('NFD' ,"\x{FF76}")), "YES"); +ok(answer(check('NFC' ,"\x{FF76}")), "YES"); +ok(answer(check('NFKD',"\x{FF76}")), "NO"); +ok(answer(check('NFKC',"\x{FF76}")), "NO"); + diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/func.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/func.t new file mode 100755 index 00000000000..76ced03ea1a --- /dev/null +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/func.t @@ -0,0 +1,322 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; + exit 0; + } +} + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +######################### + +use Test; +use strict; +use warnings; +BEGIN { plan tests => 211 }; +use Unicode::Normalize qw(:all); +ok(1); # If we made it this far, we're ok. + +sub _pack_U { Unicode::Normalize::pack_U(@_) } +sub hexU { _pack_U map hex, split ' ', shift } + +######################### + +ok(getCombinClass( 0), 0); +ok(getCombinClass( 41), 0); +ok(getCombinClass( 65), 0); +ok(getCombinClass( 768), 230); +ok(getCombinClass(1809), 36); + +ok(getCanon( 0), undef); +ok(getCanon(0x29), undef); +ok(getCanon(0x41), undef); +ok(getCanon(0x00C0), _pack_U(0x0041, 0x0300)); +ok(getCanon(0x00EF), _pack_U(0x0069, 0x0308)); +ok(getCanon(0x304C), _pack_U(0x304B, 0x3099)); +ok(getCanon(0x1EA4), _pack_U(0x0041, 0x0302, 0x0301)); +ok(getCanon(0x1F82), _pack_U(0x03B1, 0x0313, 0x0300, 0x0345)); +ok(getCanon(0x1FAF), _pack_U(0x03A9, 0x0314, 0x0342, 0x0345)); +ok(getCanon(0xAC00), _pack_U(0x1100, 0x1161)); +ok(getCanon(0xAE00), _pack_U(0x1100, 0x1173, 0x11AF)); +ok(getCanon(0x212C), undef); +ok(getCanon(0x3243), undef); +ok(getCanon(0xFA2D), _pack_U(0x9DB4)); + +ok(getCompat( 0), undef); +ok(getCompat(0x29), undef); +ok(getCompat(0x41), undef); +ok(getCompat(0x00C0), _pack_U(0x0041, 0x0300)); +ok(getCompat(0x00EF), _pack_U(0x0069, 0x0308)); +ok(getCompat(0x304C), _pack_U(0x304B, 0x3099)); +ok(getCompat(0x1EA4), _pack_U(0x0041, 0x0302, 0x0301)); +ok(getCompat(0x1F82), _pack_U(0x03B1, 0x0313, 0x0300, 0x0345)); +ok(getCompat(0x1FAF), _pack_U(0x03A9, 0x0314, 0x0342, 0x0345)); +ok(getCompat(0x212C), _pack_U(0x0042)); +ok(getCompat(0x3243), _pack_U(0x0028, 0x81F3, 0x0029)); +ok(getCompat(0xAC00), _pack_U(0x1100, 0x1161)); +ok(getCompat(0xAE00), _pack_U(0x1100, 0x1173, 0x11AF)); +ok(getCompat(0xFA2D), _pack_U(0x9DB4)); + +ok(getComposite( 0, 0), undef); +ok(getComposite( 0, 0x29), undef); +ok(getComposite(0x29, 0), undef); +ok(getComposite(0x29, 0x29), undef); +ok(getComposite( 0, 0x41), undef); +ok(getComposite(0x41, 0), undef); +ok(getComposite(0x41, 0x41), undef); +ok(getComposite(12, 0x0300), undef); +ok(getComposite(0x0055, 0xFF00), undef); +ok(getComposite(0x0041, 0x0300), 0x00C0); +ok(getComposite(0x0055, 0x0300), 0x00D9); +ok(getComposite(0x0112, 0x0300), 0x1E14); +ok(getComposite(0x1100, 0x1161), 0xAC00); +ok(getComposite(0x1100, 0x1173), 0xADF8); +ok(getComposite(0x1100, 0x11AF), undef); +ok(getComposite(0x1173, 0x11AF), undef); +ok(getComposite(0xAC00, 0x11A7), undef); +ok(getComposite(0xAC00, 0x11A8), 0xAC01); +ok(getComposite(0xADF8, 0x11AF), 0xAE00); + +sub uprops { + my $uv = shift; + my $r = ""; + $r .= isExclusion($uv) ? 'X' : 'x'; + $r .= isSingleton($uv) ? 'S' : 's'; + $r .= isNonStDecomp($uv) ? 'N' : 'n'; # Non-Starter Decomposition + $r .= isComp_Ex($uv) ? 'F' : 'f'; # Full exclusion (X + S + N) + $r .= isComp2nd($uv) ? 'B' : 'b'; # B = M = Y + $r .= isNFD_NO($uv) ? 'D' : 'd'; + $r .= isNFC_MAYBE($uv) ? 'M' : 'm'; # Maybe + $r .= isNFC_NO($uv) ? 'C' : 'c'; + $r .= isNFKD_NO($uv) ? 'K' : 'k'; + $r .= isNFKC_MAYBE($uv) ? 'Y' : 'y'; # maYbe + $r .= isNFKC_NO($uv) ? 'G' : 'g'; + return $r; +} + +ok(uprops(0x0000), 'xsnfbdmckyg'); # NULL +ok(uprops(0x0029), 'xsnfbdmckyg'); # RIGHT PARENTHESIS +ok(uprops(0x0041), 'xsnfbdmckyg'); # LATIN CAPITAL LETTER A +ok(uprops(0x00A0), 'xsnfbdmcKyG'); # NO-BREAK SPACE +ok(uprops(0x00C0), 'xsnfbDmcKyg'); # LATIN CAPITAL LETTER A WITH GRAVE +ok(uprops(0x0300), 'xsnfBdMckYg'); # COMBINING GRAVE ACCENT +ok(uprops(0x0344), 'xsNFbDmCKyG'); # COMBINING GREEK DIALYTIKA TONOS +ok(uprops(0x0387), 'xSnFbDmCKyG'); # GREEK ANO TELEIA +ok(uprops(0x0958), 'XsnFbDmCKyG'); # DEVANAGARI LETTER QA +ok(uprops(0x0F43), 'XsnFbDmCKyG'); # TIBETAN LETTER GHA +ok(uprops(0x1100), 'xsnfbdmckyg'); # HANGUL CHOSEONG KIYEOK +ok(uprops(0x1161), 'xsnfBdMckYg'); # HANGUL JUNGSEONG A +ok(uprops(0x11AF), 'xsnfBdMckYg'); # HANGUL JONGSEONG RIEUL +ok(uprops(0x212B), 'xSnFbDmCKyG'); # ANGSTROM SIGN +ok(uprops(0xAC00), 'xsnfbDmcKyg'); # HANGUL SYLLABLE GA +ok(uprops(0xF900), 'xSnFbDmCKyG'); # CJK COMPATIBILITY IDEOGRAPH-F900 +ok(uprops(0xFB4E), 'XsnFbDmCKyG'); # HEBREW LETTER PE WITH RAFE +ok(uprops(0xFF71), 'xsnfbdmcKyG'); # HALFWIDTH KATAKANA LETTER A + +ok(decompose(""), ""); +ok(decompose("A"), "A"); +ok(decompose("", 1), ""); +ok(decompose("A", 1), "A"); + +ok(decompose(hexU("1E14 AC01")), hexU("0045 0304 0300 1100 1161 11A8")); +ok(decompose(hexU("AC00 AE00")), hexU("1100 1161 1100 1173 11AF")); +ok(decompose(hexU("304C FF76")), hexU("304B 3099 FF76")); + +ok(decompose(hexU("1E14 AC01"), 1), hexU("0045 0304 0300 1100 1161 11A8")); +ok(decompose(hexU("AC00 AE00"), 1), hexU("1100 1161 1100 1173 11AF")); +ok(decompose(hexU("304C FF76"), 1), hexU("304B 3099 30AB")); + +# don't modify the source +my $sDec = "\x{FA19}"; +ok(decompose($sDec), "\x{795E}"); +ok($sDec, "\x{FA19}"); + +ok(reorder(""), ""); +ok(reorder("A"), "A"); +ok(reorder(hexU("0041 0300 0315 0313 031b 0061")), + hexU("0041 031b 0300 0313 0315 0061")); +ok(reorder(hexU("00C1 0300 0315 0313 031b 0061 309A 3099")), + hexU("00C1 031b 0300 0313 0315 0061 309A 3099")); + +# don't modify the source +my $sReord = "\x{3000}\x{300}\x{31b}"; +ok(reorder($sReord), "\x{3000}\x{31b}\x{300}"); +ok($sReord, "\x{3000}\x{300}\x{31b}"); + +ok(compose(""), ""); +ok(compose("A"), "A"); +ok(compose(hexU("0061 0300")), hexU("00E0")); +ok(compose(hexU("0061 0300 031B")), hexU("00E0 031B")); +ok(compose(hexU("0061 0300 0315")), hexU("00E0 0315")); +ok(compose(hexU("0061 0300 0313")), hexU("00E0 0313")); +ok(compose(hexU("0061 031B 0300")), hexU("00E0 031B")); +ok(compose(hexU("0061 0315 0300")), hexU("0061 0315 0300")); +ok(compose(hexU("0061 0313 0300")), hexU("0061 0313 0300")); + +# don't modify the source +my $sCom = "\x{304B}\x{3099}"; +ok(compose($sCom), "\x{304C}"); +ok($sCom, "\x{304B}\x{3099}"); + +ok(composeContiguous(""), ""); +ok(composeContiguous("A"), "A"); +ok(composeContiguous(hexU("0061 0300")), hexU("00E0")); +ok(composeContiguous(hexU("0061 0300 031B")), hexU("00E0 031B")); +ok(composeContiguous(hexU("0061 0300 0315")), hexU("00E0 0315")); +ok(composeContiguous(hexU("0061 0300 0313")), hexU("00E0 0313")); +ok(composeContiguous(hexU("0061 031B 0300")), hexU("0061 031B 0300")); +ok(composeContiguous(hexU("0061 0315 0300")), hexU("0061 0315 0300")); +ok(composeContiguous(hexU("0061 0313 0300")), hexU("0061 0313 0300")); + +# don't modify the source +my $sCtg = "\x{30DB}\x{309A}"; +ok(composeContiguous($sCtg), "\x{30DD}"); +ok($sCtg, "\x{30DB}\x{309A}"); + +sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" } + +ok(answer(checkNFD("")), "YES"); +ok(answer(checkNFC("")), "YES"); +ok(answer(checkNFKD("")), "YES"); +ok(answer(checkNFKC("")), "YES"); +ok(answer(check("NFD", "")), "YES"); +ok(answer(check("NFC", "")), "YES"); +ok(answer(check("NFKD","")), "YES"); +ok(answer(check("NFKC","")), "YES"); + +# U+0000 to U+007F are prenormalized in all the normalization forms. +ok(answer(checkNFD("AZaz\t12!#`")), "YES"); +ok(answer(checkNFC("AZaz\t12!#`")), "YES"); +ok(answer(checkNFKD("AZaz\t12!#`")), "YES"); +ok(answer(checkNFKC("AZaz\t12!#`")), "YES"); +ok(answer(check("D", "AZaz\t12!#`")), "YES"); +ok(answer(check("C", "AZaz\t12!#`")), "YES"); +ok(answer(check("KD","AZaz\t12!#`")), "YES"); +ok(answer(check("KC","AZaz\t12!#`")), "YES"); + +ok(answer(checkNFD(NFD(_pack_U(0xC1, 0x1100, 0x1173, 0x11AF)))), "YES"); +ok(answer(checkNFD(hexU("20 C1 1100 1173 11AF"))), "NO"); +ok(answer(checkNFC(hexU("20 C1 1173 11AF"))), "MAYBE"); +ok(answer(checkNFC(hexU("20 C1 AE00 1100"))), "YES"); +ok(answer(checkNFC(hexU("20 C1 AE00 1100 0300"))), "MAYBE"); +ok(answer(checkNFC(hexU("212B 1100 0300"))), "NO"); +ok(answer(checkNFC(hexU("1100 0300 212B"))), "NO"); +ok(answer(checkNFC(hexU("0041 0327 030A"))), "MAYBE"); # A+cedilla+ring +ok(answer(checkNFC(hexU("0041 030A 0327"))), "NO"); # A+ring+cedilla +ok(answer(checkNFC(hexU("20 C1 FF71 2025"))),"YES"); +ok(answer(check("NFC", hexU("20 C1 212B 300"))), "NO"); +ok(answer(checkNFKD(hexU("20 C1 FF71 2025"))), "NO"); +ok(answer(checkNFKC(hexU("20 C1 AE00 2025"))), "NO"); +ok(answer(checkNFKC(hexU("212B 1100 0300"))), "NO"); +ok(answer(checkNFKC(hexU("1100 0300 212B"))), "NO"); +ok(answer(checkNFKC(hexU("0041 0327 030A"))), "MAYBE"); # A+cedilla+ring +ok(answer(checkNFKC(hexU("0041 030A 0327"))), "NO"); # A+ring+cedilla +ok(answer(check("NFKC", hexU("20 C1 212B 300"))), "NO"); + +"012ABC" =~ /(\d+)(\w+)/; +ok("012" eq NFC $1 && "ABC" eq NFC $2); + +ok(normalize('C', $1), "012"); +ok(normalize('C', $2), "ABC"); + +ok(normalize('NFC', $1), "012"); +ok(normalize('NFC', $2), "ABC"); + # s/^NF// in normalize() must not prevent using $1, $&, etc. + +# a string with initial zero should be treated like a number + +# LATIN CAPITAL LETTER A WITH GRAVE +ok(getCombinClass("0192"), 0); +ok(getCanon ("0192"), _pack_U(0x41, 0x300)); +ok(getCompat("0192"), _pack_U(0x41, 0x300)); +ok(getComposite("065", "0768"), 192); +ok(isNFD_NO ("0192")); +ok(isNFKD_NO("0192")); + +# DEVANAGARI LETTER QA +ok(isExclusion("02392")); +ok(isComp_Ex ("02392")); +ok(isNFC_NO ("02392")); +ok(isNFKC_NO ("02392")); +ok(isNFD_NO ("02392")); +ok(isNFKD_NO ("02392")); + +# ANGSTROM SIGN +ok(isSingleton("08491")); +ok(isComp_Ex ("08491")); +ok(isNFC_NO ("08491")); +ok(isNFKC_NO ("08491")); +ok(isNFD_NO ("08491")); +ok(isNFKD_NO ("08491")); + +# COMBINING GREEK DIALYTIKA TONOS +ok(isNonStDecomp("0836")); +ok(isComp_Ex ("0836")); +ok(isNFC_NO ("0836")); +ok(isNFKC_NO ("0836")); +ok(isNFD_NO ("0836")); +ok(isNFKD_NO ("0836")); + +# COMBINING GRAVE ACCENT +ok(getCombinClass("0768"), 230); +ok(isComp2nd ("0768")); +ok(isNFC_MAYBE ("0768")); +ok(isNFKC_MAYBE("0768")); + +# HANGUL SYLLABLE GA +ok(getCombinClass("044032"), 0); +ok(getCanon("044032"), _pack_U(0x1100, 0x1161)); +ok(getCompat("044032"), _pack_U(0x1100, 0x1161)); +ok(getComposite("04352", "04449"), 0xAC00); + +# string with 22 combining characters: (0x300..0x315) +my $str_cc22 = _pack_U(0x3041, 0x300..0x315, 0x3042); +ok(decompose($str_cc22), $str_cc22); +ok(reorder($str_cc22), $str_cc22); +ok(compose($str_cc22), $str_cc22); +ok(composeContiguous($str_cc22), $str_cc22); +ok(NFD($str_cc22), $str_cc22); +ok(NFC($str_cc22), $str_cc22); +ok(NFKD($str_cc22), $str_cc22); +ok(NFKC($str_cc22), $str_cc22); +ok(FCD($str_cc22), $str_cc22); +ok(FCC($str_cc22), $str_cc22); + +# string with 40 combining characters of the same class: (0x300..0x313)x2 +my $str_cc40 = _pack_U(0x3041, 0x300..0x313, 0x300..0x313, 0x3042); +ok(decompose($str_cc40), $str_cc40); +ok(reorder($str_cc40), $str_cc40); +ok(compose($str_cc40), $str_cc40); +ok(composeContiguous($str_cc40), $str_cc40); +ok(NFD($str_cc40), $str_cc40); +ok(NFC($str_cc40), $str_cc40); +ok(NFKD($str_cc40), $str_cc40); +ok(NFKC($str_cc40), $str_cc40); +ok(FCD($str_cc40), $str_cc40); +ok(FCC($str_cc40), $str_cc40); + +my $precomp = hexU("304C 304E 3050 3052 3054"); +my $combseq = hexU("304B 3099 304D 3099 304F 3099 3051 3099 3053 3099"); +ok(decompose($precomp x 5), $combseq x 5); +ok(decompose($precomp x 10), $combseq x 10); +ok(decompose($precomp x 20), $combseq x 20); + +my $hangsyl = hexU("AC00 B098 B2E4 B77C B9C8"); +my $jamoseq = hexU("1100 1161 1102 1161 1103 1161 1105 1161 1106 1161"); +ok(decompose($hangsyl x 5), $jamoseq x 5); +ok(decompose($hangsyl x 10), $jamoseq x 10); +ok(decompose($hangsyl x 20), $jamoseq x 20); + +my $notcomp = hexU("304B 304D 304F 3051 3053"); +ok(decompose($precomp . $notcomp), $combseq . $notcomp); +ok(decompose($precomp . $notcomp x 5), $combseq . $notcomp x 5); +ok(decompose($precomp . $notcomp x10), $combseq . $notcomp x10); + + diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/illegal.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/illegal.t new file mode 100755 index 00000000000..976e5097a2c --- /dev/null +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/illegal.t @@ -0,0 +1,73 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; + exit 0; + } +} + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +BEGIN { + unless (5.006001 <= $]) { + print "1..0 # skipped: Perl 5.6.1 or later". + " needed for this test\n"; + exit; + } +} + +######################### + +BEGIN { + use Unicode::Normalize qw(:all); + + unless (exists &Unicode::Normalize::bootstrap or 5.008 <= $]) { + print "1..0 # skipped: XSUB, or Perl 5.8.0 or later". + " needed for this test\n"; + print $@; + exit; + } +} + +use Test; +use strict; +use warnings; + +BEGIN { plan tests => 112 }; + +######################### + +no warnings qw(utf8); +# To avoid warning in Test.pm, EXPR in ok(EXPR) must be boolean. + +for my $u (0xD800, 0xDFFF, 0xFDD0, 0xFDEF, 0xFEFF, 0xFFFE, 0xFFFF, + 0x1FFFF, 0x10FFFF, 0x110000, 0x7FFFFFFF) +{ + my $c = chr $u; + ok($c eq NFD($c)); # 1 + ok($c eq NFC($c)); # 2 + ok($c eq NFKD($c)); # 3 + ok($c eq NFKC($c)); # 4 + ok($c eq FCD($c)); # 5 + ok($c eq FCC($c)); # 6 + ok($c eq decompose($c)); # 7 + ok($c eq decompose($c,1)); # 8 + ok($c eq reorder($c)); # 9 + ok($c eq compose($c)); # 10 +} + +our $proc; # before the last starter +our $unproc; # the last starter and after + +sub _pack_U { Unicode::Normalize::pack_U(@_) } + +($proc, $unproc) = splitOnLastStarter(_pack_U(0x41, 0x300, 0x327, 0xFFFF)); +ok($proc eq _pack_U(0x41, 0x300, 0x327)); +ok($unproc eq "\x{FFFF}"); + diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/norm.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/norm.t new file mode 100755 index 00000000000..5d93747965a --- /dev/null +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/norm.t @@ -0,0 +1,125 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; + exit 0; + } +} + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +######################### + +use Test; +use strict; +use warnings; +BEGIN { plan tests => 64 }; +use Unicode::Normalize qw(normalize); +ok(1); # If we made it this far, we're ok. + +sub _pack_U { Unicode::Normalize::pack_U(@_) } +sub _unpack_U { Unicode::Normalize::unpack_U(@_) } + +######################### + +ok(normalize('D', ""), ""); +ok(normalize('C', ""), ""); +ok(normalize('KD',""), ""); +ok(normalize('KC',""), ""); + +ok(normalize('D', "A"), "A"); +ok(normalize('C', "A"), "A"); +ok(normalize('KD',"A"), "A"); +ok(normalize('KC',"A"), "A"); + +ok(normalize('NFD', ""), ""); +ok(normalize('NFC', ""), ""); +ok(normalize('NFKD',""), ""); +ok(normalize('NFKC',""), ""); + +ok(normalize('NFD', "A"), "A"); +ok(normalize('NFC', "A"), "A"); +ok(normalize('NFKD',"A"), "A"); +ok(normalize('NFKC',"A"), "A"); + +# don't modify the source +my $sNFD = "\x{FA19}"; +ok(normalize('NFD', $sNFD), "\x{795E}"); +ok($sNFD, "\x{FA19}"); + +my $sNFC = "\x{FA1B}"; +ok(normalize('NFC', $sNFC), "\x{798F}"); +ok($sNFC, "\x{FA1B}"); + +my $sNFKD = "\x{FA1E}"; +ok(normalize('NFKD', $sNFKD), "\x{7FBD}"); +ok($sNFKD, "\x{FA1E}"); + +my $sNFKC = "\x{FA26}"; +ok(normalize('NFKC', $sNFKC), "\x{90FD}"); +ok($sNFKC, "\x{FA26}"); + +sub hexNFC { + join " ", map sprintf("%04X", $_), + _unpack_U normalize 'C', _pack_U map hex, split ' ', shift; +} +sub hexNFD { + join " ", map sprintf("%04X", $_), + _unpack_U normalize 'D', _pack_U map hex, split ' ', shift; +} + +ok(hexNFD("1E14 AC01"), "0045 0304 0300 1100 1161 11A8"); +ok(hexNFD("AC00 AE00"), "1100 1161 1100 1173 11AF"); + +ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062"); +ok(hexNFC("00E0 05AE 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); +ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); +ok(hexNFC("0045 0304 0300 AC00 11A8"), "1E14 AC01"); +ok(hexNFC("1100 1161 1100 1173 11AF"), "AC00 AE00"); +ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF"); + +ok(hexNFD("0061 0315 0300 05AE 05C4 0062"), "0061 05AE 0300 05C4 0315 0062"); +ok(hexNFD("00E0 05AE 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); +ok(hexNFD("0061 05AE 0300 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); +ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); +ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); +ok(hexNFD("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); +ok(hexNFD("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); +ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000"); +ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000"); + +ok(hexNFC("AC00 11A7"), "AC00 11A7"); +ok(hexNFC("AC00 11A8"), "AC01"); +ok(hexNFC("AC00 11A9"), "AC02"); +ok(hexNFC("AC00 11C2"), "AC1B"); +ok(hexNFC("AC00 11C3"), "AC00 11C3"); + +# Test Cases from Public Review Issue #29: Normalization Issue +# cf. http://www.unicode.org/review/pr-29.html +ok(hexNFC("0B47 0300 0B3E"), "0B47 0300 0B3E"); +ok(hexNFC("1100 0300 1161"), "1100 0300 1161"); + +ok(hexNFC("0B47 0B3E 0300"), "0B4B 0300"); +ok(hexNFC("1100 1161 0300"), "AC00 0300"); + +ok(hexNFC("0B47 0300 0B3E 0327"), "0B47 0300 0B3E 0327"); +ok(hexNFC("1100 0300 1161 0327"), "1100 0300 1161 0327"); + +ok(hexNFC("0300 0041"), "0300 0041"); +ok(hexNFC("0300 0301 0041"), "0300 0301 0041"); +ok(hexNFC("0301 0300 0041"), "0301 0300 0041"); +ok(hexNFC("0000 0300 0000 0301"), "0000 0300 0000 0301"); +ok(hexNFC("0000 0301 0000 0300"), "0000 0301 0000 0300"); + +ok(hexNFC("0327 0061 0300"), "0327 00E0"); +ok(hexNFC("0301 0061 0300"), "0301 00E0"); +ok(hexNFC("0315 0061 0300"), "0315 00E0"); +ok(hexNFC("0000 0327 0061 0300"), "0000 0327 00E0"); +ok(hexNFC("0000 0301 0061 0300"), "0000 0301 00E0"); +ok(hexNFC("0000 0315 0061 0300"), "0000 0315 00E0"); diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/null.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/null.t new file mode 100755 index 00000000000..6067da4775c --- /dev/null +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/null.t @@ -0,0 +1,97 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; + exit 0; + } +} + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +######################### + +use strict; +use warnings; + +use Unicode::Normalize qw(:all); +print "1..24\n"; + +print "ok 1\n"; + +# if $_ is not NULL-terminated, test may fail. + +$_ = compose('abc'); +print /c$/ ? "ok" : "not ok", " 2\n"; + +$_ = decompose('abc'); +print /c$/ ? "ok" : "not ok", " 3\n"; + +$_ = reorder('abc'); +print /c$/ ? "ok" : "not ok", " 4\n"; + +$_ = NFD('abc'); +print /c$/ ? "ok" : "not ok", " 5\n"; + +$_ = NFC('abc'); +print /c$/ ? "ok" : "not ok", " 6\n"; + +$_ = NFKD('abc'); +print /c$/ ? "ok" : "not ok", " 7\n"; + +$_ = NFKC('abc'); +print /c$/ ? "ok" : "not ok", " 8\n"; + +$_ = FCC('abc'); +print /c$/ ? "ok" : "not ok", " 9\n"; + +$_ = decompose("\x{304C}abc"); +print /c$/ ? "ok" : "not ok", " 10\n"; + +$_ = decompose("\x{304B}\x{3099}abc"); +print /c$/ ? "ok" : "not ok", " 11\n"; + +$_ = reorder("\x{304C}abc"); +print /c$/ ? "ok" : "not ok", " 12\n"; + +$_ = reorder("\x{304B}\x{3099}abc"); +print /c$/ ? "ok" : "not ok", " 13\n"; + +$_ = compose("\x{304C}abc"); +print /c$/ ? "ok" : "not ok", " 14\n"; + +$_ = compose("\x{304B}\x{3099}abc"); +print /c$/ ? "ok" : "not ok", " 15\n"; + +$_ = NFD("\x{304C}abc"); +print /c$/ ? "ok" : "not ok", " 16\n"; + +$_ = NFC("\x{304C}abc"); +print /c$/ ? "ok" : "not ok", " 17\n"; + +$_ = NFKD("\x{304C}abc"); +print /c$/ ? "ok" : "not ok", " 18\n"; + +$_ = NFKC("\x{304C}abc"); +print /c$/ ? "ok" : "not ok", " 19\n"; + +$_ = FCC("\x{304C}abc"); +print /c$/ ? "ok" : "not ok", " 20\n"; + +$_ = getCanon(0x100); +print s/.$// ? "ok" : "not ok", " 21\n"; + +$_ = getCompat(0x100); +print s/.$// ? "ok" : "not ok", " 22\n"; + +$_ = getCanon(0xAC00); +print s/.$// ? "ok" : "not ok", " 23\n"; + +$_ = getCompat(0xAC00); +print s/.$// ? "ok" : "not ok", " 24\n"; + diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/proto.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/proto.t new file mode 100755 index 00000000000..3c4298d849a --- /dev/null +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/proto.t @@ -0,0 +1,75 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; + exit 0; + } +} + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +######################### + +use Test; +use strict; +use warnings; +BEGIN { plan tests => 42 }; +use Unicode::Normalize qw(:all); +ok(1); # If we made it this far, we're ok. + +######################### + +# unary op. RING-CEDILLA +ok( "\x{30A}\x{327}" ne "\x{327}\x{30A}"); +ok(NFD "\x{30A}\x{327}" eq "\x{327}\x{30A}"); +ok(NFC "\x{30A}\x{327}" eq "\x{327}\x{30A}"); +ok(NFKD "\x{30A}\x{327}" eq "\x{327}\x{30A}"); +ok(NFKC "\x{30A}\x{327}" eq "\x{327}\x{30A}"); +ok(FCD "\x{30A}\x{327}" eq "\x{327}\x{30A}"); +ok(FCC "\x{30A}\x{327}" eq "\x{327}\x{30A}"); +ok(reorder "\x{30A}\x{327}" eq "\x{327}\x{30A}"); + +ok(prototype \&normalize,'$$'); +ok(prototype \&NFD, '$'); +ok(prototype \&NFC, '$'); +ok(prototype \&NFKD, '$'); +ok(prototype \&NFKC, '$'); +ok(prototype \&FCD, '$'); +ok(prototype \&FCC, '$'); + +ok(prototype \&check, '$$'); +ok(prototype \&checkNFD, '$'); +ok(prototype \&checkNFC, '$'); +ok(prototype \&checkNFKD,'$'); +ok(prototype \&checkNFKC,'$'); +ok(prototype \&checkFCD, '$'); +ok(prototype \&checkFCC, '$'); + +ok(prototype \&decompose, '$;$'); +ok(prototype \&reorder, '$'); +ok(prototype \&compose, '$'); +ok(prototype \&composeContiguous, '$'); + +ok(prototype \&getCanon, '$'); +ok(prototype \&getCompat, '$'); +ok(prototype \&getComposite, '$$'); +ok(prototype \&getCombinClass,'$'); +ok(prototype \&isExclusion, '$'); +ok(prototype \&isSingleton, '$'); +ok(prototype \&isNonStDecomp, '$'); +ok(prototype \&isComp2nd, '$'); +ok(prototype \&isComp_Ex, '$'); + +ok(prototype \&isNFD_NO, '$'); +ok(prototype \&isNFC_NO, '$'); +ok(prototype \&isNFC_MAYBE, '$'); +ok(prototype \&isNFKD_NO, '$'); +ok(prototype \&isNFKC_NO, '$'); +ok(prototype \&isNFKC_MAYBE, '$'); + diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/split.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/split.t new file mode 100755 index 00000000000..6bf7ff66b1f --- /dev/null +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/split.t @@ -0,0 +1,81 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; + exit 0; + } +} + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +BEGIN { + unless (5.006001 <= $]) { + print "1..0 # skipped: Perl 5.6.1 or later". + " needed for this test\n"; + exit; + } +} + +######################### + +use Test; +use strict; +use warnings; +BEGIN { plan tests => 14 }; +use Unicode::Normalize qw(:all); +ok(1); # If we made it this far, we're ok. + +sub _pack_U { Unicode::Normalize::pack_U(@_) } +sub _unpack_U { Unicode::Normalize::unpack_U(@_) } + +######################### + +our $proc; # before the last starter +our $unproc; # the last starter and after +# If string has no starter, entire string is set to $unproc. + +# When you have $normalized string and $unnormalized string following, +# a simple concatenation +# C<$concat = $normalized . normalize($form, $unnormalized)> +# is wrong. Instead of it, like this: +# +# ($processed, $unprocessed) = splitOnLastStarter($normalized); +# $concat = $processed . normalize($form, $unprocessed.$unnormalized); + +($proc, $unproc) = splitOnLastStarter(""); +ok($proc, ""); +ok($unproc, ""); + +($proc, $unproc) = splitOnLastStarter("A"); +ok($proc, ""); +ok($unproc, "A"); + +($proc, $unproc) = splitOnLastStarter(_pack_U(0x41, 0x300, 0x327, 0x42)); +ok($proc, _pack_U(0x41, 0x300, 0x327)); +ok($unproc, "B"); + +($proc, $unproc) = splitOnLastStarter(_pack_U(0x4E00, 0x41, 0x301)); +ok($proc, _pack_U(0x4E00)); +ok($unproc, _pack_U(0x41, 0x301)); + +($proc, $unproc) = splitOnLastStarter(_pack_U(0x302, 0x301, 0x300)); +ok($proc, ""); +ok($unproc, _pack_U(0x302, 0x301, 0x300)); + +our $ka_grave = _pack_U(0x41, 0, 0x42, 0x304B, 0x300); +our $dakuten = _pack_U(0x3099); +our $ga_grave = _pack_U(0x41, 0, 0x42, 0x304C, 0x300); + +our ($p, $u) = splitOnLastStarter($ka_grave); +our $concat = $p . NFC($u.$dakuten); + +ok(NFC($ka_grave.$dakuten) eq $ga_grave); +ok(NFC($ka_grave).NFC($dakuten) ne $ga_grave); +ok($concat eq $ga_grave); + diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/test.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/test.t new file mode 100755 index 00000000000..e48e16f1b9b --- /dev/null +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/test.t @@ -0,0 +1,125 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; + exit 0; + } +} + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +######################### + +use Test; +use strict; +use warnings; +BEGIN { plan tests => 58 }; +use Unicode::Normalize; +ok(1); # If we made it this far, we're ok. + +sub _pack_U { Unicode::Normalize::pack_U(@_) } +sub _unpack_U { Unicode::Normalize::unpack_U(@_) } + +######################### + +ok(NFD(""), ""); +ok(NFC(""), ""); +ok(NFKD(""), ""); +ok(NFKC(""), ""); + +ok(NFD("A"), "A"); +ok(NFC("A"), "A"); +ok(NFKD("A"), "A"); +ok(NFKC("A"), "A"); + +# don't modify the source +my $sNFD = "\x{FA19}"; +ok(NFD($sNFD), "\x{795E}"); +ok($sNFD, "\x{FA19}"); + +my $sNFC = "\x{FA1B}"; +ok(NFC($sNFC), "\x{798F}"); +ok($sNFC, "\x{FA1B}"); + +my $sNFKD = "\x{FA1E}"; +ok(NFKD($sNFKD), "\x{7FBD}"); +ok($sNFKD, "\x{FA1E}"); + +my $sNFKC = "\x{FA26}"; +ok(NFKC($sNFKC), "\x{90FD}"); +ok($sNFKC, "\x{FA26}"); + + +sub hexNFC { + join " ", map sprintf("%04X", $_), + _unpack_U NFC _pack_U map hex, split ' ', shift; +} +sub hexNFD { + join " ", map sprintf("%04X", $_), + _unpack_U NFD _pack_U map hex, split ' ', shift; +} + +ok(hexNFD("1E14 AC01"), "0045 0304 0300 1100 1161 11A8"); +ok(hexNFD("AC00 AE00"), "1100 1161 1100 1173 11AF"); + +ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062"); +ok(hexNFC("00E0 05AE 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); +ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); +ok(hexNFC("0045 0304 0300 AC00 11A8"), "1E14 AC01"); +ok(hexNFC("1100 1161 1100 1173 11AF"), "AC00 AE00"); +ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF"); + +ok(hexNFD("0061 0315 0300 05AE 05C4 0062"), "0061 05AE 0300 05C4 0315 0062"); +ok(hexNFD("00E0 05AE 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); +ok(hexNFD("0061 05AE 0300 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); +ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); +ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); +ok(hexNFD("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); +ok(hexNFD("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); +ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000"); +ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000"); + +ok(hexNFC("AC00 11A7"), "AC00 11A7"); +ok(hexNFC("AC00 11A8"), "AC01"); +ok(hexNFC("AC00 11A9"), "AC02"); +ok(hexNFC("AC00 11C2"), "AC1B"); +ok(hexNFC("AC00 11C3"), "AC00 11C3"); + +# Test Cases from Public Review Issue #29: Normalization Issue +# cf. http://www.unicode.org/review/pr-29.html +ok(hexNFC("0B47 0300 0B3E"), "0B47 0300 0B3E"); +ok(hexNFC("1100 0300 1161"), "1100 0300 1161"); +ok(hexNFC("0B47 0B3E 0300"), "0B4B 0300"); +ok(hexNFC("1100 1161 0300"), "AC00 0300"); +ok(hexNFC("0B47 0300 0B3E 0327"), "0B47 0300 0B3E 0327"); +ok(hexNFC("1100 0300 1161 0327"), "1100 0300 1161 0327"); + +ok(hexNFC("0300 0041"), "0300 0041"); +ok(hexNFC("0300 0301 0041"), "0300 0301 0041"); +ok(hexNFC("0301 0300 0041"), "0301 0300 0041"); +ok(hexNFC("0000 0300 0000 0301"), "0000 0300 0000 0301"); +ok(hexNFC("0000 0301 0000 0300"), "0000 0301 0000 0300"); + +ok(hexNFC("0327 0061 0300"), "0327 00E0"); +ok(hexNFC("0301 0061 0300"), "0301 00E0"); +ok(hexNFC("0315 0061 0300"), "0315 00E0"); +ok(hexNFC("0000 0327 0061 0300"), "0000 0327 00E0"); +ok(hexNFC("0000 0301 0061 0300"), "0000 0301 00E0"); +ok(hexNFC("0000 0315 0061 0300"), "0000 0315 00E0"); + +# NFC() should be unary. +my $str11 = _pack_U(0x41, 0x0302, 0x0301, 0x62); +my $str12 = _pack_U(0x1EA4, 0x62); +ok(NFC $str11 eq $str12); + +# NFD() should be unary. +my $str21 = _pack_U(0xE0, 0xAC00); +my $str22 = _pack_U(0x61, 0x0300, 0x1100, 0x1161); +ok(NFD $str21 eq $str22); + diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/tie.t b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/tie.t new file mode 100755 index 00000000000..c7214917266 --- /dev/null +++ b/gnu/usr.bin/perl/cpan/Unicode-Normalize/t/tie.t @@ -0,0 +1,69 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; + exit 0; + } +} + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +######################### + +BEGIN { + use Unicode::Normalize qw(:all); + + unless (exists &Unicode::Normalize::bootstrap or 5.008 <= $]) { + print "1..0 # skipped: XSUB, or Perl 5.8.0 or later". + " needed for this test\n"; + print $@; + exit; + } +} + +use Test; +use strict; +use warnings; +BEGIN { plan tests => 16 }; + +package tiescalar; +sub TIESCALAR { + my ($class, $instance) = @_; + return bless \$instance => $class; +} +sub FETCH { return ${$_[0]}++ } +sub STORE { return ${$_[0]} = $_[1] } +sub DESTROY { undef ${$_[0]} } + +######################### + +package main; + +tie my $tie1, 'tiescalar', "123"; +ok(NFD($tie1), 123); +ok(NFC($tie1), 124); +ok(NFKD($tie1), 125); +ok(NFKC($tie1), 126); +ok(FCD($tie1), 127); +ok(FCC($tie1), 128); + +tie my $tie2, 'tiescalar', "256"; +ok(normalize('NFD', $tie2), 256); +ok(normalize('NFC', $tie2), 257); +ok(normalize('NFKD', $tie2), 258); +ok(normalize('NFKC', $tie2), 259); +ok(normalize('FCD', $tie2), 260); +ok(normalize('FCC', $tie2), 261); + +tie my $tie3, 'tiescalar', "315"; +ok(decompose($tie3), 315); +ok(reorder($tie3), 316); +ok(compose($tie3), 317); +ok(composeContiguous($tie3), 318); + |