diff options
author | 2002-10-27 22:14:39 +0000 | |
---|---|---|
committer | 2002-10-27 22:14:39 +0000 | |
commit | 55745691c11d58794cc2bb4d620ee3985f4381e6 (patch) | |
tree | d570f77ae0fda2ab3c9daa80b06a330c16cfe79f /gnu/usr.bin/perl/lib/Unicode | |
parent | remove MD bits from test. (diff) | |
download | wireguard-openbsd-55745691c11d58794cc2bb4d620ee3985f4381e6.tar.xz wireguard-openbsd-55745691c11d58794cc2bb4d620ee3985f4381e6.zip |
stock perl 5.8.0 from CPAN
Diffstat (limited to 'gnu/usr.bin/perl/lib/Unicode')
-rw-r--r-- | gnu/usr.bin/perl/lib/Unicode/Collate.pm | 1029 | ||||
-rw-r--r-- | gnu/usr.bin/perl/lib/Unicode/Collate/Changes | 58 | ||||
-rw-r--r-- | gnu/usr.bin/perl/lib/Unicode/Collate/README | 57 | ||||
-rw-r--r-- | gnu/usr.bin/perl/lib/Unicode/Collate/keys.txt | 1930 | ||||
-rw-r--r-- | gnu/usr.bin/perl/lib/Unicode/Collate/t/test.t | 605 | ||||
-rw-r--r-- | gnu/usr.bin/perl/lib/Unicode/README | 8 | ||||
-rw-r--r-- | gnu/usr.bin/perl/lib/Unicode/UCD.pm | 746 | ||||
-rw-r--r-- | gnu/usr.bin/perl/lib/Unicode/UCD.t | 281 |
8 files changed, 4714 insertions, 0 deletions
diff --git a/gnu/usr.bin/perl/lib/Unicode/Collate.pm b/gnu/usr.bin/perl/lib/Unicode/Collate.pm new file mode 100644 index 00000000000..51c290ec879 --- /dev/null +++ b/gnu/usr.bin/perl/lib/Unicode/Collate.pm @@ -0,0 +1,1029 @@ +package Unicode::Collate; + +BEGIN { + if (ord("A") == 193) { + die "Unicode::Collate not ported to EBCDIC\n"; + } +} + +use 5.006; +use strict; +use warnings; +use Carp; +use File::Spec; + +require Exporter; + +our $VERSION = '0.12'; +our $PACKAGE = __PACKAGE__; + +our @ISA = qw(Exporter); + +our %EXPORT_TAGS = (); +our @EXPORT_OK = (); +our @EXPORT = (); + +(our $Path = $INC{'Unicode/Collate.pm'}) =~ s/\.pm$//; +our $KeyFile = "allkeys.txt"; + +our $UNICODE_VERSION; + +eval { require Unicode::UCD }; + +unless ($@) { + $UNICODE_VERSION = Unicode::UCD::UnicodeVersion(); +} +else { # XXX, Perl 5.6.1 + my($f, $fh); + foreach my $d (@INC) { + use File::Spec; + $f = File::Spec->catfile($d, "unicode", "Unicode.301"); + if (open($fh, $f)) { + $UNICODE_VERSION = '3.0.1'; + close $fh; + last; + } + } +} + +our $getCombinClass; # coderef for combining class from Unicode::Normalize + +use constant Min2 => 0x20; # minimum weight at level 2 +use constant Min3 => 0x02; # minimum weight at level 3 +use constant UNDEFINED => 0xFF80; # special value for undefined CE's + +our $DefaultRearrange = [ 0x0E40..0x0E44, 0x0EC0..0x0EC4 ]; + +sub UCA_Version { "8.0" } + +sub Base_Unicode_Version { $UNICODE_VERSION || 'unknown' } + +## +## constructor +## +sub new +{ + my $class = shift; + my $self = bless { @_ }, $class; + + # alternate lowercased + $self->{alternate} = + ! exists $self->{alternate} ? 
'shifted' : lc($self->{alternate}); + + croak "$PACKAGE unknown alternate tag name: $self->{alternate}" + unless $self->{alternate} eq 'blanked' + || $self->{alternate} eq 'non-ignorable' + || $self->{alternate} eq 'shifted' + || $self->{alternate} eq 'shift-trimmed'; + + # collation level + $self->{level} ||= 4; + + croak "Illegal level lower than 1 (passed $self->{level})." + if $self->{level} < 1; + croak "A level higher than 4 (passed $self->{level}) is not supported." + if 4 < $self->{level}; + + # overrideHangul and -CJK + # If true: CODEREF used; '': default; undef: derived elements + $self->{overrideHangul} = '' + if ! exists $self->{overrideHangul}; + $self->{overrideCJK} = '' + if ! exists $self->{overrideCJK}; + + # normalization form + $self->{normalization} = 'D' + if ! exists $self->{normalization}; + $self->{UNF} = undef; + + if (defined $self->{normalization}) { + eval { require Unicode::Normalize }; + croak "Unicode/Normalize.pm is required to normalize strings: $@" + if $@; + + Unicode::Normalize->import(); + $getCombinClass = \&Unicode::Normalize::getCombinClass + if ! $getCombinClass; + + $self->{UNF} = + $self->{normalization} =~ /^(?:NF)?C$/ ? \&NFC : + $self->{normalization} =~ /^(?:NF)?D$/ ? \&NFD : + $self->{normalization} =~ /^(?:NF)?KC$/ ? \&NFKC : + $self->{normalization} =~ /^(?:NF)?KD$/ ? \&NFKD : + croak "$PACKAGE unknown normalization form name: " + . $self->{normalization}; + } + + # Open a table file. + # If undef is passed explicitly, no file is read. + $self->{table} = $KeyFile + if ! exists $self->{table}; + $self->read_table + if defined $self->{table}; + + if ($self->{entry}) { + $self->parseEntry($_) foreach split /\n/, $self->{entry}; + } + + # backwards + $self->{backwards} ||= [ ]; + $self->{backwards} = [ $self->{backwards} ] + if ! ref $self->{backwards}; + + # rearrange + $self->{rearrange} = $DefaultRearrange + if ! exists $self->{rearrange}; + $self->{rearrange} = [] + if ! 
defined $self->{rearrange}; + croak "$PACKAGE: A list for rearrangement must be store in an ARRAYREF" + if ! ref $self->{rearrange}; + + # keys of $self->{rearrangeHash} are $self->{rearrange}. + $self->{rearrangeHash} = undef; + + if (@{ $self->{rearrange} }) { + @{ $self->{rearrangeHash} }{ @{ $self->{rearrange} } } = (); + } + + return $self; +} + +sub read_table { + my $self = shift; + my $file = $self->{table} ne '' ? $self->{table} : $KeyFile; + + my $filepath = File::Spec->catfile($Path, $file); + open my $fk, "<$filepath" + or croak "File does not exist at $filepath"; + + while (<$fk>) { + next if /^\s*#/; + if (/^\s*\@/) { + if (/^\@version\s*(\S*)/) { + $self->{version} ||= $1; + } + elsif (/^\@alternate\s+(.*)/) { + $self->{alternate} ||= $1; + } + elsif (/^\@backwards\s+(.*)/) { + push @{ $self->{backwards} }, $1; + } + elsif (/^\@rearrange\s+(.*)/) { + push @{ $self->{rearrange} }, _getHexArray($1); + } + next; + } + $self->parseEntry($_); + } + close $fk; +} + + +## +## get $line, parse it, and write an entry in $self +## +sub parseEntry +{ + my $self = shift; + my $line = shift; + my($name, $ele, @key); + + return if $line !~ /^\s*[0-9A-Fa-f]/; + + # removes comment and gets name + $name = $1 + if $line =~ s/[#%]\s*(.*)//; + return if defined $self->{undefName} && $name =~ /$self->{undefName}/; + + # gets element + my($e, $k) = split /;/, $line; + croak "Wrong Entry: <charList> must be separated by ';' from <collElement>" + if ! $k; + + my @e = _getHexArray($e); + $ele = pack('U*', @e); + return if defined $self->{undefChar} && $ele =~ /$self->{undefChar}/; + + # get sort key + if (defined $self->{ignoreName} && $name =~ /$self->{ignoreName}/ || + defined $self->{ignoreChar} && $ele =~ /$self->{ignoreChar}/) + { + $self->{entries}{$ele} = $self->{ignored}{$ele} = 1; + } + else { + my $combining = 1; # primary = 0, secondary != 0; + + foreach my $arr ($k =~ /\[([^\[\]]+)\]/g) { # SPACEs allowed + my $var = $arr =~ /\*/; # exactly /^\*/ but be lenient. 
+ push @key, $self->altCE($var, _getHexArray($arr)); + $combining = 0 unless $key[-1][0] == 0 && $key[-1][1] != 0; + } + $self->{entries}{$ele} = \@key; + $self->{combining}{$ele} = 1 if $combining; + } + $self->{maxlength}{ord $ele} = scalar @e if @e > 1; +} + + +## +## arrayref CE = altCE(bool variable?, list[num] weights) +## +sub altCE +{ + my $self = shift; + my $var = shift; + my @c = @_; + + $self->{alternate} eq 'blanked' ? + $var ? [0,0,0,$c[3]] : \@c : + $self->{alternate} eq 'non-ignorable' ? + \@c : + $self->{alternate} eq 'shifted' ? + $var ? [0,0,0,$c[0] ] : [ @c[0..2], $c[0]+$c[1]+$c[2] ? 0xFFFF : 0 ] : + $self->{alternate} eq 'shift-trimmed' ? + $var ? [0,0,0,$c[0] ] : [ @c[0..2], 0 ] : + croak "$PACKAGE unknown alternate name: $self->{alternate}"; +} + +## +## string hex_sortkey = splitCE(string arg) +## +sub viewSortKey +{ + my $self = shift; + my $key = $self->getSortKey(@_); + my $view = join " ", map sprintf("%04X", $_), unpack 'n*', $key; + $view =~ s/ ?0000 ?/|/g; + return "[$view]"; +} + + +## +## list[strings] elements = splitCE(string arg) +## +sub splitCE +{ + my $self = shift; + my $code = $self->{preprocess}; + my $norm = $self->{UNF}; + my $ent = $self->{entries}; + my $max = $self->{maxlength}; + my $reH = $self->{rearrangeHash}; + + my $str = ref $code ? 
&$code(shift) : shift; + $str = &$norm($str) if ref $norm; + + my @src = unpack('U*', $str); + my @buf; + + # rearrangement + if ($reH) { + for (my $i = 0; $i < @src; $i++) { + if (exists $reH->{ $src[$i] } && $i + 1 < @src) { + ($src[$i], $src[$i+1]) = ($src[$i+1], $src[$i]); + $i++; + } + } + } + + for (my $i = 0; $i < @src; $i++) { + my $ch; + my $u = $src[$i]; + + # non-characters + next unless defined $u; + next if $u < 0 || 0x10FFFF < $u # out of range + || (0xD800 <= $u && $u <= 0xDFFF); # unpaired surrogates + my $four = $u & 0xFFFF; + next if $four == 0xFFFE || $four == 0xFFFF; + + if ($max->{$u}) { # contract + for (my $j = $max->{$u}; $j >= 1; $j--) { + next unless $i+$j-1 < @src; + $ch = pack 'U*', @src[$i .. $i+$j-1]; + $i += $j-1, last if $ent->{$ch}; + } + } else { + $ch = pack('U', $u); + } + + # with Combining Char (UTS#10, 4.2.1), here requires Unicode::Normalize. + if ($getCombinClass && defined $ch) { + for (my $j = $i+1; $j < @src; $j++) { + next unless defined $src[$j]; + last unless $getCombinClass->( $src[$j] ); + my $comb = pack 'U', $src[$j]; + next if ! $ent->{ $ch.$comb }; + $ch .= $comb; + $src[$j] = undef; + } + } + push @buf, $ch; + } + wantarray ? @buf : \@buf; +} + + +## +## list[arrayrefs] weight = getWt(string element) +## +sub getWt +{ + my $self = shift; + my $ch = shift; + my $ent = $self->{entries}; + my $ign = $self->{ignored}; + my $cjk = $self->{overrideCJK}; + my $hang = $self->{overrideHangul}; + + return if !defined $ch || $ign->{$ch}; # ignored + return @{ $ent->{$ch} } if $ent->{$ch}; + my $u = unpack('U', $ch); + + if (0xAC00 <= $u && $u <= 0xD7A3) { # is_Hangul + return $hang + ? &$hang($u) + : defined $hang + ? map({ + my $v = $_; + my $ar = $ent->{pack('U', $v)}; + $ar ? 
@$ar : map($self->altCE(0,@$_), _derivCE($v)); + } _decompHangul($u)) + : map($self->altCE(0,@$_), _derivCE($u)); + } + elsif (0x3400 <= $u && $u <= 0x4DB5 || + 0x4E00 <= $u && $u <= 0x9FA5 || + 0x20000 <= $u && $u <= 0x2A6D6) { # is_CJK + return $cjk + ? &$cjk($u) + : defined $cjk && $u <= 0xFFFF + ? $self->altCE(0, ($u, 0x20, 0x02, $u)) + : map($self->altCE(0,@$_), _derivCE($u)); + } + else { + return map($self->altCE(0,@$_), _derivCE($u)); + } +} + +## +## int = index(string, substring) +## +sub index +{ + my $self = shift; + my $lev = $self->{level}; + my $comb = $self->{combining}; + my $str = $self->splitCE(shift); + my $sub = $self->splitCE(shift); + + return wantarray ? (0,0) : 0 if ! @$sub; + return wantarray ? () : -1 if ! @$str; + + my @subWt = grep _ignorableAtLevel($_,$lev), + map $self->getWt($_), @$sub; + + my(@strWt,@strPt); + my $count = 0; + for (my $i = 0; $i < @$str; $i++) { + my $go_ahead = 0; + + my @tmp = grep _ignorableAtLevel($_,$lev), $self->getWt($str->[$i]); + $go_ahead += length $str->[$i]; + + # /*XXX*/ still broken. + # index("e\x{300}", "e") should be 'no match' at level 2 or higher + # as "e\x{300}" is a *single* grapheme cluster and not equal to "e". + + # go ahead as far as we find a combining character; + while ($i + 1 < @$str && + (! defined $str->[$i+1] || $comb->{ $str->[$i+1] }) ) { + $i++; + $go_ahead += length $str->[$i]; + next if ! defined $str->[$i]; + push @tmp, + grep _ignorableAtLevel($_,$lev), $self->getWt($str->[$i]); + } + + push @strWt, @tmp; + push @strPt, ($count) x @tmp; + $count += $go_ahead; + + while (@strWt >= @subWt) { + if (_eqArray(\@strWt, \@subWt, $lev)) { + my $pos = $strPt[0]; + return wantarray ? ($pos, $count-$pos) : $pos; + } + shift @strWt; + shift @strPt; + } + } + return wantarray ? 
() : -1; +} + +## +## bool _eqArray(arrayref, arrayref, level) +## +sub _eqArray($$$) +{ + my $a = shift; # length $a >= length $b; + my $b = shift; + my $lev = shift; + for my $v (0..$lev-1) { + for my $c (0..@$b-1){ + return if $a->[$c][$v] != $b->[$c][$v]; + } + } + return 1; +} + + +## +## bool _ignorableAtLevel(CE, level) +## +sub _ignorableAtLevel($$) +{ + my $ce = shift; + return unless defined $ce; + my $lv = shift; + return ! grep { ! $ce->[$_] } 0..$lv-1; +} + + +## +## string sortkey = getSortKey(string arg) +## +sub getSortKey +{ + my $self = shift; + my $lev = $self->{level}; + my $rCE = $self->splitCE(shift); # get an arrayref + + # weight arrays + my @buf = grep defined(), map $self->getWt($_), @$rCE; + + # make sort key + my @ret = ([],[],[],[]); + foreach my $v (0..$lev-1) { + foreach my $b (@buf) { + push @{ $ret[$v] }, $b->[$v] if $b->[$v]; + } + } + foreach (@{ $self->{backwards} }) { + my $v = $_ - 1; + @{ $ret[$v] } = reverse @{ $ret[$v] }; + } + + # modification of tertiary weights + if ($self->{upper_before_lower}) { + foreach (@{ $ret[2] }) { + if (0x8 <= $_ && $_ <= 0xC) { $_ -= 6 } # lower + elsif (0x2 <= $_ && $_ <= 0x6) { $_ += 6 } # upper + elsif ($_ == 0x1C) { $_ += 1 } # square upper + elsif ($_ == 0x1D) { $_ -= 1 } # square lower + } + } + if ($self->{katakana_before_hiragana}) { + foreach (@{ $ret[2] }) { + if (0x0F <= $_ && $_ <= 0x13) { $_ -= 2 } # katakana + elsif (0x0D <= $_ && $_ <= 0x0E) { $_ += 5 } # hiragana + } + } + join "\0\0", map pack('n*', @$_), @ret; +} + + +## +## int compare = cmp(string a, string b) +## +sub cmp { $_[0]->getSortKey($_[1]) cmp $_[0]->getSortKey($_[2]) } +sub eq { $_[0]->getSortKey($_[1]) eq $_[0]->getSortKey($_[2]) } +sub ne { $_[0]->getSortKey($_[1]) ne $_[0]->getSortKey($_[2]) } +sub lt { $_[0]->getSortKey($_[1]) lt $_[0]->getSortKey($_[2]) } +sub le { $_[0]->getSortKey($_[1]) le $_[0]->getSortKey($_[2]) } +sub gt { $_[0]->getSortKey($_[1]) gt $_[0]->getSortKey($_[2]) } +sub ge { 
$_[0]->getSortKey($_[1]) ge $_[0]->getSortKey($_[2]) } + +## +## list[strings] sorted = sort(list[strings] arg) +## +sub sort { + my $obj = shift; + return + map { $_->[1] } + sort{ $a->[0] cmp $b->[0] } + map [ $obj->getSortKey($_), $_ ], @_; +} + +## +## list[arrayrefs] CE = _derivCE(int codepoint) +## +sub _derivCE { + my $code = shift; + my $a = UNDEFINED + ($code >> 15); # ok + my $b = ($code & 0x7FFF) | 0x8000; # ok +# my $a = 0xFFC2 + ($code >> 15); # ng +# my $b = $code & 0x7FFF | 0x1000; # ng + $b ? ([$a,2,1,$code],[$b,0,0,$code]) : [$a,2,1,$code]; +} + +## +## "hhhh hhhh hhhh" to (dddd, dddd, dddd) +## +sub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g } + +# +# $code must be in Hangul syllable. +# Check it before you enter here. +# +sub _decompHangul { + my $code = shift; + my $SIndex = $code - 0xAC00; + my $LIndex = int( $SIndex / 588); + my $VIndex = int(($SIndex % 588) / 28); + my $TIndex = $SIndex % 28; + return ( + 0x1100 + $LIndex, + 0x1161 + $VIndex, + $TIndex ? (0x11A7 + $TIndex) : (), + ); +} + +1; +__END__ + +=head1 NAME + +Unicode::Collate - Unicode Collation Algorithm + +=head1 SYNOPSIS + + use Unicode::Collate; + + #construct + $Collator = Unicode::Collate->new(%tailoring); + + #sort + @sorted = $Collator->sort(@not_sorted); + + #compare + $result = $Collator->cmp($a, $b); # returns 1, 0, or -1. + +=head1 DESCRIPTION + +=head2 Constructor and Tailoring + +The C<new> method returns a collator object. 
+ + $Collator = Unicode::Collate->new( + alternate => $alternate, + backwards => $levelNumber, # or \@levelNumbers + entry => $element, + normalization => $normalization_form, + ignoreName => qr/$ignoreName/, + ignoreChar => qr/$ignoreChar/, + katakana_before_hiragana => $bool, + level => $collationLevel, + overrideCJK => \&overrideCJK, + overrideHangul => \&overrideHangul, + preprocess => \&preprocess, + rearrange => \@charList, + table => $filename, + undefName => qr/$undefName/, + undefChar => qr/$undefChar/, + upper_before_lower => $bool, + ); + # if %tailoring is false (i.e. empty), + # $Collator should do the default collation. + +=over 4 + +=item alternate + +-- see 3.2.2 Alternate Weighting, UTR #10. + +This key allows to alternate weighting for variable collation elements, +which are marked with an ASTERISK in the table +(NOTE: Many punction marks and symbols are variable in F<allkeys.txt>). + + alternate => 'blanked', 'non-ignorable', 'shifted', or 'shift-trimmed'. + +These names are case-insensitive. +By default (if specification is omitted), 'shifted' is adopted. + + 'Blanked' Variable elements are ignorable at levels 1 through 3; + considered at the 4th level. + + 'Non-ignorable' Variable elements are not reset to ignorable. + + 'Shifted' Variable elements are ignorable at levels 1 through 3 + their level 4 weight is replaced by the old level 1 weight. + Level 4 weight for Non-Variable elements is 0xFFFF. + + 'Shift-Trimmed' Same as 'shifted', but all FFFF's at the 4th level + are trimmed. + +=item backwards + +-- see 3.1.2 French Accents, UTR #10. + + backwards => $levelNumber or \@levelNumbers + +Weights in reverse order; ex. level 2 (diacritic ordering) in French. +If omitted, forwards at all the levels. + +=item entry + +-- see 3.1 Linguistic Features; 3.2.1 File Format, UTR #10. 
+ +Overrides a default order or defines additional collation elements + + entry => <<'ENTRIES', # use the UCA file format +00E6 ; [.0861.0020.0002.00E6] [.08B1.0020.0002.00E6] # ligature <ae> as <a><e> +0063 0068 ; [.0893.0020.0002.0063] # "ch" in traditional Spanish +0043 0068 ; [.0893.0020.0008.0043] # "Ch" in traditional Spanish +ENTRIES + +=item ignoreName + +=item ignoreChar + +-- see Completely Ignorable, 3.2.2 Alternate Weighting, UTR #10. + +Makes the entry in the table ignorable. +If a collation element is ignorable, +it is ignored as if the element had been deleted from there. + +E.g. when 'a' and 'e' are ignorable, +'element' is equal to 'lament' (or 'lmnt'). + +=item level + +-- see 4.3 Form a sort key for each string, UTR #10. + +Set the maximum level. +Any higher levels than the specified one are ignored. + + Level 1: alphabetic ordering + Level 2: diacritic ordering + Level 3: case ordering + Level 4: tie-breaking (e.g. in the case when alternate is 'shifted') + + ex.level => 2, + +If omitted, the maximum is the 4th. + +=item normalization + +-- see 4.1 Normalize each input string, UTR #10. + +If specified, strings are normalized before preparation of sort keys +(the normalization is executed after preprocess). + +As a form name, one of the following names must be used. + + 'C' or 'NFC' for Normalization Form C + 'D' or 'NFD' for Normalization Form D + 'KC' or 'NFKC' for Normalization Form KC + 'KD' or 'NFKD' for Normalization Form KD + +If omitted, the string is put into Normalization Form D. + +If C<undef> is passed explicitly as the value for this key, +any normalization is not carried out (this may make tailoring easier +if any normalization is not desired). + +see B<CAVEAT>. + +=item overrideCJK + +-- see 7.1 Derived Collation Elements, UTR #10. + +By default, mapping of CJK Unified Ideographs +uses the Unicode codepoint order. +But the mapping of CJK Unified Ideographs may be overrided. + +ex. CJK Unified Ideographs in the JIS code point order. 
+ + overrideCJK => sub { + my $u = shift; # get a Unicode codepoint + my $b = pack('n', $u); # to UTF-16BE + my $s = your_unicode_to_sjis_converter($b); # convert + my $n = unpack('n', $s); # convert sjis to short + [ $n, 0x20, 0x2, $u ]; # return the collation element + }, + +ex. ignores all CJK Unified Ideographs. + + overrideCJK => sub {()}, # CODEREF returning empty list + + # where ->eq("Pe\x{4E00}rl", "Perl") is true + # as U+4E00 is a CJK Unified Ideograph and to be ignorable. + +If C<undef> is passed explicitly as the value for this key, +weights for CJK Unified Ideographs are treated as undefined. +But assignment of weight for CJK Unified Ideographs +in table or L<entry> is still valid. + +=item overrideHangul + +-- see 7.1 Derived Collation Elements, UTR #10. + +By default, Hangul Syllables are decomposed into Hangul Jamo. +But the mapping of Hangul Syllables may be overrided. + +This tag works like L<overrideCJK>, so see there for examples. + +If you want to override the mapping of Hangul Syllables, +the Normalization Forms D and KD are not appropriate +(they will be decomposed before overriding). + +If C<undef> is passed explicitly as the value for this key, +weight for Hangul Syllables is treated as undefined +without decomposition into Hangul Jamo. +But definition of weight for Hangul Syllables +in table or L<entry> is still valid. + +=item preprocess + +-- see 5.1 Preprocessing, UTR #10. + +If specified, the coderef is used to preprocess +before the formation of sort keys. + +ex. dropping English articles, such as "a" or "the". +Then, "the pen" is before "a pencil". + + preprocess => sub { + my $str = shift; + $str =~ s/\b(?:an?|the)\s+//gi; + $str; + }, + +=item rearrange + +-- see 3.1.3 Rearrangement, UTR #10. + +Characters that are not coded in logical order and to be rearranged. 
+By default, + + rearrange => [ 0x0E40..0x0E44, 0x0EC0..0x0EC4 ], + +If you want to disallow any rearrangement, +pass C<undef> or C<[]> (a reference to an empty list) +as the value for this key. + +=item table + +-- see 3.2 Default Unicode Collation Element Table, UTR #10. + +You can use another element table if desired. +The table file must be in your C<lib/Unicode/Collate> directory. + +By default, the file C<lib/Unicode/Collate/allkeys.txt> is used. + +If C<undef> is passed explicitly as the value for this key, +no file is read (but you can define collation elements via L<entry>). + +A typical way to define a collation element table +without any file of table: + + $onlyABC = Unicode::Collate->new( + table => undef, + entry => << 'ENTRIES', +0061 ; [.0101.0020.0002.0061] # LATIN SMALL LETTER A +0041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A +0062 ; [.0102.0020.0002.0062] # LATIN SMALL LETTER B +0042 ; [.0102.0020.0008.0042] # LATIN CAPITAL LETTER B +0063 ; [.0103.0020.0002.0063] # LATIN SMALL LETTER C +0043 ; [.0103.0020.0008.0043] # LATIN CAPITAL LETTER C +ENTRIES + ); + +=item undefName + +=item undefChar + +-- see 6.3.4 Reducing the Repertoire, UTR #10. + +Undefines the collation element as if it were unassigned in the table. +This reduces the size of the table. +If an unassigned character appears in the string to be collated, +the sort key is made from its codepoint +as a single-character collation element, +as it is greater than any other assigned collation elements +(in the codepoint order among the unassigned characters). +But, it'd be better to ignore characters +unfamiliar to you and maybe never used. + +=item katakana_before_hiragana + +=item upper_before_lower + +-- see 6.6 Case Comparisons; 7.3.1 Tertiary Weight Table, UTR #10. + +By default, lowercase is before uppercase +and hiragana is before katakana. + +If the tag is made true, this is reversed. 
+ +B<NOTE>: These tags simplemindedly assume +any lowercase/uppercase or hiragana/katakana distinctions +should occur in level 3, and their weights at level 3 +should be same as those mentioned in 7.3.1, UTR #10. +If you define your collation elements which violates this, +these tags doesn't work validly. + +=back + +=head2 Methods for Collation + +=over 4 + +=item C<@sorted = $Collator-E<gt>sort(@not_sorted)> + +Sorts a list of strings. + +=item C<$result = $Collator-E<gt>cmp($a, $b)> + +Returns 1 (when C<$a> is greater than C<$b>) +or 0 (when C<$a> is equal to C<$b>) +or -1 (when C<$a> is lesser than C<$b>). + +=item C<$result = $Collator-E<gt>eq($a, $b)> + +=item C<$result = $Collator-E<gt>ne($a, $b)> + +=item C<$result = $Collator-E<gt>lt($a, $b)> + +=item C<$result = $Collator-E<gt>le($a, $b)> + +=item C<$result = $Collator-E<gt>gt($a, $b)> + +=item C<$result = $Collator-E<gt>ge($a, $b)> + +They works like the same name operators as theirs. + + eq : whether $a is equal to $b. + ne : whether $a is not equal to $b. + lt : whether $a is lesser than $b. + le : whether $a is lesser than $b or equal to $b. + gt : whether $a is greater than $b. + ge : whether $a is greater than $b or equal to $b. + +=item C<$sortKey = $Collator-E<gt>getSortKey($string)> + +-- see 4.3 Form a sort key for each string, UTR #10. + +Returns a sort key. + +You compare the sort keys using a binary comparison +and get the result of the comparison of the strings using UCA. + + $Collator->getSortKey($a) cmp $Collator->getSortKey($b) + + is equivalent to + + $Collator->cmp($a, $b) + +=item C<$sortKeyForm = $Collator-E<gt>viewSortKey($string)> + +Returns a string formalized to display a sort key. +Weights are enclosed with C<'['> and C<']'> +and level boundaries are denoted by C<'|'>. 
+ + use Unicode::Collate; + my $c = Unicode::Collate->new(); + print $c->viewSortKey("Perl"),"\n"; + + # output: + # [09B3 08B1 09CB 094F|0020 0020 0020 0020|0008 0002 0002 0002|FFFF FFFF FFFF FFFF] + # Level 1 Level 2 Level 3 Level 4 + +=item C<$position = $Collator-E<gt>index($string, $substring)> + +=item C<($position, $length) = $Collator-E<gt>index($string, $substring)> + +-- see 6.8 Searching, UTR #10. + +If C<$substring> matches a part of C<$string>, returns +the position of the first occurrence of the matching part in scalar context; +in list context, returns a two-element list of +the position and the length of the matching part. + +B<Notice> that the length of the matching part may differ from +the length of C<$substring>. + +B<Note> that the position and the length are counted on the string +after the process of preprocess, normalization, and rearrangement. +Therefore, in case the specified string is not binary equal to +the preprocessed/normalized/rearranged string, the position and the length +may differ form those on the specified string. But it is guaranteed +that, if matched, it returns a non-negative value as C<$position>. + +If C<$substring> does not match any part of C<$string>, +returns C<-1> in scalar context and +an empty list in list context. + +e.g. you say + + my $Collator = Unicode::Collate->new( normalization => undef, level => 1 ); + my $str = "Ich mu\x{00DF} studieren."; + my $sub = "m\x{00FC}ss"; + my $match; + if (my($pos,$len) = $Collator->index($str, $sub)) { + $match = substr($str, $pos, $len); + } + +and get C<"mu\x{00DF}"> in C<$match> since C<"mu>E<223>C<"> +is primary equal to C<"m>E<252>C<ss">. + +=back + +=head2 Other Methods + +=over 4 + +=item UCA_Version + +Returns the version number of Unicode Technical Standard 10 +this module consults. + +=item Base_Unicode_Version + +Returns the version number of the Unicode Standard +this module is based on. + +=back + +=head2 EXPORT + +None by default. 
+ +=head2 TODO + +Unicode::Collate has not been ported to EBCDIC. The code mostly would +work just fine but a decision needs to be made: how the module should +work in EBCDIC? Should the low 256 characters be understood as +Unicode or as EBCDIC code points? Should one be chosen or should +there be a way to do either? Or should such translation be left +outside the module for the user to do, for example by using +Encode::from_to()? +(or utf8::unicode_to_native()/utf8::native_to_unicode()?) + +=head2 CAVEAT + +Use of the C<normalization> parameter requires +the B<Unicode::Normalize> module. + +If you need not it (say, in the case when you need not +handle any combining characters), +assign C<normalization =E<gt> undef> explicitly. + +-- see 6.5 Avoiding Normalization, UTR #10. + +=head2 BUGS + +C<index()> is an experimental method and +its return value may be unreliable. +The correct implementation for C<index()> must be based +on Locale-Sensitive Support: Level 3 in UTR #18, +F<Unicode Regular Expression Guidelines>. + +See also 4.2 Locale-Dependent Graphemes in UTR #18. + +=head1 AUTHOR + +SADAHIRO Tomoyuki, E<lt>SADAHIRO@cpan.orgE<gt> + + http://homepage1.nifty.com/nomenclator/perl/ + + Copyright(C) 2001-2002, SADAHIRO Tomoyuki. Japan. All rights reserved. + + This library is free software; you can redistribute it + and/or modify it under the same terms as Perl itself. 
+ +=head1 SEE ALSO + +=over 4 + +=item http://www.unicode.org/unicode/reports/tr10/ + +Unicode Collation Algorithm - UTR #10 + +=item http://www.unicode.org/unicode/reports/tr10/allkeys.txt + +The Default Unicode Collation Element Table + +=item http://www.unicode.org/unicode/reports/tr15/ + +Unicode Normalization Forms - UAX #15 + +=item http://www.unicode.org/unicode/reports/tr18 + +Unicode Regular Expression Guidelines - UTR #18 + +=item L<Unicode::Normalize> + +=back + +=cut diff --git a/gnu/usr.bin/perl/lib/Unicode/Collate/Changes b/gnu/usr.bin/perl/lib/Unicode/Collate/Changes new file mode 100644 index 00000000000..997117c6700 --- /dev/null +++ b/gnu/usr.bin/perl/lib/Unicode/Collate/Changes @@ -0,0 +1,58 @@ +Revision history for Perl extension Unicode::Collate. + +0.12 Sun May 05 09:43:10 2002 + - add new methods, ->UCA_Version and ->Base_Unicode_Version. + - test fix: removed the needless requirement of Unicode::Normalize. + [reported by David Hand] + +0.11 Fri May 03 02:28:10 2002 + - fix: now derived collation elements can be used for Hangul Jamo + when their weights are not defined. + [reported by Andreas J. Koenig] + - fix: rearrangements had not worked. + - mentioned pleblem on index() in BUGS. + - more documents, more tests. + - tag names for 'alternate' are case-insensitive (i.e. 'SHIFTed' etc.). + - The <undef> value for the keys "overrideCJK", "overrideHangul", + "rearrange" has a special behavior (different from default). + +0.10 Tue Dec 11 23:26:42 2001 + - now you are allowed to use no table file. + - fix: fetching CE with two or more combining characters. + +0.09 Sun Nov 11 17:02:40:18 2001 + - add the following methods: eq, ne, lt, le, gt, le. + - relies on &Unicode::Normalize::getCombinClass() + in place of %Unicode::Normalize::Combin + (the hash is not defined in the XS version of Unicode::Normalize). + then you should install Unicode::Normalize 0.10 or later. 
+ - now independent of Lingua::KO::Hangul::Util + (this module does decomposition of Hangul syllables for itself) + +0.08 Mon Aug 20 22:40:18 2001 + - add the index method. + +0.07 Thu Aug 16 23:42:02 2001 + - rename the module name to Unicode::Collate. + +0.06 Thu Aug 16 23:18:36 2001 + - add description of the getSortKey method. + +0.05 Mon Aug 13 22:23:11 2001 + - bug fix: on the things of 4.2.1, UTR #10 + - getSortKey returns a string, but not an arrayref. + +0.04 Mon Aug 13 22:23:11 2001 + - some bugs are fixed. + - some tailoring parameters are added. + +0.03 Mon Aug 06 06:26:35 2001 + - modify README + +0.02 Sun Aug 05 20:20:01 2001 + - some fix + +0.01 Sun Jul 29 16:16:15 2001 + - original version; created by h2xs 1.21 + with options -A -X -n Sort::UCA + diff --git a/gnu/usr.bin/perl/lib/Unicode/Collate/README b/gnu/usr.bin/perl/lib/Unicode/Collate/README new file mode 100644 index 00000000000..4d4f12ce977 --- /dev/null +++ b/gnu/usr.bin/perl/lib/Unicode/Collate/README @@ -0,0 +1,57 @@ +Unicode/Collate version 0.12 +=============================== + +Unicode::Collate - Unicode Collation Algorithm + + UCA - Unicode TR #10. + http://www.unicode.org/unicode/reports/tr10/ + + Fetch the following file and put it into the Unicode/Collate directory. + http://www.unicode.org/unicode/reports/tr10/allkeys.txt + + You can install this module using a subset "keys.txt" + contained in this distribution without the "allkeys.txt"; + but "keys.txt" is small and not very useful. + +SYNOPSIS + + use Unicode::Collate; + + #construct + $Collator = Unicode::Collate->new(%tailoring); + + #sort + @sorted = $Collator->sort(@not_sorted); + + #compare + $result = $Collator->cmp($a, $b); # returns 1, 0, or -1. 
+ $result = $Collator->eq($a, $b); # returns true/false + (similarly ->ne, ->lt, ->le, ->gt, ->ge) + +INSTALLATION + +Perl 5.006 or later + +To install this module type the following: + + perl Makefile.PL + make + make test + make install + +DEPENDENCIES + + It's better if you have Unicode::Normalize (v 0.10 or later) + although Unicode::Collate can be used without Unicode::Normalize. + +COPYRIGHT AND LICENCE + +SADAHIRO Tomoyuki <bqw10602@nifty.com> + + http://homepage1.nifty.com/nomenclator/perl/ + + Copyright(C) 2001-2002, SADAHIRO Tomoyuki. Japan. All rights reserved. + + This library is free software; you can redistribute it + and/or modify it under the same terms as Perl itself. + diff --git a/gnu/usr.bin/perl/lib/Unicode/Collate/keys.txt b/gnu/usr.bin/perl/lib/Unicode/Collate/keys.txt new file mode 100644 index 00000000000..5fe3ebef624 --- /dev/null +++ b/gnu/usr.bin/perl/lib/Unicode/Collate/keys.txt @@ -0,0 +1,1930 @@ +#### This file is generated from allkeys-3.0.1d9.txt (unicode.org) +#### by deleting many many elements. +#### +#### Remaining elements include LATIN, HANGUL, HIRAGANA, KATAKANA, +#### BOPOMOFO, CJK UNIFIED IDEOGRAPHS. +#### +#### To fetch the original file, access to: +#### http://www.unicode.org/unicode/reports/tr10/allkeys.txt +#### +# allkeys-3.0.1d9.txt +# Created: 2001-Feb-22 +# Posted: 2001-Mar-29 +# +# Note: This file was originally posted with the header section +# omitted, together with the @version and @rearrange lines. This +# 2001-Mar-29 reposting corrects that omission. All of the weight +# entries are identical to the originally posted allkeys.txt for +# UTS #10, Version 8.0. 
+# +# Derived from: unidata-3.0.1d9.txt +# Sifter version: 3.0.1d4, 2001-Feb-22 + +@version 3.0.1d9 + +@rearrange 0E40,0E41,0E42,0E43,0E44 +@rearrange 0EC0,0EC1,0EC2,0EC3,0EC4 + +0009 ; [*0201.0020.0002.0009] # HORIZONTAL TABULATION (in 6429) +000A ; [*0202.0020.0002.000A] # LINE FEED (in 6429) +000B ; [*0203.0020.0002.000B] # VERTICAL TABULATION (in 6429) +000C ; [*0204.0020.0002.000C] # FORM FEED (in 6429) +000D ; [*0205.0020.0002.000D] # CARRIAGE RETURN (in 6429) +0020 ; [*0209.0020.0002.0020] # SPACE +0021 ; [*0237.0020.0002.0021] # EXCLAMATION MARK +0022 ; [*025C.0020.0002.0022] # QUOTATION MARK +0023 ; [*0295.0020.0002.0023] # NUMBER SIGN +0025 ; [*0296.0020.0002.0025] # PERCENT SIGN +0026 ; [*0293.0020.0002.0026] # AMPERSAND +0027 ; [*0255.0020.0002.0027] # APOSTROPHE +0028 ; [*0266.0020.0002.0028] # LEFT PARENTHESIS +0029 ; [*0267.0020.0002.0029] # RIGHT PARENTHESIS +002A ; [*028E.0020.0002.002A] # ASTERISK +002B ; [*038B.0020.0002.002B] # PLUS SIGN +002C ; [*0219.0020.0002.002C] # COMMA +002D ; [*020D.0020.0002.002D] # HYPHEN-MINUS +002E ; [*0241.0020.0002.002E] # FULL STOP +002F ; [*0290.0020.0002.002F] # SOLIDUS +003A ; [*0223.0020.0002.003A] # COLON +003B ; [*0221.0020.0002.003B] # SEMICOLON +003C ; [*038F.0020.0002.003C] # LESS-THAN SIGN +003D ; [*0390.0020.0002.003D] # EQUALS SIGN +003E ; [*0391.0020.0002.003E] # GREATER-THAN SIGN +003F ; [*023A.0020.0002.003F] # QUESTION MARK +0040 ; [*028D.0020.0002.0040] # COMMERCIAL AT +005C ; [*0292.0020.0002.005C] # REVERSE SOLIDUS +007B ; [*026A.0020.0002.007B] # LEFT CURLY BRACKET +007C ; [*0393.0020.0002.007C] # VERTICAL LINE +007D ; [*026B.0020.0002.007D] # RIGHT CURLY BRACKET +007E ; [*0396.0020.0002.007E] # TILDE +0085 ; [*0206.0020.0002.0085] # NEXT LINE (in 6429) +00A1 ; [*0238.0020.0002.00A1] # INVERTED EXCLAMATION MARK +00A6 ; [*0394.0020.0002.00A6] # BROKEN BAR +00A7 ; [*0288.0020.0002.00A7] # SECTION SIGN +00A9 ; [*028B.0020.0002.00A9] # COPYRIGHT SIGN +00AC ; [*0392.0020.0002.00AC] # NOT SIGN +00AD 
; [*020C.0020.0002.00AD] # SOFT HYPHEN +00AE ; [*028C.0020.0002.00AE] # REGISTERED SIGN +00B0 ; [*02F6.0020.0002.00B0] # DEGREE SIGN +00B1 ; [*038C.0020.0002.00B1] # PLUS-MINUS SIGN +00B6 ; [*0289.0020.0002.00B6] # PILCROW SIGN +00B7 ; [*024B.0020.0002.00B7] # MIDDLE DOT +00BF ; [*023B.0020.0002.00BF] # INVERTED QUESTION MARK +00D7 ; [*038E.0020.0002.00D7] # MULTIPLICATION SIGN +00F7 ; [*038D.0020.0002.00F7] # DIVISION SIGN +02B9 ; [*02D5.0020.0002.02B9] # MODIFIER LETTER PRIME +02BA ; [*02D7.0020.0002.02BA] # MODIFIER LETTER DOUBLE PRIME +02C2 ; [*02D8.0020.0002.02C2] # MODIFIER LETTER LEFT ARROWHEAD +02C3 ; [*02D9.0020.0002.02C3] # MODIFIER LETTER RIGHT ARROWHEAD +02C4 ; [*02DA.0020.0002.02C4] # MODIFIER LETTER UP ARROWHEAD +02C5 ; [*02DB.0020.0002.02C5] # MODIFIER LETTER DOWN ARROWHEAD +02C6 ; [*02DC.0020.0002.02C6] # MODIFIER LETTER CIRCUMFLEX ACCENT +02C7 ; [*02DD.0020.0002.02C7] # CARON +02C8 ; [*02DE.0020.0002.02C8] # MODIFIER LETTER VERTICAL LINE +02C9 ; [*02DF.0020.0002.02C9] # MODIFIER LETTER MACRON +02CA ; [*02E0.0020.0002.02CA] # MODIFIER LETTER ACUTE ACCENT +02CB ; [*02E1.0020.0002.02CB] # MODIFIER LETTER GRAVE ACCENT +02CC ; [*02E2.0020.0002.02CC] # MODIFIER LETTER LOW VERTICAL LINE +02CD ; [*02E3.0020.0002.02CD] # MODIFIER LETTER LOW MACRON +02CE ; [*02E4.0020.0002.02CE] # MODIFIER LETTER LOW GRAVE ACCENT +02CF ; [*02E5.0020.0002.02CF] # MODIFIER LETTER LOW ACUTE ACCENT +02D2 ; [*02E6.0020.0002.02D2] # MODIFIER LETTER CENTRED RIGHT HALF RING +02D3 ; [*02E7.0020.0002.02D3] # MODIFIER LETTER CENTRED LEFT HALF RING +02D4 ; [*02E8.0020.0002.02D4] # MODIFIER LETTER UP TACK +02D5 ; [*02E9.0020.0002.02D5] # MODIFIER LETTER DOWN TACK +02D6 ; [*02EA.0020.0002.02D6] # MODIFIER LETTER PLUS SIGN +02D7 ; [*02EB.0020.0002.02D7] # MODIFIER LETTER MINUS SIGN +02DE ; [*02EC.0020.0002.02DE] # MODIFIER LETTER RHOTIC HOOK +02E5 ; [*02ED.0020.0002.02E5] # MODIFIER LETTER EXTRA-HIGH TONE BAR +02E6 ; [*02EE.0020.0002.02E6] # MODIFIER LETTER HIGH TONE BAR +02E7 ; 
[*02EF.0020.0002.02E7] # MODIFIER LETTER MID TONE BAR +02E8 ; [*02F0.0020.0002.02E8] # MODIFIER LETTER LOW TONE BAR +02E9 ; [*02F1.0020.0002.02E9] # MODIFIER LETTER EXTRA-LOW TONE BAR +02EA ; [*02F2.0020.0002.02EA] # MODIFIER LETTER YIN DEPARTING TONE MARK +02EB ; [*02F3.0020.0002.02EB] # MODIFIER LETTER YANG DEPARTING TONE MARK +02EC ; [*02F4.0020.0002.02EC] # MODIFIER LETTER VOICING +02ED ; [*02F5.0020.0002.02ED] # MODIFIER LETTER UNASPIRATED +2000 ; [*0209.0020.0004.2000] # EN QUAD; CANON +2001 ; [*0209.0020.0004.2001] # EM QUAD; CANON +2010 ; [*0211.0020.0002.2010] # HYPHEN +2012 ; [*0212.0020.0002.2012] # FIGURE DASH +2013 ; [*0213.0020.0002.2013] # EN DASH +2014 ; [*0214.0020.0002.2014] # EM DASH +2015 ; [*0215.0020.0002.2015] # HORIZONTAL BAR +2016 ; [*0395.0020.0002.2016] # DOUBLE VERTICAL LINE +2018 ; [*0256.0020.0002.2018] # LEFT SINGLE QUOTATION MARK +2019 ; [*0257.0020.0002.2019] # RIGHT SINGLE QUOTATION MARK +201A ; [*0258.0020.0002.201A] # SINGLE LOW-9 QUOTATION MARK +201B ; [*0259.0020.0002.201B] # SINGLE HIGH-REVERSED-9 QUOTATION MARK +201C ; [*025D.0020.0002.201C] # LEFT DOUBLE QUOTATION MARK +201D ; [*025E.0020.0002.201D] # RIGHT DOUBLE QUOTATION MARK +201E ; [*025F.0020.0002.201E] # DOUBLE LOW-9 QUOTATION MARK +201F ; [*0260.0020.0002.201F] # DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2020 ; [*029A.0020.0002.2020] # DAGGER +2021 ; [*029B.0020.0002.2021] # DOUBLE DAGGER +2022 ; [*029C.0020.0002.2022] # BULLET +2023 ; [*029D.0020.0002.2023] # TRIANGULAR BULLET +2027 ; [*029E.0020.0002.2027] # HYPHENATION POINT +2028 ; [*0207.0020.0002.2028] # LINE SEPARATOR +2029 ; [*0208.0020.0002.2029] # PARAGRAPH SEPARATOR +2030 ; [*0298.0020.0002.2030] # PER MILLE SIGN +2031 ; [*0299.0020.0002.2031] # PER TEN THOUSAND SIGN +2032 ; [*02A2.0020.0002.2032] # PRIME +2035 ; [*02A3.0020.0002.2035] # REVERSED PRIME +2038 ; [*02A5.0020.0002.2038] # CARET +203B ; [*02A6.0020.0002.203B] # REFERENCE MARK +203D ; [*0240.0020.0002.203D] # INTERROBANG +203F ; 
[*02A7.0020.0002.203F] # UNDERTIE +2040 ; [*02A8.0020.0002.2040] # CHARACTER TIE +2041 ; [*02A9.0020.0002.2041] # CARET INSERTION POINT +2042 ; [*02AA.0020.0002.2042] # ASTERISM +2043 ; [*029F.0020.0002.2043] # HYPHEN BULLET +2044 ; [*0291.0020.0002.2044] # FRACTION SLASH +204A ; [*0294.0020.0002.204A] # TIRONIAN SIGN ET +204B ; [*028A.0020.0002.204B] # REVERSED PILCROW SIGN +204C ; [*02A0.0020.0002.204C] # BLACK LEFTWARDS BULLET +204D ; [*02A1.0020.0002.204D] # BLACK RIGHTWARDS BULLET +2200 ; [*037C.0020.0002.2200] # FOR ALL +2201 ; [*037D.0020.0002.2201] # COMPLEMENT +2202 ; [*037E.0020.0002.2202] # PARTIAL DIFFERENTIAL +2203 ; [*037F.0020.0002.2203] # THERE EXISTS +2204 ; [*037F.0054.0002.2204] # THERE DOES NOT EXIST; CANONSEQ +2205 ; [*0380.0020.0002.2205] # EMPTY SET +2206 ; [*0381.0020.0002.2206] # INCREMENT +2207 ; [*0382.0020.0002.2207] # NABLA +2208 ; [*0383.0020.0002.2208] # ELEMENT OF +2209 ; [*0383.0054.0002.2209] # NOT AN ELEMENT OF; CANONSEQ +220A ; [*0384.0020.0002.220A] # SMALL ELEMENT OF +220B ; [*0385.0020.0002.220B] # CONTAINS AS MEMBER +220C ; [*0385.0054.0002.220C] # DOES NOT CONTAIN AS MEMBER; CANONSEQ +220D ; [*0386.0020.0002.220D] # SMALL CONTAINS AS MEMBER +220E ; [*0387.0020.0002.220E] # END OF PROOF +220F ; [*0388.0020.0002.220F] # N-ARY PRODUCT +2210 ; [*0389.0020.0002.2210] # N-ARY COPRODUCT +2211 ; [*038A.0020.0002.2211] # N-ARY SUMMATION +2212 ; [*0397.0020.0002.2212] # MINUS SIGN +2213 ; [*0398.0020.0002.2213] # MINUS-OR-PLUS SIGN +2214 ; [*0399.0020.0002.2214] # DOT PLUS +2215 ; [*039A.0020.0002.2215] # DIVISION SLASH +2216 ; [*039B.0020.0002.2216] # SET MINUS +2217 ; [*039C.0020.0002.2217] # ASTERISK OPERATOR +2218 ; [*039D.0020.0002.2218] # RING OPERATOR +2219 ; [*039E.0020.0002.2219] # BULLET OPERATOR +221B ; [*03A0.0020.0002.221B] # CUBE ROOT +221C ; [*03A1.0020.0002.221C] # FOURTH ROOT +221D ; [*03A2.0020.0002.221D] # PROPORTIONAL TO +221E ; [*03A3.0020.0002.221E] # INFINITY +2223 ; [*03A8.0020.0002.2223] # DIVIDES +2224 ; 
[*03A8.0054.0002.2224] # DOES NOT DIVIDE; CANONSEQ +2225 ; [*03A9.0020.0002.2225] # PARALLEL TO +2226 ; [*03A9.0054.0002.2226] # NOT PARALLEL TO; CANONSEQ +2227 ; [*03AA.0020.0002.2227] # LOGICAL AND +2228 ; [*03AB.0020.0002.2228] # LOGICAL OR +2229 ; [*03AC.0020.0002.2229] # INTERSECTION +222A ; [*03AD.0020.0002.222A] # UNION +2234 ; [*03B3.0020.0002.2234] # THEREFORE +2235 ; [*03B4.0020.0002.2235] # BECAUSE +2236 ; [*03B5.0020.0002.2236] # RATIO +2237 ; [*03B6.0020.0002.2237] # PROPORTION +2238 ; [*03B7.0020.0002.2238] # DOT MINUS +2239 ; [*03B8.0020.0002.2239] # EXCESS +223A ; [*03B9.0020.0002.223A] # GEOMETRIC PROPORTION +223B ; [*03BA.0020.0002.223B] # HOMOTHETIC +223C ; [*03BB.0020.0002.223C] # TILDE OPERATOR +223D ; [*03BC.0020.0002.223D] # REVERSED TILDE +223E ; [*03BD.0020.0002.223E] # INVERTED LAZY S +223F ; [*03BE.0020.0002.223F] # SINE WAVE +2240 ; [*03BF.0020.0002.2240] # WREATH PRODUCT +2241 ; [*03BB.0054.0002.2241] # NOT TILDE; CANONSEQ +2242 ; [*03C0.0020.0002.2242] # MINUS TILDE +2243 ; [*03C1.0020.0002.2243] # ASYMPTOTICALLY EQUAL TO +2244 ; [*03C1.0054.0002.2244] # NOT ASYMPTOTICALLY EQUAL TO; CANONSEQ +2245 ; [*03C2.0020.0002.2245] # APPROXIMATELY EQUAL TO +2246 ; [*03C3.0020.0002.2246] # APPROXIMATELY BUT NOT ACTUALLY EQUAL TO +2247 ; [*03C2.0054.0002.2247] # NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO; CANONSEQ +2248 ; [*03C4.0020.0002.2248] # ALMOST EQUAL TO +2249 ; [*03C4.0054.0002.2249] # NOT ALMOST EQUAL TO; CANONSEQ +224A ; [*03C5.0020.0002.224A] # ALMOST EQUAL OR EQUAL TO +224B ; [*03C6.0020.0002.224B] # TRIPLE TILDE +224C ; [*03C7.0020.0002.224C] # ALL EQUAL TO +224D ; [*03C8.0020.0002.224D] # EQUIVALENT TO +224E ; [*03C9.0020.0002.224E] # GEOMETRICALLY EQUIVALENT TO +224F ; [*03CA.0020.0002.224F] # DIFFERENCE BETWEEN +2250 ; [*03CB.0020.0002.2250] # APPROACHES THE LIMIT +2251 ; [*03CC.0020.0002.2251] # GEOMETRICALLY EQUAL TO +2252 ; [*03CD.0020.0002.2252] # APPROXIMATELY EQUAL TO OR THE IMAGE OF +2253 ; [*03CE.0020.0002.2253] # IMAGE 
OF OR APPROXIMATELY EQUAL TO +2254 ; [*03CF.0020.0002.2254] # COLON EQUALS +2255 ; [*03D0.0020.0002.2255] # EQUALS COLON +2256 ; [*03D1.0020.0002.2256] # RING IN EQUAL TO +2257 ; [*03D2.0020.0002.2257] # RING EQUAL TO +2258 ; [*03D3.0020.0002.2258] # CORRESPONDS TO +2259 ; [*03D4.0020.0002.2259] # ESTIMATES +225A ; [*03D5.0020.0002.225A] # EQUIANGULAR TO +225C ; [*03D7.0020.0002.225C] # DELTA EQUAL TO +225D ; [*03D8.0020.0002.225D] # EQUAL TO BY DEFINITION +225E ; [*03D9.0020.0002.225E] # MEASURED BY +225F ; [*03DA.0020.0002.225F] # QUESTIONED EQUAL TO +2260 ; [*0390.0054.0002.2260] # NOT EQUAL TO; CANONSEQ +2261 ; [*03DB.0020.0002.2261] # IDENTICAL TO +2262 ; [*03DB.0054.0002.2262] # NOT IDENTICAL TO; CANONSEQ +2263 ; [*03DC.0020.0002.2263] # STRICTLY EQUIVALENT TO +2264 ; [*03DD.0020.0002.2264] # LESS-THAN OR EQUAL TO +2265 ; [*03DE.0020.0002.2265] # GREATER-THAN OR EQUAL TO +2266 ; [*03DF.0020.0002.2266] # LESS-THAN OVER EQUAL TO +2267 ; [*03E0.0020.0002.2267] # GREATER-THAN OVER EQUAL TO +2268 ; [*03E1.0020.0002.2268] # LESS-THAN BUT NOT EQUAL TO +2269 ; [*03E2.0020.0002.2269] # GREATER-THAN BUT NOT EQUAL TO +226A ; [*03E3.0020.0002.226A] # MUCH LESS-THAN +226B ; [*03E4.0020.0002.226B] # MUCH GREATER-THAN +226C ; [*03E5.0020.0002.226C] # BETWEEN +226D ; [*03C8.0054.0002.226D] # NOT EQUIVALENT TO; CANONSEQ +226E ; [*038F.0054.0002.226E] # NOT LESS-THAN; CANONSEQ +226F ; [*0391.0054.0002.226F] # NOT GREATER-THAN; CANONSEQ +2270 ; [*03DD.0054.0002.2270] # NEITHER LESS-THAN NOR EQUAL TO; CANONSEQ +2271 ; [*03DE.0054.0002.2271] # NEITHER GREATER-THAN NOR EQUAL TO; CANONSEQ +2272 ; [*03E6.0020.0002.2272] # LESS-THAN OR EQUIVALENT TO +2273 ; [*03E7.0020.0002.2273] # GREATER-THAN OR EQUIVALENT TO +2274 ; [*03E6.0054.0002.2274] # NEITHER LESS-THAN NOR EQUIVALENT TO; CANONSEQ +2275 ; [*03E7.0054.0002.2275] # NEITHER GREATER-THAN NOR EQUIVALENT TO; CANONSEQ +2276 ; [*03E8.0020.0002.2276] # LESS-THAN OR GREATER-THAN +2277 ; [*03E9.0020.0002.2277] # GREATER-THAN OR 
LESS-THAN +2278 ; [*03E8.0054.0002.2278] # NEITHER LESS-THAN NOR GREATER-THAN; CANONSEQ +2279 ; [*03E9.0054.0002.2279] # NEITHER GREATER-THAN NOR LESS-THAN; CANONSEQ +227A ; [*03EA.0020.0002.227A] # PRECEDES +227B ; [*03EB.0020.0002.227B] # SUCCEEDS +227C ; [*03EC.0020.0002.227C] # PRECEDES OR EQUAL TO +227D ; [*03ED.0020.0002.227D] # SUCCEEDS OR EQUAL TO +227E ; [*03EE.0020.0002.227E] # PRECEDES OR EQUIVALENT TO +227F ; [*03EF.0020.0002.227F] # SUCCEEDS OR EQUIVALENT TO +2280 ; [*03EA.0054.0002.2280] # DOES NOT PRECEDE; CANONSEQ +2281 ; [*03EB.0054.0002.2281] # DOES NOT SUCCEED; CANONSEQ +2282 ; [*03F0.0020.0002.2282] # SUBSET OF +2283 ; [*03F1.0020.0002.2283] # SUPERSET OF +2284 ; [*03F0.0054.0002.2284] # NOT A SUBSET OF; CANONSEQ +2285 ; [*03F1.0054.0002.2285] # NOT A SUPERSET OF; CANONSEQ +2286 ; [*03F2.0020.0002.2286] # SUBSET OF OR EQUAL TO +2287 ; [*03F3.0020.0002.2287] # SUPERSET OF OR EQUAL TO +2288 ; [*03F2.0054.0002.2288] # NEITHER A SUBSET OF NOR EQUAL TO; CANONSEQ +2289 ; [*03F3.0054.0002.2289] # NEITHER A SUPERSET OF NOR EQUAL TO; CANONSEQ +228A ; [*03F4.0020.0002.228A] # SUBSET OF WITH NOT EQUAL TO +228B ; [*03F5.0020.0002.228B] # SUPERSET OF WITH NOT EQUAL TO +228C ; [*03F6.0020.0002.228C] # MULTISET +228D ; [*03F7.0020.0002.228D] # MULTISET MULTIPLICATION +228E ; [*03F8.0020.0002.228E] # MULTISET UNION +22A2 ; [*040C.0020.0002.22A2] # RIGHT TACK +22A3 ; [*040D.0020.0002.22A3] # LEFT TACK +22A4 ; [*040E.0020.0002.22A4] # DOWN TACK +22A5 ; [*040F.0020.0002.22A5] # UP TACK +22A6 ; [*0410.0020.0002.22A6] # ASSERTION +22A7 ; [*0411.0020.0002.22A7] # MODELS +22A8 ; [*0412.0020.0002.22A8] # TRUE +22A9 ; [*0413.0020.0002.22A9] # FORCES +22AA ; [*0414.0020.0002.22AA] # TRIPLE VERTICAL BAR RIGHT TURNSTILE +22AB ; [*0415.0020.0002.22AB] # DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE +22AC ; [*040C.0054.0002.22AC] # DOES NOT PROVE; CANONSEQ +22AD ; [*0412.0054.0002.22AD] # NOT TRUE; CANONSEQ +22AE ; [*0413.0054.0002.22AE] # DOES NOT FORCE; CANONSEQ +22AF ; 
[*0415.0054.0002.22AF] # NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE; CANONSEQ +22B0 ; [*0416.0020.0002.22B0] # PRECEDES UNDER RELATION +22B1 ; [*0417.0020.0002.22B1] # SUCCEEDS UNDER RELATION +22B2 ; [*0418.0020.0002.22B2] # NORMAL SUBGROUP OF +22B3 ; [*0419.0020.0002.22B3] # CONTAINS AS NORMAL SUBGROUP +22B4 ; [*041A.0020.0002.22B4] # NORMAL SUBGROUP OF OR EQUAL TO +22B5 ; [*041B.0020.0002.22B5] # CONTAINS AS NORMAL SUBGROUP OR EQUAL TO +22B6 ; [*041C.0020.0002.22B6] # ORIGINAL OF +22B7 ; [*041D.0020.0002.22B7] # IMAGE OF +22B8 ; [*041E.0020.0002.22B8] # MULTIMAP +22B9 ; [*041F.0020.0002.22B9] # HERMITIAN CONJUGATE MATRIX +22BA ; [*0420.0020.0002.22BA] # INTERCALATE +22BB ; [*0421.0020.0002.22BB] # XOR +22BC ; [*0422.0020.0002.22BC] # NAND +22BD ; [*0423.0020.0002.22BD] # NOR +22C0 ; [*0426.0020.0002.22C0] # N-ARY LOGICAL AND +22C1 ; [*0427.0020.0002.22C1] # N-ARY LOGICAL OR +22C2 ; [*0428.0020.0002.22C2] # N-ARY INTERSECTION +22C3 ; [*0429.0020.0002.22C3] # N-ARY UNION +22C4 ; [*042A.0020.0002.22C4] # DIAMOND OPERATOR +22C5 ; [*042B.0020.0002.22C5] # DOT OPERATOR +22C7 ; [*042D.0020.0002.22C7] # DIVISION TIMES +22C8 ; [*042E.0020.0002.22C8] # BOWTIE +22C9 ; [*042F.0020.0002.22C9] # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT +22CA ; [*0430.0020.0002.22CA] # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT +22CB ; [*0431.0020.0002.22CB] # LEFT SEMIDIRECT PRODUCT +22CC ; [*0432.0020.0002.22CC] # RIGHT SEMIDIRECT PRODUCT +22CD ; [*0433.0020.0002.22CD] # REVERSED TILDE EQUALS +22CE ; [*0434.0020.0002.22CE] # CURLY LOGICAL OR +22CF ; [*0435.0020.0002.22CF] # CURLY LOGICAL AND +22D0 ; [*0436.0020.0002.22D0] # DOUBLE SUBSET +22D1 ; [*0437.0020.0002.22D1] # DOUBLE SUPERSET +22D2 ; [*0438.0020.0002.22D2] # DOUBLE INTERSECTION +22D3 ; [*0439.0020.0002.22D3] # DOUBLE UNION +22D4 ; [*043A.0020.0002.22D4] # PITCHFORK +22D5 ; [*043B.0020.0002.22D5] # EQUAL AND PARALLEL TO +22D6 ; [*043C.0020.0002.22D6] # LESS-THAN WITH DOT +22D7 ; [*043D.0020.0002.22D7] # GREATER-THAN WITH DOT +22D8 ; 
[*043E.0020.0002.22D8] # VERY MUCH LESS-THAN +22D9 ; [*043F.0020.0002.22D9] # VERY MUCH GREATER-THAN +22DA ; [*0440.0020.0002.22DA] # LESS-THAN EQUAL TO OR GREATER-THAN +22DB ; [*0441.0020.0002.22DB] # GREATER-THAN EQUAL TO OR LESS-THAN +22DC ; [*0442.0020.0002.22DC] # EQUAL TO OR LESS-THAN +22DD ; [*0443.0020.0002.22DD] # EQUAL TO OR GREATER-THAN +22DE ; [*0444.0020.0002.22DE] # EQUAL TO OR PRECEDES +22DF ; [*0445.0020.0002.22DF] # EQUAL TO OR SUCCEEDS +22E0 ; [*03EC.0054.0002.22E0] # DOES NOT PRECEDE OR EQUAL; CANONSEQ +22E1 ; [*03ED.0054.0002.22E1] # DOES NOT SUCCEED OR EQUAL; CANONSEQ +22E6 ; [*0448.0020.0002.22E6] # LESS-THAN BUT NOT EQUIVALENT TO +22E7 ; [*0449.0020.0002.22E7] # GREATER-THAN BUT NOT EQUIVALENT TO +22E8 ; [*044A.0020.0002.22E8] # PRECEDES BUT NOT EQUIVALENT TO +22E9 ; [*044B.0020.0002.22E9] # SUCCEEDS BUT NOT EQUIVALENT TO +22EA ; [*0418.0054.0002.22EA] # NOT NORMAL SUBGROUP OF; CANONSEQ +22EB ; [*0419.0054.0002.22EB] # DOES NOT CONTAIN AS NORMAL SUBGROUP; CANONSEQ +22EC ; [*041A.0054.0002.22EC] # NOT NORMAL SUBGROUP OF OR EQUAL TO; CANONSEQ +22ED ; [*041B.0054.0002.22ED] # DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL; CANONSEQ +22EE ; [*044C.0020.0002.22EE] # VERTICAL ELLIPSIS +22EF ; [*044D.0020.0002.22EF] # MIDLINE HORIZONTAL ELLIPSIS +22F0 ; [*044E.0020.0002.22F0] # UP RIGHT DIAGONAL ELLIPSIS +22F1 ; [*044F.0020.0002.22F1] # DOWN RIGHT DIAGONAL ELLIPSIS +2300 ; [*0450.0020.0002.2300] # DIAMETER SIGN +2302 ; [*0452.0020.0002.2302] # HOUSE +2303 ; [*0453.0020.0002.2303] # UP ARROWHEAD +2304 ; [*0454.0020.0002.2304] # DOWN ARROWHEAD +2305 ; [*0455.0020.0002.2305] # PROJECTIVE +2306 ; [*0456.0020.0002.2306] # PERSPECTIVE +2307 ; [*0457.0020.0002.2307] # WAVY LINE +2308 ; [*0458.0020.0002.2308] # LEFT CEILING +2309 ; [*0459.0020.0002.2309] # RIGHT CEILING +230A ; [*045A.0020.0002.230A] # LEFT FLOOR +230B ; [*045B.0020.0002.230B] # RIGHT FLOOR +230C ; [*045C.0020.0002.230C] # BOTTOM RIGHT CROP +230D ; [*045D.0020.0002.230D] # BOTTOM LEFT CROP 
+230E ; [*045E.0020.0002.230E] # TOP RIGHT CROP +230F ; [*045F.0020.0002.230F] # TOP LEFT CROP +2310 ; [*0460.0020.0002.2310] # REVERSED NOT SIGN +2312 ; [*0462.0020.0002.2312] # ARC +2313 ; [*0463.0020.0002.2313] # SEGMENT +2314 ; [*0464.0020.0002.2314] # SECTOR +2315 ; [*0465.0020.0002.2315] # TELEPHONE RECORDER +2316 ; [*0466.0020.0002.2316] # POSITION INDICATOR +2318 ; [*0468.0020.0002.2318] # PLACE OF INTEREST SIGN +2319 ; [*0469.0020.0002.2319] # TURNED NOT SIGN +231A ; [*046A.0020.0002.231A] # WATCH +231B ; [*046B.0020.0002.231B] # HOURGLASS +2322 ; [*0472.0020.0002.2322] # FROWN +2323 ; [*0473.0020.0002.2323] # SMILE +2324 ; [*0474.0020.0002.2324] # UP ARROWHEAD BETWEEN TWO HORIZONTAL BARS +2325 ; [*0475.0020.0002.2325] # OPTION KEY +2326 ; [*0476.0020.0002.2326] # ERASE TO THE RIGHT +2328 ; [*0478.0020.0002.2328] # KEYBOARD +232B ; [*0479.0020.0002.232B] # ERASE TO THE LEFT +232C ; [*047A.0020.0002.232C] # BENZENE RING +232D ; [*047B.0020.0002.232D] # CYLINDRICITY +232E ; [*047C.0020.0002.232E] # ALL AROUND-PROFILE +232F ; [*047D.0020.0002.232F] # SYMMETRY +2330 ; [*047E.0020.0002.2330] # TOTAL RUNOUT +2331 ; [*047F.0020.0002.2331] # DIMENSION ORIGIN +2332 ; [*0480.0020.0002.2332] # CONICAL TAPER +2333 ; [*0481.0020.0002.2333] # SLOPE +2334 ; [*0482.0020.0002.2334] # COUNTERBORE +2335 ; [*0483.0020.0002.2335] # COUNTERSINK +237B ; [*04C9.0020.0002.237B] # NOT CHECK MARK +237F ; [*04CC.0020.0002.237F] # VERTICAL LINE WITH MIDDLE DOT +2397 ; [*04E4.0020.0002.2397] # PREVIOUS PAGE +2398 ; [*04E5.0020.0002.2398] # NEXT PAGE +25B0 ; [*05C0.0020.0002.25B0] # BLACK PARALLELOGRAM +25B1 ; [*05C1.0020.0002.25B1] # WHITE PARALLELOGRAM +25C6 ; [*05D6.0020.0002.25C6] # BLACK DIAMOND +25C7 ; [*05D7.0020.0002.25C7] # WHITE DIAMOND +25C8 ; [*05D8.0020.0002.25C8] # WHITE DIAMOND CONTAINING BLACK SMALL DIAMOND +25C9 ; [*05D9.0020.0002.25C9] # FISHEYE +25CA ; [*05DA.0020.0002.25CA] # LOZENGE +25CE ; [*05DE.0020.0002.25CE] # BULLSEYE +25D8 ; [*05E8.0020.0002.25D8] # INVERSE 
BULLET +25E6 ; [*05F6.0020.0002.25E6] # WHITE BULLET +2600 ; [*0608.0020.0002.2600] # BLACK SUN WITH RAYS +2601 ; [*0609.0020.0002.2601] # CLOUD +2602 ; [*060A.0020.0002.2602] # UMBRELLA +2603 ; [*060B.0020.0002.2603] # SNOWMAN +2604 ; [*060C.0020.0002.2604] # COMET +2607 ; [*060F.0020.0002.2607] # LIGHTNING +2608 ; [*0610.0020.0002.2608] # THUNDERSTORM +2609 ; [*0611.0020.0002.2609] # SUN +260A ; [*0612.0020.0002.260A] # ASCENDING NODE +260B ; [*0613.0020.0002.260B] # DESCENDING NODE +260C ; [*0614.0020.0002.260C] # CONJUNCTION +260D ; [*0615.0020.0002.260D] # OPPOSITION +260E ; [*0616.0020.0002.260E] # BLACK TELEPHONE +260F ; [*0617.0020.0002.260F] # WHITE TELEPHONE +2613 ; [*061B.0020.0002.2613] # SALTIRE +2619 ; [*061C.0020.0002.2619] # REVERSED ROTATED FLORAL HEART BULLET +2620 ; [*0623.0020.0002.2620] # SKULL AND CROSSBONES +2621 ; [*0624.0020.0002.2621] # CAUTION SIGN +2622 ; [*0625.0020.0002.2622] # RADIOACTIVE SIGN +2623 ; [*0626.0020.0002.2623] # BIOHAZARD SIGN +2624 ; [*0627.0020.0002.2624] # CADUCEUS +2625 ; [*0628.0020.0002.2625] # ANKH +2626 ; [*0629.0020.0002.2626] # ORTHODOX CROSS +2627 ; [*062A.0020.0002.2627] # CHI RHO +2628 ; [*062B.0020.0002.2628] # CROSS OF LORRAINE +2629 ; [*062C.0020.0002.2629] # CROSS OF JERUSALEM +262C ; [*062F.0020.0002.262C] # ADI SHAKTI +262D ; [*0630.0020.0002.262D] # HAMMER AND SICKLE +262F ; [*0632.0020.0002.262F] # YIN YANG +2638 ; [*063B.0020.0002.2638] # WHEEL OF DHARMA +2639 ; [*063C.0020.0002.2639] # WHITE FROWNING FACE +263A ; [*063D.0020.0002.263A] # WHITE SMILING FACE +263B ; [*063E.0020.0002.263B] # BLACK SMILING FACE +263C ; [*063F.0020.0002.263C] # WHITE SUN WITH RAYS +263D ; [*0640.0020.0002.263D] # FIRST QUARTER MOON +263E ; [*0641.0020.0002.263E] # LAST QUARTER MOON +263F ; [*0642.0020.0002.263F] # MERCURY +2640 ; [*0643.0020.0002.2640] # FEMALE SIGN +2641 ; [*0644.0020.0002.2641] # EARTH +2642 ; [*0645.0020.0002.2642] # MALE SIGN +2643 ; [*0646.0020.0002.2643] # JUPITER +2644 ; [*0647.0020.0002.2644] # 
SATURN +2645 ; [*0648.0020.0002.2645] # URANUS +2646 ; [*0649.0020.0002.2646] # NEPTUNE +2647 ; [*064A.0020.0002.2647] # PLUTO +2648 ; [*064B.0020.0002.2648] # ARIES +2649 ; [*064C.0020.0002.2649] # TAURUS +264A ; [*064D.0020.0002.264A] # GEMINI +264B ; [*064E.0020.0002.264B] # CANCER +264C ; [*064F.0020.0002.264C] # LEO +264D ; [*0650.0020.0002.264D] # VIRGO +264E ; [*0651.0020.0002.264E] # LIBRA +264F ; [*0652.0020.0002.264F] # SCORPIUS +2650 ; [*0653.0020.0002.2650] # SAGITTARIUS +2651 ; [*0654.0020.0002.2651] # CAPRICORN +2652 ; [*0655.0020.0002.2652] # AQUARIUS +2653 ; [*0656.0020.0002.2653] # PISCES +2668 ; [*066B.0020.0002.2668] # HOT SPRINGS +2669 ; [*066C.0020.0002.2669] # QUARTER NOTE +266A ; [*066D.0020.0002.266A] # EIGHTH NOTE +266B ; [*066E.0020.0002.266B] # BEAMED EIGHTH NOTES +266C ; [*066F.0020.0002.266C] # BEAMED SIXTEENTH NOTES +2701 ; [*0672.0020.0002.2701] # UPPER BLADE SCISSORS +2702 ; [*0673.0020.0002.2702] # BLACK SCISSORS +2703 ; [*0674.0020.0002.2703] # LOWER BLADE SCISSORS +2704 ; [*0675.0020.0002.2704] # WHITE SCISSORS +2706 ; [*0676.0020.0002.2706] # TELEPHONE LOCATION SIGN +2707 ; [*0677.0020.0002.2707] # TAPE DRIVE +2708 ; [*0678.0020.0002.2708] # AIRPLANE +2709 ; [*0679.0020.0002.2709] # ENVELOPE +270C ; [*067A.0020.0002.270C] # VICTORY HAND +270D ; [*067B.0020.0002.270D] # WRITING HAND +270E ; [*067C.0020.0002.270E] # LOWER RIGHT PENCIL +270F ; [*067D.0020.0002.270F] # PENCIL +2710 ; [*067E.0020.0002.2710] # UPPER RIGHT PENCIL +2711 ; [*067F.0020.0002.2711] # WHITE NIB +2712 ; [*0680.0020.0002.2712] # BLACK NIB +2713 ; [*0681.0020.0002.2713] # CHECK MARK +2714 ; [*0682.0020.0002.2714] # HEAVY CHECK MARK +2715 ; [*0683.0020.0002.2715] # MULTIPLICATION X +2716 ; [*0684.0020.0002.2716] # HEAVY MULTIPLICATION X +2717 ; [*0685.0020.0002.2717] # BALLOT X +2718 ; [*0686.0020.0002.2718] # HEAVY BALLOT X +271B ; [*0689.0020.0002.271B] # OPEN CENTRE CROSS +271C ; [*068A.0020.0002.271C] # HEAVY OPEN CENTRE CROSS +271D ; [*068B.0020.0002.271D] # 
LATIN CROSS +271E ; [*068C.0020.0002.271E] # SHADOWED WHITE LATIN CROSS +271F ; [*068D.0020.0002.271F] # OUTLINED LATIN CROSS +2720 ; [*068E.0020.0002.2720] # MALTESE CROSS +2722 ; [*0690.0020.0002.2722] # FOUR TEARDROP-SPOKED ASTERISK +2723 ; [*0691.0020.0002.2723] # FOUR BALLOON-SPOKED ASTERISK +2724 ; [*0692.0020.0002.2724] # HEAVY FOUR BALLOON-SPOKED ASTERISK +2725 ; [*0693.0020.0002.2725] # FOUR CLUB-SPOKED ASTERISK +2731 ; [*069E.0020.0002.2731] # HEAVY ASTERISK +2732 ; [*069F.0020.0002.2732] # OPEN CENTRE ASTERISK +2733 ; [*06A0.0020.0002.2733] # EIGHT SPOKED ASTERISK +273A ; [*06A7.0020.0002.273A] # SIXTEEN POINTED ASTERISK +273B ; [*06A8.0020.0002.273B] # TEARDROP-SPOKED ASTERISK +273C ; [*06A9.0020.0002.273C] # OPEN CENTRE TEARDROP-SPOKED ASTERISK +273D ; [*06AA.0020.0002.273D] # HEAVY TEARDROP-SPOKED ASTERISK +273E ; [*06AB.0020.0002.273E] # SIX PETALLED BLACK AND WHITE FLORETTE +273F ; [*06AC.0020.0002.273F] # BLACK FLORETTE +2740 ; [*06AD.0020.0002.2740] # WHITE FLORETTE +2741 ; [*06AE.0020.0002.2741] # EIGHT PETALLED OUTLINED BLACK FLORETTE +2743 ; [*06B0.0020.0002.2743] # HEAVY TEARDROP-SPOKED PINWHEEL ASTERISK +2744 ; [*06B1.0020.0002.2744] # SNOWFLAKE +2745 ; [*06B2.0020.0002.2745] # TIGHT TRIFOLIATE SNOWFLAKE +2746 ; [*06B3.0020.0002.2746] # HEAVY CHEVRON SNOWFLAKE +2747 ; [*06B4.0020.0002.2747] # SPARKLE +2748 ; [*06B5.0020.0002.2748] # HEAVY SPARKLE +2749 ; [*06B6.0020.0002.2749] # BALLOON-SPOKED ASTERISK +274A ; [*06B7.0020.0002.274A] # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK +274B ; [*06B8.0020.0002.274B] # HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK +2756 ; [*06BE.0020.0002.2756] # BLACK DIAMOND MINUS WHITE X +2758 ; [*06BF.0020.0002.2758] # LIGHT VERTICAL BAR +2759 ; [*06C0.0020.0002.2759] # MEDIUM VERTICAL BAR +275A ; [*06C1.0020.0002.275A] # HEAVY VERTICAL BAR +275B ; [*06C2.0020.0002.275B] # HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT +275C ; [*06C3.0020.0002.275C] # HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT +275D ; 
[*06C4.0020.0002.275D] # HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT +275E ; [*06C5.0020.0002.275E] # HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT +2761 ; [*06C6.0020.0002.2761] # CURVED STEM PARAGRAPH SIGN ORNAMENT +2762 ; [*06C7.0020.0002.2762] # HEAVY EXCLAMATION MARK ORNAMENT +2763 ; [*06C8.0020.0002.2763] # HEAVY HEART EXCLAMATION MARK ORNAMENT +2764 ; [*06C9.0020.0002.2764] # HEAVY BLACK HEART +2765 ; [*06CA.0020.0002.2765] # ROTATED HEAVY BLACK HEART BULLET +2766 ; [*06CB.0020.0002.2766] # FLORAL HEART +2767 ; [*06CC.0020.0002.2767] # ROTATED FLORAL HEART BULLET +27A2 ; [*06DB.0020.0002.27A2] # THREE-D TOP-LIGHTED RIGHTWARDS ARROWHEAD +27A3 ; [*06DC.0020.0002.27A3] # THREE-D BOTTOM-LIGHTED RIGHTWARDS ARROWHEAD +27A4 ; [*06DD.0020.0002.27A4] # BLACK RIGHTWARDS ARROWHEAD +2FF0 ; [*07F7.0020.0002.2FF0] # IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT +2FF1 ; [*07F8.0020.0002.2FF1] # IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO BELOW +2FF2 ; [*07F9.0020.0002.2FF2] # IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT +2FF3 ; [*07FA.0020.0002.2FF3] # IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW +2FF4 ; [*07FB.0020.0002.2FF4] # IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND +2FF5 ; [*07FC.0020.0002.2FF5] # IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE +2FF6 ; [*07FD.0020.0002.2FF6] # IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM BELOW +2FF7 ; [*07FE.0020.0002.2FF7] # IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LEFT +2FF8 ; [*07FF.0020.0002.2FF8] # IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER LEFT +2FF9 ; [*0800.0020.0002.2FF9] # IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER RIGHT +2FFA ; [*0801.0020.0002.2FFA] # IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER LEFT +2FFB ; [*0802.0020.0002.2FFB] # IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID +3001 ; [*0220.0020.0002.3001] # IDEOGRAPHIC COMMA +3002 ; [*024A.0020.0002.3002] # IDEOGRAPHIC FULL STOP +3003 ; [*02A4.0020.0002.3003] # DITTO MARK +3010 ; 
[*027C.0020.0002.3010] # LEFT BLACK LENTICULAR BRACKET +3011 ; [*027D.0020.0002.3011] # RIGHT BLACK LENTICULAR BRACKET +3012 ; [*0804.0020.0002.3012] # POSTAL MARK +3013 ; [*0805.0020.0002.3013] # GETA MARK +3014 ; [*027E.0020.0002.3014] # LEFT TORTOISE SHELL BRACKET +3015 ; [*027F.0020.0002.3015] # RIGHT TORTOISE SHELL BRACKET +3016 ; [*0280.0020.0002.3016] # LEFT WHITE LENTICULAR BRACKET +3017 ; [*0281.0020.0002.3017] # RIGHT WHITE LENTICULAR BRACKET +3018 ; [*0282.0020.0002.3018] # LEFT WHITE TORTOISE SHELL BRACKET +3019 ; [*0283.0020.0002.3019] # RIGHT WHITE TORTOISE SHELL BRACKET +301C ; [*0216.0020.0002.301C] # WAVE DASH +301D ; [*0261.0020.0002.301D] # REVERSED DOUBLE PRIME QUOTATION MARK +301E ; [*0262.0020.0002.301E] # DOUBLE PRIME QUOTATION MARK +301F ; [*0263.0020.0002.301F] # LOW DOUBLE PRIME QUOTATION MARK +3020 ; [*0806.0020.0002.3020] # POSTAL MARK FACE +3030 ; [*0217.0020.0002.3030] # WAVY DASH +303E ; [*0808.0020.0002.303E] # IDEOGRAPHIC VARIATION INDICATOR +303F ; [*0809.0020.0002.303F] # IDEOGRAPHIC HALF FILL SPACE +30FB ; [*0218.0020.0002.30FB] # KATAKANA MIDDLE DOT +3190 ; [*080A.0020.0002.3190] # IDEOGRAPHIC ANNOTATION LINKING MARK +3191 ; [*080B.0020.0002.3191] # IDEOGRAPHIC ANNOTATION REVERSE MARK +FD3E ; [*0286.0020.0002.FD3E] # ORNATE LEFT PARENTHESIS +FD3F ; [*0287.0020.0002.FD3F] # ORNATE RIGHT PARENTHESIS +FEFF ; [.0000.0000.0000.FEFF] # ZERO WIDTH NO-BREAK SPACE +FFFC ; [*080D.0020.0002.FFFC] # OBJECT REPLACEMENT CHARACTER +FFFD ; [*080E.0020.0002.FFFD] # REPLACEMENT CHARACTER +0332 ; [.0000.0021.0002.0332] # COMBINING LOW LINE +0313 ; [.0000.0022.0002.0313] # COMBINING COMMA ABOVE +0314 ; [.0000.002A.0002.0314] # COMBINING REVERSED COMMA ABOVE +0301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT +0341 ; [.0000.0032.0002.0341] # COMBINING ACUTE TONE MARK; CANON +0300 ; [.0000.0035.0002.0300] # COMBINING GRAVE ACCENT +0340 ; [.0000.0035.0002.0340] # COMBINING GRAVE TONE MARK; CANON +0306 ; [.0000.0037.0002.0306] # COMBINING BREVE 
+0302 ; [.0000.003C.0002.0302] # COMBINING CIRCUMFLEX ACCENT +030C ; [.0000.0041.0002.030C] # COMBINING CARON +030A ; [.0000.0043.0002.030A] # COMBINING RING ABOVE +0308 ; [.0000.0047.0002.0308] # COMBINING DIAERESIS +030B ; [.0000.004D.0002.030B] # COMBINING DOUBLE ACUTE ACCENT +0303 ; [.0000.004E.0002.0303] # COMBINING TILDE +0307 ; [.0000.0052.0002.0307] # COMBINING DOT ABOVE +0338 ; [.0000.0054.0002.0338] # COMBINING LONG SOLIDUS OVERLAY +0327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA +0328 ; [.0000.0058.0002.0328] # COMBINING OGONEK +0304 ; [.0000.005A.0002.0304] # COMBINING MACRON +0305 ; [.0000.005E.0002.0305] # COMBINING OVERLINE +0309 ; [.0000.005F.0002.0309] # COMBINING HOOK ABOVE +030D ; [.0000.0060.0002.030D] # COMBINING VERTICAL LINE ABOVE +030E ; [.0000.0061.0002.030E] # COMBINING DOUBLE VERTICAL LINE ABOVE +030F ; [.0000.0062.0002.030F] # COMBINING DOUBLE GRAVE ACCENT +0310 ; [.0000.0063.0002.0310] # COMBINING CANDRABINDU +0311 ; [.0000.0064.0002.0311] # COMBINING INVERTED BREVE +0312 ; [.0000.0065.0002.0312] # COMBINING TURNED COMMA ABOVE +0315 ; [.0000.0066.0002.0315] # COMBINING COMMA ABOVE RIGHT +0316 ; [.0000.0067.0002.0316] # COMBINING GRAVE ACCENT BELOW +0317 ; [.0000.0068.0002.0317] # COMBINING ACUTE ACCENT BELOW +0318 ; [.0000.0069.0002.0318] # COMBINING LEFT TACK BELOW +0319 ; [.0000.006A.0002.0319] # COMBINING RIGHT TACK BELOW +031B ; [.0000.006C.0002.031B] # COMBINING HORN +031C ; [.0000.0072.0002.031C] # COMBINING LEFT HALF RING BELOW +031D ; [.0000.0073.0002.031D] # COMBINING UP TACK BELOW +031E ; [.0000.0074.0002.031E] # COMBINING DOWN TACK BELOW +031F ; [.0000.0075.0002.031F] # COMBINING PLUS SIGN BELOW +0320 ; [.0000.0076.0002.0320] # COMBINING MINUS SIGN BELOW +0321 ; [.0000.0077.0002.0321] # COMBINING PALATALIZED HOOK BELOW +0322 ; [.0000.0078.0002.0322] # COMBINING RETROFLEX HOOK BELOW +0323 ; [.0000.0079.0002.0323] # COMBINING DOT BELOW +0324 ; [.0000.007E.0002.0324] # COMBINING DIAERESIS BELOW +0325 ; [.0000.007F.0002.0325] # 
COMBINING RING BELOW +0326 ; [.0000.0080.0002.0326] # COMBINING COMMA BELOW +0329 ; [.0000.0081.0002.0329] # COMBINING VERTICAL LINE BELOW +032A ; [.0000.0082.0002.032A] # COMBINING BRIDGE BELOW +032B ; [.0000.0083.0002.032B] # COMBINING INVERTED DOUBLE ARCH BELOW +032C ; [.0000.0084.0002.032C] # COMBINING CARON BELOW +032D ; [.0000.0085.0002.032D] # COMBINING CIRCUMFLEX ACCENT BELOW +032E ; [.0000.0086.0002.032E] # COMBINING BREVE BELOW +032F ; [.0000.0087.0002.032F] # COMBINING INVERTED BREVE BELOW +0330 ; [.0000.0088.0002.0330] # COMBINING TILDE BELOW +0331 ; [.0000.0089.0002.0331] # COMBINING MACRON BELOW +0333 ; [.0000.008A.0002.0333] # COMBINING DOUBLE LOW LINE +0334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY +0335 ; [.0000.008C.0002.0335] # COMBINING SHORT STROKE OVERLAY +0336 ; [.0000.008D.0002.0336] # COMBINING LONG STROKE OVERLAY +0337 ; [.0000.008E.0002.0337] # COMBINING SHORT SOLIDUS OVERLAY +0339 ; [.0000.008F.0002.0339] # COMBINING RIGHT HALF RING BELOW +033A ; [.0000.0090.0002.033A] # COMBINING INVERTED BRIDGE BELOW +033C ; [.0000.0092.0002.033C] # COMBINING SEAGULL BELOW +033D ; [.0000.0093.0002.033D] # COMBINING X ABOVE +033E ; [.0000.0094.0002.033E] # COMBINING VERTICAL TILDE +033F ; [.0000.0095.0002.033F] # COMBINING DOUBLE OVERLINE +0346 ; [.0000.0097.0002.0346] # COMBINING BRIDGE ABOVE +0347 ; [.0000.0098.0002.0347] # COMBINING EQUALS SIGN BELOW +0348 ; [.0000.0099.0002.0348] # COMBINING DOUBLE VERTICAL LINE BELOW +034A ; [.0000.009B.0002.034A] # COMBINING NOT TILDE ABOVE +034B ; [.0000.009C.0002.034B] # COMBINING HOMOTHETIC ABOVE +034C ; [.0000.009D.0002.034C] # COMBINING ALMOST EQUAL TO ABOVE +0360 ; [.0000.00A0.0002.0360] # COMBINING DOUBLE TILDE +0361 ; [.0000.00A1.0002.0361] # COMBINING DOUBLE INVERTED BREVE +FE20 ; [.0000.00A3.0002.FE20] # COMBINING LIGATURE LEFT HALF +FE21 ; [.0000.00A4.0002.FE21] # COMBINING LIGATURE RIGHT HALF +FE22 ; [.0000.00A5.0002.FE22] # COMBINING DOUBLE TILDE LEFT HALF +FE23 ; [.0000.00A6.0002.FE23] # 
COMBINING DOUBLE TILDE RIGHT HALF +302A ; [.0000.0138.0002.302A] # IDEOGRAPHIC LEVEL TONE MARK +302B ; [.0000.0139.0002.302B] # IDEOGRAPHIC RISING TONE MARK +302C ; [.0000.013A.0002.302C] # IDEOGRAPHIC DEPARTING TONE MARK +302D ; [.0000.013B.0002.302D] # IDEOGRAPHIC ENTERING TONE MARK +302E ; [.0000.013C.0002.302E] # HANGUL SINGLE DOT TONE MARK +302F ; [.0000.013D.0002.302F] # HANGUL DOUBLE DOT TONE MARK +3099 ; [.0000.013E.0002.3099] # COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK +309A ; [.0000.013F.0002.309A] # COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +20D2 ; [.0000.0142.0002.20D2] # COMBINING LONG VERTICAL LINE OVERLAY +20D3 ; [.0000.0143.0002.20D3] # COMBINING SHORT VERTICAL LINE OVERLAY +20D8 ; [.0000.0148.0002.20D8] # COMBINING RING OVERLAY +20D9 ; [.0000.0149.0002.20D9] # COMBINING CLOCKWISE RING OVERLAY +20DA ; [.0000.014A.0002.20DA] # COMBINING ANTICLOCKWISE RING OVERLAY +20DB ; [.0000.014B.0002.20DB] # COMBINING THREE DOTS ABOVE +20DC ; [.0000.014C.0002.20DC] # COMBINING FOUR DOTS ABOVE +20DF ; [.0000.014F.0002.20DF] # COMBINING ENCLOSING DIAMOND +20E2 ; [.0000.0152.0002.20E2] # COMBINING ENCLOSING SCREEN +20E3 ; [.0000.0153.0002.20E3] # COMBINING ENCLOSING KEYCAP +02D0 ; [.081F.0020.0002.02D0] # MODIFIER LETTER TRIANGULAR COLON +02D1 ; [.0820.0020.0002.02D1] # MODIFIER LETTER HALF TRIANGULAR COLON +3005 ; [.0823.0020.0002.3005] # IDEOGRAPHIC ITERATION MARK +3031 ; [.0824.0020.0002.3031] # VERTICAL KANA REPEAT MARK +3032 ; [.0824.013E.0002.3032] # VERTICAL KANA REPEAT WITH VOICED SOUND MARK; CANONSEQ +3033 ; [.0825.0020.0002.3033] # VERTICAL KANA REPEAT MARK UPPER HALF +3034 ; [.0825.013E.0002.3034] # VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HALF; CANONSEQ +3035 ; [.0826.0020.0002.3035] # VERTICAL KANA REPEAT MARK LOWER HALF +309D ; [.0827.0020.0002.309D] # HIRAGANA ITERATION MARK +309E ; [.0827.013E.0002.309E] # HIRAGANA VOICED ITERATION MARK; CANONSEQ +30FC ; [.0828.0020.0002.30FC] # KATAKANA-HIRAGANA PROLONGED SOUND MARK +30FD ; 
[.0829.0020.0002.30FD] # KATAKANA ITERATION MARK +30FE ; [.0829.013E.0002.30FE] # KATAKANA VOICED ITERATION MARK; CANONSEQ +00A4 ; [.082A.0020.0002.00A4] # CURRENCY SIGN +00A2 ; [.082B.0020.0002.00A2] # CENT SIGN +0024 ; [.082C.0020.0002.0024] # DOLLAR SIGN +00A3 ; [.082D.0020.0002.00A3] # POUND SIGN +00A5 ; [.082E.0020.0002.00A5] # YEN SIGN +20A0 ; [.0833.0020.0002.20A0] # EURO-CURRENCY SIGN +20A1 ; [.0834.0020.0002.20A1] # COLON SIGN +20A2 ; [.0835.0020.0002.20A2] # CRUZEIRO SIGN +20A3 ; [.0836.0020.0002.20A3] # FRENCH FRANC SIGN +20A4 ; [.0837.0020.0002.20A4] # LIRA SIGN +20A5 ; [.0838.0020.0002.20A5] # MILL SIGN +20A6 ; [.0839.0020.0002.20A6] # NAIRA SIGN +20A7 ; [.083A.0020.0002.20A7] # PESETA SIGN +20A9 ; [.083B.0020.0002.20A9] # WON SIGN +20AA ; [.083C.0020.0002.20AA] # NEW SHEQEL SIGN +20AB ; [.083D.0020.0002.20AB] # DONG SIGN +20AC ; [.083E.0020.0002.20AC] # EURO SIGN +20AD ; [.083F.0020.0002.20AD] # KIP SIGN +20AE ; [.0840.0020.0002.20AE] # TUGRIK SIGN +20AF ; [.0841.0020.0002.20AF] # DRACHMA SIGN +2108 ; [.0843.0020.0002.2108] # SCRUPLE +2117 ; [.0845.0020.0002.2117] # SOUND RECORDING COPYRIGHT +2118 ; [.0846.0020.0002.2118] # SCRIPT CAPITAL P +211E ; [.0847.0020.0002.211E] # PRESCRIPTION TAKE +211F ; [.0848.0020.0002.211F] # RESPONSE +2123 ; [.0849.0020.0002.2123] # VERSICLE +2125 ; [.084A.0020.0002.2125] # OUNCE SIGN +2127 ; [.084B.0020.0002.2127] # INVERTED OHM SIGN +2132 ; [.084E.0020.0002.2132] # TURNED CAPITAL F +213A ; [.084F.0020.0002.213A] # ROTATED CAPITAL Q +2180 ; [.0850.0020.0002.2180] # ROMAN NUMERAL ONE THOUSAND C D +2181 ; [.0851.0020.0002.2181] # ROMAN NUMERAL FIVE THOUSAND +2182 ; [.0852.0020.0002.2182] # ROMAN NUMERAL TEN THOUSAND +2183 ; [.0853.0020.0002.2183] # ROMAN NUMERAL REVERSED ONE HUNDRED +266D ; [.0854.0020.0002.266D] # MUSIC FLAT SIGN +266E ; [.0855.0020.0002.266E] # MUSIC NATURAL SIGN +266F ; [.0856.0020.0002.266F] # MUSIC SHARP SIGN +0030 ; [.0857.0020.0002.0030] # DIGIT ZERO +3007 ; [.0857.016E.0002.3007] # IDEOGRAPHIC 
NUMBER ZERO +0031 ; [.0858.0020.0002.0031] # DIGIT ONE +0032 ; [.0859.0020.0002.0032] # DIGIT TWO +0033 ; [.085A.0020.0002.0033] # DIGIT THREE +0034 ; [.085B.0020.0002.0034] # DIGIT FOUR +0035 ; [.085C.0020.0002.0035] # DIGIT FIVE +0036 ; [.085D.0020.0002.0036] # DIGIT SIX +0037 ; [.085E.0020.0002.0037] # DIGIT SEVEN +0038 ; [.085F.0020.0002.0038] # DIGIT EIGHT +0039 ; [.0860.0020.0002.0039] # DIGIT NINE +0061 ; [.0861.0020.0002.0061] # LATIN SMALL LETTER A +0041 ; [.0861.0020.0008.0041] # LATIN CAPITAL LETTER A +00E1 ; [.0861.0032.0002.00E1] # LATIN SMALL LETTER A WITH ACUTE; CANONSEQ +00C1 ; [.0861.0032.0008.00C1] # LATIN CAPITAL LETTER A WITH ACUTE; CANONSEQ +00E0 ; [.0861.0035.0002.00E0] # LATIN SMALL LETTER A WITH GRAVE; CANONSEQ +00C0 ; [.0861.0035.0008.00C0] # LATIN CAPITAL LETTER A WITH GRAVE; CANONSEQ +0103 ; [.0861.0037.0002.0103] # LATIN SMALL LETTER A WITH BREVE; CANONSEQ +0102 ; [.0861.0037.0008.0102] # LATIN CAPITAL LETTER A WITH BREVE; CANONSEQ +1EAF ; [.0861.0038.0002.1EAF] # LATIN SMALL LETTER A WITH BREVE AND ACUTE; CANONSEQ +1EAE ; [.0861.0038.0008.1EAE] # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE; CANONSEQ +1EB1 ; [.0861.0039.0002.1EB1] # LATIN SMALL LETTER A WITH BREVE AND GRAVE; CANONSEQ +1EB0 ; [.0861.0039.0008.1EB0] # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE; CANONSEQ +1EB5 ; [.0861.003A.0002.1EB5] # LATIN SMALL LETTER A WITH BREVE AND TILDE; CANONSEQ +1EB4 ; [.0861.003A.0008.1EB4] # LATIN CAPITAL LETTER A WITH BREVE AND TILDE; CANONSEQ +1EB3 ; [.0861.003B.0002.1EB3] # LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE; CANONSEQ +1EB2 ; [.0861.003B.0008.1EB2] # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE; CANONSEQ +00E2 ; [.0861.003C.0002.00E2] # LATIN SMALL LETTER A WITH CIRCUMFLEX; CANONSEQ +00C2 ; [.0861.003C.0008.00C2] # LATIN CAPITAL LETTER A WITH CIRCUMFLEX; CANONSEQ +1EA5 ; [.0861.003D.0002.1EA5] # LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE; CANONSEQ +1EA4 ; [.0861.003D.0008.1EA4] # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND 
ACUTE; CANONSEQ +1EA7 ; [.0861.003E.0002.1EA7] # LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE; CANONSEQ +1EA6 ; [.0861.003E.0008.1EA6] # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE; CANONSEQ +1EAB ; [.0861.003F.0002.1EAB] # LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE; CANONSEQ +1EAA ; [.0861.003F.0008.1EAA] # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE; CANONSEQ +1EA9 ; [.0861.0040.0002.1EA9] # LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE; CANONSEQ +1EA8 ; [.0861.0040.0008.1EA8] # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE; CANONSEQ +01CE ; [.0861.0041.0002.01CE] # LATIN SMALL LETTER A WITH CARON; CANONSEQ +01CD ; [.0861.0041.0008.01CD] # LATIN CAPITAL LETTER A WITH CARON; CANONSEQ +00E5 ; [.0861.0043.0002.00E5] # LATIN SMALL LETTER A WITH RING ABOVE; CANONSEQ +00C5 ; [.0861.0043.0008.00C5] # LATIN CAPITAL LETTER A WITH RING ABOVE; CANONSEQ +212B ; [.0861.0043.0008.212B] # ANGSTROM SIGN; CANONSEQ +01FB ; [.0861.0044.0002.01FB] # LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE; CANONSEQ +01FA ; [.0861.0044.0008.01FA] # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE; CANONSEQ +00E4 ; [.0861.0047.0002.00E4] # LATIN SMALL LETTER A WITH DIAERESIS; CANONSEQ +00C4 ; [.0861.0047.0008.00C4] # LATIN CAPITAL LETTER A WITH DIAERESIS; CANONSEQ +01DF ; [.0861.004B.0002.01DF] # LATIN SMALL LETTER A WITH DIAERESIS AND MACRON; CANONSEQ +01DE ; [.0861.004B.0008.01DE] # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON; CANONSEQ +00E3 ; [.0861.004E.0002.00E3] # LATIN SMALL LETTER A WITH TILDE; CANONSEQ +00C3 ; [.0861.004E.0008.00C3] # LATIN CAPITAL LETTER A WITH TILDE; CANONSEQ +0227 ; [.0861.0052.0002.0227] # LATIN SMALL LETTER A WITH DOT ABOVE; CANONSEQ +0226 ; [.0861.0052.0008.0226] # LATIN CAPITAL LETTER A WITH DOT ABOVE; CANONSEQ +01E1 ; [.0861.0053.0002.01E1] # LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON; CANONSEQ +01E0 ; [.0861.0053.0008.01E0] # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON; CANONSEQ +0105 ; [.0861.0058.0002.0105] # LATIN 
SMALL LETTER A WITH OGONEK; CANONSEQ +0104 ; [.0861.0058.0008.0104] # LATIN CAPITAL LETTER A WITH OGONEK; CANONSEQ +0101 ; [.0861.005A.0002.0101] # LATIN SMALL LETTER A WITH MACRON; CANONSEQ +0100 ; [.0861.005A.0008.0100] # LATIN CAPITAL LETTER A WITH MACRON; CANONSEQ +1EA3 ; [.0861.005F.0002.1EA3] # LATIN SMALL LETTER A WITH HOOK ABOVE; CANONSEQ +1EA2 ; [.0861.005F.0008.1EA2] # LATIN CAPITAL LETTER A WITH HOOK ABOVE; CANONSEQ +0201 ; [.0861.0062.0002.0201] # LATIN SMALL LETTER A WITH DOUBLE GRAVE; CANONSEQ +0200 ; [.0861.0062.0008.0200] # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE; CANONSEQ +0203 ; [.0861.0064.0002.0203] # LATIN SMALL LETTER A WITH INVERTED BREVE; CANONSEQ +0202 ; [.0861.0064.0008.0202] # LATIN CAPITAL LETTER A WITH INVERTED BREVE; CANONSEQ +1EA1 ; [.0861.0079.0002.1EA1] # LATIN SMALL LETTER A WITH DOT BELOW; CANONSEQ +1EA0 ; [.0861.0079.0008.1EA0] # LATIN CAPITAL LETTER A WITH DOT BELOW; CANONSEQ +1EB7 ; [.0861.007A.0002.1EB7] # LATIN SMALL LETTER A WITH BREVE AND DOT BELOW; CANONSEQ +1EB6 ; [.0861.007A.0008.1EB6] # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW; CANONSEQ +1EAD ; [.0861.007B.0002.1EAD] # LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW; CANONSEQ +1EAC ; [.0861.007B.0008.1EAC] # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW; CANONSEQ +1E01 ; [.0861.007F.0002.1E01] # LATIN SMALL LETTER A WITH RING BELOW; CANONSEQ +1E00 ; [.0861.007F.0008.1E00] # LATIN CAPITAL LETTER A WITH RING BELOW; CANONSEQ +00E6 ; [.0865.0020.0002.00E6] # LATIN SMALL LETTER AE +00C6 ; [.0865.0020.0008.00C6] # LATIN CAPITAL LETTER AE +01FD ; [.0865.0032.0002.01FD] # LATIN SMALL LETTER AE WITH ACUTE; CANONSEQ +01FC ; [.0865.0032.0008.01FC] # LATIN CAPITAL LETTER AE WITH ACUTE; CANONSEQ +01E3 ; [.0865.005A.0002.01E3] # LATIN SMALL LETTER AE WITH MACRON; CANONSEQ +01E2 ; [.0865.005A.0008.01E2] # LATIN CAPITAL LETTER AE WITH MACRON; CANONSEQ +0250 ; [.0869.0020.0002.0250] # LATIN SMALL LETTER TURNED A +0251 ; [.086D.0020.0002.0251] # LATIN SMALL LETTER ALPHA 
+0252 ; [.0871.0020.0002.0252] # LATIN SMALL LETTER TURNED ALPHA +0062 ; [.0875.0020.0002.0062] # LATIN SMALL LETTER B +0042 ; [.0875.0020.0008.0042] # LATIN CAPITAL LETTER B +1E03 ; [.0875.0052.0002.1E03] # LATIN SMALL LETTER B WITH DOT ABOVE; CANONSEQ +1E02 ; [.0875.0052.0008.1E02] # LATIN CAPITAL LETTER B WITH DOT ABOVE; CANONSEQ +1E05 ; [.0875.0079.0002.1E05] # LATIN SMALL LETTER B WITH DOT BELOW; CANONSEQ +1E04 ; [.0875.0079.0008.1E04] # LATIN CAPITAL LETTER B WITH DOT BELOW; CANONSEQ +1E07 ; [.0875.0089.0002.1E07] # LATIN SMALL LETTER B WITH LINE BELOW; CANONSEQ +1E06 ; [.0875.0089.0008.1E06] # LATIN CAPITAL LETTER B WITH LINE BELOW; CANONSEQ +0299 ; [.0879.0020.0002.0299] # LATIN LETTER SMALL CAPITAL B +0180 ; [.087D.0020.0002.0180] # LATIN SMALL LETTER B WITH STROKE +0253 ; [.0881.0020.0002.0253] # LATIN SMALL LETTER B WITH HOOK +0181 ; [.0881.0020.0008.0181] # LATIN CAPITAL LETTER B WITH HOOK +0183 ; [.0885.0020.0002.0183] # LATIN SMALL LETTER B WITH TOPBAR +0182 ; [.0885.0020.0008.0182] # LATIN CAPITAL LETTER B WITH TOPBAR +0063 ; [.0889.0020.0002.0063] # LATIN SMALL LETTER C +0043 ; [.0889.0020.0008.0043] # LATIN CAPITAL LETTER C +0107 ; [.0889.0032.0002.0107] # LATIN SMALL LETTER C WITH ACUTE; CANONSEQ +0106 ; [.0889.0032.0008.0106] # LATIN CAPITAL LETTER C WITH ACUTE; CANONSEQ +0109 ; [.0889.003C.0002.0109] # LATIN SMALL LETTER C WITH CIRCUMFLEX; CANONSEQ +0108 ; [.0889.003C.0008.0108] # LATIN CAPITAL LETTER C WITH CIRCUMFLEX; CANONSEQ +010D ; [.0889.0041.0002.010D] # LATIN SMALL LETTER C WITH CARON; CANONSEQ +010C ; [.0889.0041.0008.010C] # LATIN CAPITAL LETTER C WITH CARON; CANONSEQ +010B ; [.0889.0052.0002.010B] # LATIN SMALL LETTER C WITH DOT ABOVE; CANONSEQ +010A ; [.0889.0052.0008.010A] # LATIN CAPITAL LETTER C WITH DOT ABOVE; CANONSEQ +00E7 ; [.0889.0055.0002.00E7] # LATIN SMALL LETTER C WITH CEDILLA; CANONSEQ +00C7 ; [.0889.0055.0008.00C7] # LATIN CAPITAL LETTER C WITH CEDILLA; CANONSEQ +1E09 ; [.0889.0056.0002.1E09] # LATIN SMALL LETTER C WITH 
CEDILLA AND ACUTE; CANONSEQ +1E08 ; [.0889.0056.0008.1E08] # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE; CANONSEQ +0188 ; [.088D.0020.0002.0188] # LATIN SMALL LETTER C WITH HOOK +0187 ; [.088D.0020.0008.0187] # LATIN CAPITAL LETTER C WITH HOOK +0255 ; [.0891.0020.0002.0255] # LATIN SMALL LETTER C WITH CURL +0064 ; [.0895.0020.0002.0064] # LATIN SMALL LETTER D +0044 ; [.0895.0020.0008.0044] # LATIN CAPITAL LETTER D +010F ; [.0895.0041.0002.010F] # LATIN SMALL LETTER D WITH CARON; CANONSEQ +010E ; [.0895.0041.0008.010E] # LATIN CAPITAL LETTER D WITH CARON; CANONSEQ +1E0B ; [.0895.0052.0002.1E0B] # LATIN SMALL LETTER D WITH DOT ABOVE; CANONSEQ +1E0A ; [.0895.0052.0008.1E0A] # LATIN CAPITAL LETTER D WITH DOT ABOVE; CANONSEQ +1E11 ; [.0895.0055.0002.1E11] # LATIN SMALL LETTER D WITH CEDILLA; CANONSEQ +1E10 ; [.0895.0055.0008.1E10] # LATIN CAPITAL LETTER D WITH CEDILLA; CANONSEQ +1E0D ; [.0895.0079.0002.1E0D] # LATIN SMALL LETTER D WITH DOT BELOW; CANONSEQ +1E0C ; [.0895.0079.0008.1E0C] # LATIN CAPITAL LETTER D WITH DOT BELOW; CANONSEQ +1E13 ; [.0895.0085.0002.1E13] # LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW; CANONSEQ +1E12 ; [.0895.0085.0008.1E12] # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW; CANONSEQ +1E0F ; [.0895.0089.0002.1E0F] # LATIN SMALL LETTER D WITH LINE BELOW; CANONSEQ +1E0E ; [.0895.0089.0008.1E0E] # LATIN CAPITAL LETTER D WITH LINE BELOW; CANONSEQ +0111 ; [.0899.0020.0002.0111] # LATIN SMALL LETTER D WITH STROKE +0110 ; [.0899.0020.0008.0110] # LATIN CAPITAL LETTER D WITH STROKE +0256 ; [.089D.0020.0002.0256] # LATIN SMALL LETTER D WITH TAIL +0189 ; [.089D.0020.0008.0189] # LATIN CAPITAL LETTER AFRICAN D +0257 ; [.08A1.0020.0002.0257] # LATIN SMALL LETTER D WITH HOOK +018A ; [.08A1.0020.0008.018A] # LATIN CAPITAL LETTER D WITH HOOK +018C ; [.08A5.0020.0002.018C] # LATIN SMALL LETTER D WITH TOPBAR +018B ; [.08A5.0020.0008.018B] # LATIN CAPITAL LETTER D WITH TOPBAR +00F0 ; [.08A9.0020.0002.00F0] # LATIN SMALL LETTER ETH +00D0 ; [.08A9.0020.0008.00D0] # 
LATIN CAPITAL LETTER ETH +018D ; [.08AD.0020.0002.018D] # LATIN SMALL LETTER TURNED DELTA +0065 ; [.08B1.0020.0002.0065] # LATIN SMALL LETTER E +0045 ; [.08B1.0020.0008.0045] # LATIN CAPITAL LETTER E +00E9 ; [.08B1.0032.0002.00E9] # LATIN SMALL LETTER E WITH ACUTE; CANONSEQ +00C9 ; [.08B1.0032.0008.00C9] # LATIN CAPITAL LETTER E WITH ACUTE; CANONSEQ +00E8 ; [.08B1.0035.0002.00E8] # LATIN SMALL LETTER E WITH GRAVE; CANONSEQ +00C8 ; [.08B1.0035.0008.00C8] # LATIN CAPITAL LETTER E WITH GRAVE; CANONSEQ +0115 ; [.08B1.0037.0002.0115] # LATIN SMALL LETTER E WITH BREVE; CANONSEQ +0114 ; [.08B1.0037.0008.0114] # LATIN CAPITAL LETTER E WITH BREVE; CANONSEQ +00EA ; [.08B1.003C.0002.00EA] # LATIN SMALL LETTER E WITH CIRCUMFLEX; CANONSEQ +00CA ; [.08B1.003C.0008.00CA] # LATIN CAPITAL LETTER E WITH CIRCUMFLEX; CANONSEQ +1EBF ; [.08B1.003D.0002.1EBF] # LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE; CANONSEQ +1EBE ; [.08B1.003D.0008.1EBE] # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE; CANONSEQ +1EC1 ; [.08B1.003E.0002.1EC1] # LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE; CANONSEQ +1EC0 ; [.08B1.003E.0008.1EC0] # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE; CANONSEQ +1EC5 ; [.08B1.003F.0002.1EC5] # LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE; CANONSEQ +1EC4 ; [.08B1.003F.0008.1EC4] # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE; CANONSEQ +1EC3 ; [.08B1.0040.0002.1EC3] # LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE; CANONSEQ +1EC2 ; [.08B1.0040.0008.1EC2] # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE; CANONSEQ +011B ; [.08B1.0041.0002.011B] # LATIN SMALL LETTER E WITH CARON; CANONSEQ +011A ; [.08B1.0041.0008.011A] # LATIN CAPITAL LETTER E WITH CARON; CANONSEQ +00EB ; [.08B1.0047.0002.00EB] # LATIN SMALL LETTER E WITH DIAERESIS; CANONSEQ +00CB ; [.08B1.0047.0008.00CB] # LATIN CAPITAL LETTER E WITH DIAERESIS; CANONSEQ +1EBD ; [.08B1.004E.0002.1EBD] # LATIN SMALL LETTER E WITH TILDE; CANONSEQ +1EBC ; [.08B1.004E.0008.1EBC] # LATIN CAPITAL LETTER E 
WITH TILDE; CANONSEQ +0117 ; [.08B1.0052.0002.0117] # LATIN SMALL LETTER E WITH DOT ABOVE; CANONSEQ +0116 ; [.08B1.0052.0008.0116] # LATIN CAPITAL LETTER E WITH DOT ABOVE; CANONSEQ +0229 ; [.08B1.0055.0002.0229] # LATIN SMALL LETTER E WITH CEDILLA; CANONSEQ +0228 ; [.08B1.0055.0008.0228] # LATIN CAPITAL LETTER E WITH CEDILLA; CANONSEQ +1E1D ; [.08B1.0057.0002.1E1D] # LATIN SMALL LETTER E WITH CEDILLA AND BREVE; CANONSEQ +1E1C ; [.08B1.0057.0008.1E1C] # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE; CANONSEQ +0119 ; [.08B1.0058.0002.0119] # LATIN SMALL LETTER E WITH OGONEK; CANONSEQ +0118 ; [.08B1.0058.0008.0118] # LATIN CAPITAL LETTER E WITH OGONEK; CANONSEQ +0113 ; [.08B1.005A.0002.0113] # LATIN SMALL LETTER E WITH MACRON; CANONSEQ +0112 ; [.08B1.005A.0008.0112] # LATIN CAPITAL LETTER E WITH MACRON; CANONSEQ +1E17 ; [.08B1.005B.0002.1E17] # LATIN SMALL LETTER E WITH MACRON AND ACUTE; CANONSEQ +1E16 ; [.08B1.005B.0008.1E16] # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE; CANONSEQ +1E15 ; [.08B1.005C.0002.1E15] # LATIN SMALL LETTER E WITH MACRON AND GRAVE; CANONSEQ +1E14 ; [.08B1.005C.0008.1E14] # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE; CANONSEQ +1EBB ; [.08B1.005F.0002.1EBB] # LATIN SMALL LETTER E WITH HOOK ABOVE; CANONSEQ +1EBA ; [.08B1.005F.0008.1EBA] # LATIN CAPITAL LETTER E WITH HOOK ABOVE; CANONSEQ +0205 ; [.08B1.0062.0002.0205] # LATIN SMALL LETTER E WITH DOUBLE GRAVE; CANONSEQ +0204 ; [.08B1.0062.0008.0204] # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE; CANONSEQ +0207 ; [.08B1.0064.0002.0207] # LATIN SMALL LETTER E WITH INVERTED BREVE; CANONSEQ +0206 ; [.08B1.0064.0008.0206] # LATIN CAPITAL LETTER E WITH INVERTED BREVE; CANONSEQ +1EB9 ; [.08B1.0079.0002.1EB9] # LATIN SMALL LETTER E WITH DOT BELOW; CANONSEQ +1EB8 ; [.08B1.0079.0008.1EB8] # LATIN CAPITAL LETTER E WITH DOT BELOW; CANONSEQ +1EC7 ; [.08B1.007B.0002.1EC7] # LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW; CANONSEQ +1EC6 ; [.08B1.007B.0008.1EC6] # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND 
DOT BELOW; CANONSEQ +1E19 ; [.08B1.0085.0002.1E19] # LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW; CANONSEQ +1E18 ; [.08B1.0085.0008.1E18] # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW; CANONSEQ +1E1B ; [.08B1.0088.0002.1E1B] # LATIN SMALL LETTER E WITH TILDE BELOW; CANONSEQ +1E1A ; [.08B1.0088.0008.1E1A] # LATIN CAPITAL LETTER E WITH TILDE BELOW; CANONSEQ +01DD ; [.08B5.0020.0002.01DD] # LATIN SMALL LETTER TURNED E +018E ; [.08B5.0020.0008.018E] # LATIN CAPITAL LETTER REVERSED E +0259 ; [.08B9.0020.0002.0259] # LATIN SMALL LETTER SCHWA +018F ; [.08B9.0020.0008.018F] # LATIN CAPITAL LETTER SCHWA +025B ; [.08BD.0020.0002.025B] # LATIN SMALL LETTER OPEN E +0190 ; [.08BD.0020.0008.0190] # LATIN CAPITAL LETTER OPEN E +0258 ; [.08C1.0020.0002.0258] # LATIN SMALL LETTER REVERSED E +025A ; [.08C5.0020.0002.025A] # LATIN SMALL LETTER SCHWA WITH HOOK +025C ; [.08C9.0020.0002.025C] # LATIN SMALL LETTER REVERSED OPEN E +025D ; [.08CD.0020.0002.025D] # LATIN SMALL LETTER REVERSED OPEN E WITH HOOK +025E ; [.08D1.0020.0002.025E] # LATIN SMALL LETTER CLOSED REVERSED OPEN E +029A ; [.08D5.0020.0002.029A] # LATIN SMALL LETTER CLOSED OPEN E +0264 ; [.08D9.0020.0002.0264] # LATIN SMALL LETTER RAMS HORN +0066 ; [.08DD.0020.0002.0066] # LATIN SMALL LETTER F +0046 ; [.08DD.0020.0008.0046] # LATIN CAPITAL LETTER F +1E1F ; [.08DD.0052.0002.1E1F] # LATIN SMALL LETTER F WITH DOT ABOVE; CANONSEQ +1E1E ; [.08DD.0052.0008.1E1E] # LATIN CAPITAL LETTER F WITH DOT ABOVE; CANONSEQ +0192 ; [.08E1.0020.0002.0192] # LATIN SMALL LETTER F WITH HOOK +0191 ; [.08E1.0020.0008.0191] # LATIN CAPITAL LETTER F WITH HOOK +0067 ; [.08E5.0020.0002.0067] # LATIN SMALL LETTER G +0047 ; [.08E5.0020.0008.0047] # LATIN CAPITAL LETTER G +01F5 ; [.08E5.0032.0002.01F5] # LATIN SMALL LETTER G WITH ACUTE; CANONSEQ +01F4 ; [.08E5.0032.0008.01F4] # LATIN CAPITAL LETTER G WITH ACUTE; CANONSEQ +011F ; [.08E5.0037.0002.011F] # LATIN SMALL LETTER G WITH BREVE; CANONSEQ +011E ; [.08E5.0037.0008.011E] # LATIN CAPITAL LETTER G WITH 
BREVE; CANONSEQ +011D ; [.08E5.003C.0002.011D] # LATIN SMALL LETTER G WITH CIRCUMFLEX; CANONSEQ +011C ; [.08E5.003C.0008.011C] # LATIN CAPITAL LETTER G WITH CIRCUMFLEX; CANONSEQ +01E7 ; [.08E5.0041.0002.01E7] # LATIN SMALL LETTER G WITH CARON; CANONSEQ +01E6 ; [.08E5.0041.0008.01E6] # LATIN CAPITAL LETTER G WITH CARON; CANONSEQ +0121 ; [.08E5.0052.0002.0121] # LATIN SMALL LETTER G WITH DOT ABOVE; CANONSEQ +0120 ; [.08E5.0052.0008.0120] # LATIN CAPITAL LETTER G WITH DOT ABOVE; CANONSEQ +0123 ; [.08E5.0055.0002.0123] # LATIN SMALL LETTER G WITH CEDILLA; CANONSEQ +0122 ; [.08E5.0055.0008.0122] # LATIN CAPITAL LETTER G WITH CEDILLA; CANONSEQ +1E21 ; [.08E5.005A.0002.1E21] # LATIN SMALL LETTER G WITH MACRON; CANONSEQ +1E20 ; [.08E5.005A.0008.1E20] # LATIN CAPITAL LETTER G WITH MACRON; CANONSEQ +0262 ; [.08E9.0020.0002.0262] # LATIN LETTER SMALL CAPITAL G +01E5 ; [.08ED.0020.0002.01E5] # LATIN SMALL LETTER G WITH STROKE +01E4 ; [.08ED.0020.0008.01E4] # LATIN CAPITAL LETTER G WITH STROKE +0260 ; [.08F1.0020.0002.0260] # LATIN SMALL LETTER G WITH HOOK +0193 ; [.08F1.0020.0008.0193] # LATIN CAPITAL LETTER G WITH HOOK +029B ; [.08F5.0020.0002.029B] # LATIN LETTER SMALL CAPITAL G WITH HOOK +0263 ; [.08F9.0020.0002.0263] # LATIN SMALL LETTER GAMMA +0194 ; [.08F9.0020.0008.0194] # LATIN CAPITAL LETTER GAMMA +0261 ; [.08FD.0020.0002.0261] # LATIN SMALL LETTER SCRIPT G +01A3 ; [.0901.0020.0002.01A3] # LATIN SMALL LETTER OI +01A2 ; [.0901.0020.0008.01A2] # LATIN CAPITAL LETTER OI +0068 ; [.0905.0020.0002.0068] # LATIN SMALL LETTER H +0048 ; [.0905.0020.0008.0048] # LATIN CAPITAL LETTER H +0125 ; [.0905.003C.0002.0125] # LATIN SMALL LETTER H WITH CIRCUMFLEX; CANONSEQ +0124 ; [.0905.003C.0008.0124] # LATIN CAPITAL LETTER H WITH CIRCUMFLEX; CANONSEQ +021F ; [.0905.0041.0002.021F] # LATIN SMALL LETTER H WITH CARON; CANONSEQ +021E ; [.0905.0041.0008.021E] # LATIN CAPITAL LETTER H WITH CARON; CANONSEQ +1E27 ; [.0905.0047.0002.1E27] # LATIN SMALL LETTER H WITH DIAERESIS; CANONSEQ +1E26 ; 
[.0905.0047.0008.1E26] # LATIN CAPITAL LETTER H WITH DIAERESIS; CANONSEQ +1E23 ; [.0905.0052.0002.1E23] # LATIN SMALL LETTER H WITH DOT ABOVE; CANONSEQ +1E22 ; [.0905.0052.0008.1E22] # LATIN CAPITAL LETTER H WITH DOT ABOVE; CANONSEQ +1E29 ; [.0905.0055.0002.1E29] # LATIN SMALL LETTER H WITH CEDILLA; CANONSEQ +1E28 ; [.0905.0055.0008.1E28] # LATIN CAPITAL LETTER H WITH CEDILLA; CANONSEQ +1E25 ; [.0905.0079.0002.1E25] # LATIN SMALL LETTER H WITH DOT BELOW; CANONSEQ +1E24 ; [.0905.0079.0008.1E24] # LATIN CAPITAL LETTER H WITH DOT BELOW; CANONSEQ +1E2B ; [.0905.0086.0002.1E2B] # LATIN SMALL LETTER H WITH BREVE BELOW; CANONSEQ +1E2A ; [.0905.0086.0008.1E2A] # LATIN CAPITAL LETTER H WITH BREVE BELOW; CANONSEQ +1E96 ; [.0905.0089.0002.1E96] # LATIN SMALL LETTER H WITH LINE BELOW; CANONSEQ +029C ; [.0909.0020.0002.029C] # LATIN LETTER SMALL CAPITAL H +0195 ; [.090D.0020.0002.0195] # LATIN SMALL LETTER HV +01F6 ; [.090D.0020.0008.01F6] # LATIN CAPITAL LETTER HWAIR +0127 ; [.0911.0020.0002.0127] # LATIN SMALL LETTER H WITH STROKE +0126 ; [.0911.0020.0008.0126] # LATIN CAPITAL LETTER H WITH STROKE +0266 ; [.0915.0020.0002.0266] # LATIN SMALL LETTER H WITH HOOK +0267 ; [.0919.0020.0002.0267] # LATIN SMALL LETTER HENG WITH HOOK +02BB ; [.091D.0020.0002.02BB] # MODIFIER LETTER TURNED COMMA +02BD ; [.091E.0020.0002.02BD] # MODIFIER LETTER REVERSED COMMA +0069 ; [.091F.0020.0002.0069] # LATIN SMALL LETTER I +0049 ; [.091F.0020.0008.0049] # LATIN CAPITAL LETTER I +00ED ; [.091F.0032.0002.00ED] # LATIN SMALL LETTER I WITH ACUTE; CANONSEQ +00CD ; [.091F.0032.0008.00CD] # LATIN CAPITAL LETTER I WITH ACUTE; CANONSEQ +00EC ; [.091F.0035.0002.00EC] # LATIN SMALL LETTER I WITH GRAVE; CANONSEQ +00CC ; [.091F.0035.0008.00CC] # LATIN CAPITAL LETTER I WITH GRAVE; CANONSEQ +012D ; [.091F.0037.0002.012D] # LATIN SMALL LETTER I WITH BREVE; CANONSEQ +012C ; [.091F.0037.0008.012C] # LATIN CAPITAL LETTER I WITH BREVE; CANONSEQ +00EE ; [.091F.003C.0002.00EE] # LATIN SMALL LETTER I WITH CIRCUMFLEX; 
CANONSEQ +00CE ; [.091F.003C.0008.00CE] # LATIN CAPITAL LETTER I WITH CIRCUMFLEX; CANONSEQ +01D0 ; [.091F.0041.0002.01D0] # LATIN SMALL LETTER I WITH CARON; CANONSEQ +01CF ; [.091F.0041.0008.01CF] # LATIN CAPITAL LETTER I WITH CARON; CANONSEQ +00EF ; [.091F.0047.0002.00EF] # LATIN SMALL LETTER I WITH DIAERESIS; CANONSEQ +00CF ; [.091F.0047.0008.00CF] # LATIN CAPITAL LETTER I WITH DIAERESIS; CANONSEQ +1E2F ; [.091F.0048.0002.1E2F] # LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE; CANONSEQ +1E2E ; [.091F.0048.0008.1E2E] # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE; CANONSEQ +0129 ; [.091F.004E.0002.0129] # LATIN SMALL LETTER I WITH TILDE; CANONSEQ +0128 ; [.091F.004E.0008.0128] # LATIN CAPITAL LETTER I WITH TILDE; CANONSEQ +0130 ; [.091F.0052.0008.0130] # LATIN CAPITAL LETTER I WITH DOT ABOVE; CANONSEQ +012F ; [.091F.0058.0002.012F] # LATIN SMALL LETTER I WITH OGONEK; CANONSEQ +012E ; [.091F.0058.0008.012E] # LATIN CAPITAL LETTER I WITH OGONEK; CANONSEQ +012B ; [.091F.005A.0002.012B] # LATIN SMALL LETTER I WITH MACRON; CANONSEQ +012A ; [.091F.005A.0008.012A] # LATIN CAPITAL LETTER I WITH MACRON; CANONSEQ +1EC9 ; [.091F.005F.0002.1EC9] # LATIN SMALL LETTER I WITH HOOK ABOVE; CANONSEQ +1EC8 ; [.091F.005F.0008.1EC8] # LATIN CAPITAL LETTER I WITH HOOK ABOVE; CANONSEQ +0209 ; [.091F.0062.0002.0209] # LATIN SMALL LETTER I WITH DOUBLE GRAVE; CANONSEQ +0208 ; [.091F.0062.0008.0208] # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE; CANONSEQ +020B ; [.091F.0064.0002.020B] # LATIN SMALL LETTER I WITH INVERTED BREVE; CANONSEQ +020A ; [.091F.0064.0008.020A] # LATIN CAPITAL LETTER I WITH INVERTED BREVE; CANONSEQ +1ECB ; [.091F.0079.0002.1ECB] # LATIN SMALL LETTER I WITH DOT BELOW; CANONSEQ +1ECA ; [.091F.0079.0008.1ECA] # LATIN CAPITAL LETTER I WITH DOT BELOW; CANONSEQ +1E2D ; [.091F.0088.0002.1E2D] # LATIN SMALL LETTER I WITH TILDE BELOW; CANONSEQ +1E2C ; [.091F.0088.0008.1E2C] # LATIN CAPITAL LETTER I WITH TILDE BELOW; CANONSEQ +0131 ; [.0923.0020.0002.0131] # LATIN SMALL LETTER 
DOTLESS I +026A ; [.0927.0020.0002.026A] # LATIN LETTER SMALL CAPITAL I +0268 ; [.092B.0020.0002.0268] # LATIN SMALL LETTER I WITH STROKE +0197 ; [.092B.0020.0008.0197] # LATIN CAPITAL LETTER I WITH STROKE +0269 ; [.092F.0020.0002.0269] # LATIN SMALL LETTER IOTA +0196 ; [.092F.0020.0008.0196] # LATIN CAPITAL LETTER IOTA +006A ; [.0933.0020.0002.006A] # LATIN SMALL LETTER J +004A ; [.0933.0020.0008.004A] # LATIN CAPITAL LETTER J +0135 ; [.0933.003C.0002.0135] # LATIN SMALL LETTER J WITH CIRCUMFLEX; CANONSEQ +0134 ; [.0933.003C.0008.0134] # LATIN CAPITAL LETTER J WITH CIRCUMFLEX; CANONSEQ +01F0 ; [.0933.0041.0002.01F0] # LATIN SMALL LETTER J WITH CARON; CANONSEQ +029D ; [.0937.0020.0002.029D] # LATIN SMALL LETTER J WITH CROSSED-TAIL +025F ; [.093B.0020.0002.025F] # LATIN SMALL LETTER DOTLESS J WITH STROKE +0284 ; [.093F.0020.0002.0284] # LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK +006B ; [.0943.0020.0002.006B] # LATIN SMALL LETTER K +004B ; [.0943.0020.0008.004B] # LATIN CAPITAL LETTER K +212A ; [.0943.0020.0008.212A] # KELVIN SIGN; CANON +1E31 ; [.0943.0032.0002.1E31] # LATIN SMALL LETTER K WITH ACUTE; CANONSEQ +1E30 ; [.0943.0032.0008.1E30] # LATIN CAPITAL LETTER K WITH ACUTE; CANONSEQ +01E9 ; [.0943.0041.0002.01E9] # LATIN SMALL LETTER K WITH CARON; CANONSEQ +01E8 ; [.0943.0041.0008.01E8] # LATIN CAPITAL LETTER K WITH CARON; CANONSEQ +0137 ; [.0943.0055.0002.0137] # LATIN SMALL LETTER K WITH CEDILLA; CANONSEQ +0136 ; [.0943.0055.0008.0136] # LATIN CAPITAL LETTER K WITH CEDILLA; CANONSEQ +1E33 ; [.0943.0079.0002.1E33] # LATIN SMALL LETTER K WITH DOT BELOW; CANONSEQ +1E32 ; [.0943.0079.0008.1E32] # LATIN CAPITAL LETTER K WITH DOT BELOW; CANONSEQ +1E35 ; [.0943.0089.0002.1E35] # LATIN SMALL LETTER K WITH LINE BELOW; CANONSEQ +1E34 ; [.0943.0089.0008.1E34] # LATIN CAPITAL LETTER K WITH LINE BELOW; CANONSEQ +0199 ; [.0947.0020.0002.0199] # LATIN SMALL LETTER K WITH HOOK +0198 ; [.0947.0020.0008.0198] # LATIN CAPITAL LETTER K WITH HOOK +029E ; 
[.094B.0020.0002.029E] # LATIN SMALL LETTER TURNED K +006C ; [.094F.0020.0002.006C] # LATIN SMALL LETTER L +004C ; [.094F.0020.0008.004C] # LATIN CAPITAL LETTER L +013A ; [.094F.0032.0002.013A] # LATIN SMALL LETTER L WITH ACUTE; CANONSEQ +0139 ; [.094F.0032.0008.0139] # LATIN CAPITAL LETTER L WITH ACUTE; CANONSEQ +013E ; [.094F.0041.0002.013E] # LATIN SMALL LETTER L WITH CARON; CANONSEQ +013D ; [.094F.0041.0008.013D] # LATIN CAPITAL LETTER L WITH CARON; CANONSEQ +013C ; [.094F.0055.0002.013C] # LATIN SMALL LETTER L WITH CEDILLA; CANONSEQ +013B ; [.094F.0055.0008.013B] # LATIN CAPITAL LETTER L WITH CEDILLA; CANONSEQ +1E37 ; [.094F.0079.0002.1E37] # LATIN SMALL LETTER L WITH DOT BELOW; CANONSEQ +1E36 ; [.094F.0079.0008.1E36] # LATIN CAPITAL LETTER L WITH DOT BELOW; CANONSEQ +1E39 ; [.094F.007D.0002.1E39] # LATIN SMALL LETTER L WITH DOT BELOW AND MACRON; CANONSEQ +1E38 ; [.094F.007D.0008.1E38] # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON; CANONSEQ +1E3D ; [.094F.0085.0002.1E3D] # LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW; CANONSEQ +1E3C ; [.094F.0085.0008.1E3C] # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW; CANONSEQ +1E3B ; [.094F.0089.0002.1E3B] # LATIN SMALL LETTER L WITH LINE BELOW; CANONSEQ +1E3A ; [.094F.0089.0008.1E3A] # LATIN CAPITAL LETTER L WITH LINE BELOW; CANONSEQ +029F ; [.0953.0020.0002.029F] # LATIN LETTER SMALL CAPITAL L +0142 ; [.0957.0020.0002.0142] # LATIN SMALL LETTER L WITH STROKE +0141 ; [.0957.0020.0008.0141] # LATIN CAPITAL LETTER L WITH STROKE +019A ; [.095B.0020.0002.019A] # LATIN SMALL LETTER L WITH BAR +026B ; [.095F.0020.0002.026B] # LATIN SMALL LETTER L WITH MIDDLE TILDE +026C ; [.0963.0020.0002.026C] # LATIN SMALL LETTER L WITH BELT +026D ; [.0967.0020.0002.026D] # LATIN SMALL LETTER L WITH RETROFLEX HOOK +026E ; [.096B.0020.0002.026E] # LATIN SMALL LETTER LEZH +019B ; [.096F.0020.0002.019B] # LATIN SMALL LETTER LAMBDA WITH STROKE +028E ; [.0973.0020.0002.028E] # LATIN SMALL LETTER TURNED Y +006D ; [.0977.0020.0002.006D] # LATIN 
SMALL LETTER M +004D ; [.0977.0020.0008.004D] # LATIN CAPITAL LETTER M +1E3F ; [.0977.0032.0002.1E3F] # LATIN SMALL LETTER M WITH ACUTE; CANONSEQ +1E3E ; [.0977.0032.0008.1E3E] # LATIN CAPITAL LETTER M WITH ACUTE; CANONSEQ +1E41 ; [.0977.0052.0002.1E41] # LATIN SMALL LETTER M WITH DOT ABOVE; CANONSEQ +1E40 ; [.0977.0052.0008.1E40] # LATIN CAPITAL LETTER M WITH DOT ABOVE; CANONSEQ +1E43 ; [.0977.0079.0002.1E43] # LATIN SMALL LETTER M WITH DOT BELOW; CANONSEQ +1E42 ; [.0977.0079.0008.1E42] # LATIN CAPITAL LETTER M WITH DOT BELOW; CANONSEQ +0271 ; [.097B.0020.0002.0271] # LATIN SMALL LETTER M WITH HOOK +006E ; [.097F.0020.0002.006E] # LATIN SMALL LETTER N +004E ; [.097F.0020.0008.004E] # LATIN CAPITAL LETTER N +0144 ; [.097F.0032.0002.0144] # LATIN SMALL LETTER N WITH ACUTE; CANONSEQ +0143 ; [.097F.0032.0008.0143] # LATIN CAPITAL LETTER N WITH ACUTE; CANONSEQ +01F9 ; [.097F.0035.0002.01F9] # LATIN SMALL LETTER N WITH GRAVE; CANONSEQ +01F8 ; [.097F.0035.0008.01F8] # LATIN CAPITAL LETTER N WITH GRAVE; CANONSEQ +0148 ; [.097F.0041.0002.0148] # LATIN SMALL LETTER N WITH CARON; CANONSEQ +0147 ; [.097F.0041.0008.0147] # LATIN CAPITAL LETTER N WITH CARON; CANONSEQ +00F1 ; [.097F.004E.0002.00F1] # LATIN SMALL LETTER N WITH TILDE; CANONSEQ +00D1 ; [.097F.004E.0008.00D1] # LATIN CAPITAL LETTER N WITH TILDE; CANONSEQ +1E45 ; [.097F.0052.0002.1E45] # LATIN SMALL LETTER N WITH DOT ABOVE; CANONSEQ +1E44 ; [.097F.0052.0008.1E44] # LATIN CAPITAL LETTER N WITH DOT ABOVE; CANONSEQ +0146 ; [.097F.0055.0002.0146] # LATIN SMALL LETTER N WITH CEDILLA; CANONSEQ +0145 ; [.097F.0055.0008.0145] # LATIN CAPITAL LETTER N WITH CEDILLA; CANONSEQ +1E47 ; [.097F.0079.0002.1E47] # LATIN SMALL LETTER N WITH DOT BELOW; CANONSEQ +1E46 ; [.097F.0079.0008.1E46] # LATIN CAPITAL LETTER N WITH DOT BELOW; CANONSEQ +1E4B ; [.097F.0085.0002.1E4B] # LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW; CANONSEQ +1E4A ; [.097F.0085.0008.1E4A] # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW; CANONSEQ +1E49 ; 
[.097F.0089.0002.1E49] # LATIN SMALL LETTER N WITH LINE BELOW; CANONSEQ +1E48 ; [.097F.0089.0008.1E48] # LATIN CAPITAL LETTER N WITH LINE BELOW; CANONSEQ +0274 ; [.0983.0020.0002.0274] # LATIN LETTER SMALL CAPITAL N +0272 ; [.0987.0020.0002.0272] # LATIN SMALL LETTER N WITH LEFT HOOK +019D ; [.0987.0020.0008.019D] # LATIN CAPITAL LETTER N WITH LEFT HOOK +019E ; [.098B.0020.0002.019E] # LATIN SMALL LETTER N WITH LONG RIGHT LEG +0273 ; [.098F.0020.0002.0273] # LATIN SMALL LETTER N WITH RETROFLEX HOOK +014B ; [.0993.0020.0002.014B] # LATIN SMALL LETTER ENG +014A ; [.0993.0020.0008.014A] # LATIN CAPITAL LETTER ENG +006F ; [.0997.0020.0002.006F] # LATIN SMALL LETTER O +004F ; [.0997.0020.0008.004F] # LATIN CAPITAL LETTER O +00F3 ; [.0997.0032.0002.00F3] # LATIN SMALL LETTER O WITH ACUTE; CANONSEQ +00D3 ; [.0997.0032.0008.00D3] # LATIN CAPITAL LETTER O WITH ACUTE; CANONSEQ +00F2 ; [.0997.0035.0002.00F2] # LATIN SMALL LETTER O WITH GRAVE; CANONSEQ +00D2 ; [.0997.0035.0008.00D2] # LATIN CAPITAL LETTER O WITH GRAVE; CANONSEQ +014F ; [.0997.0037.0002.014F] # LATIN SMALL LETTER O WITH BREVE; CANONSEQ +014E ; [.0997.0037.0008.014E] # LATIN CAPITAL LETTER O WITH BREVE; CANONSEQ +00F4 ; [.0997.003C.0002.00F4] # LATIN SMALL LETTER O WITH CIRCUMFLEX; CANONSEQ +00D4 ; [.0997.003C.0008.00D4] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX; CANONSEQ +1ED1 ; [.0997.003D.0002.1ED1] # LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE; CANONSEQ +1ED0 ; [.0997.003D.0008.1ED0] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE; CANONSEQ +1ED3 ; [.0997.003E.0002.1ED3] # LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE; CANONSEQ +1ED2 ; [.0997.003E.0008.1ED2] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE; CANONSEQ +1ED7 ; [.0997.003F.0002.1ED7] # LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE; CANONSEQ +1ED6 ; [.0997.003F.0008.1ED6] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE; CANONSEQ +1ED5 ; [.0997.0040.0002.1ED5] # LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE; CANONSEQ +1ED4 ; 
[.0997.0040.0008.1ED4] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE; CANONSEQ +01D2 ; [.0997.0041.0002.01D2] # LATIN SMALL LETTER O WITH CARON; CANONSEQ +01D1 ; [.0997.0041.0008.01D1] # LATIN CAPITAL LETTER O WITH CARON; CANONSEQ +00F6 ; [.0997.0047.0002.00F6] # LATIN SMALL LETTER O WITH DIAERESIS; CANONSEQ +00D6 ; [.0997.0047.0008.00D6] # LATIN CAPITAL LETTER O WITH DIAERESIS; CANONSEQ +022B ; [.0997.004B.0002.022B] # LATIN SMALL LETTER O WITH DIAERESIS AND MACRON; CANONSEQ +022A ; [.0997.004B.0008.022A] # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON; CANONSEQ +0151 ; [.0997.004D.0002.0151] # LATIN SMALL LETTER O WITH DOUBLE ACUTE; CANONSEQ +0150 ; [.0997.004D.0008.0150] # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE; CANONSEQ +00F5 ; [.0997.004E.0002.00F5] # LATIN SMALL LETTER O WITH TILDE; CANONSEQ +00D5 ; [.0997.004E.0008.00D5] # LATIN CAPITAL LETTER O WITH TILDE; CANONSEQ +1E4D ; [.0997.004F.0002.1E4D] # LATIN SMALL LETTER O WITH TILDE AND ACUTE; CANONSEQ +1E4C ; [.0997.004F.0008.1E4C] # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE; CANONSEQ +1E4F ; [.0997.0050.0002.1E4F] # LATIN SMALL LETTER O WITH TILDE AND DIAERESIS; CANONSEQ +1E4E ; [.0997.0050.0008.1E4E] # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS; CANONSEQ +022D ; [.0997.0051.0002.022D] # LATIN SMALL LETTER O WITH TILDE AND MACRON; CANONSEQ +022C ; [.0997.0051.0008.022C] # LATIN CAPITAL LETTER O WITH TILDE AND MACRON; CANONSEQ +022F ; [.0997.0052.0002.022F] # LATIN SMALL LETTER O WITH DOT ABOVE; CANONSEQ +022E ; [.0997.0052.0008.022E] # LATIN CAPITAL LETTER O WITH DOT ABOVE; CANONSEQ +0231 ; [.0997.0053.0002.0231] # LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON; CANONSEQ +0230 ; [.0997.0053.0008.0230] # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON; CANONSEQ +01EB ; [.0997.0058.0002.01EB] # LATIN SMALL LETTER O WITH OGONEK; CANONSEQ +01EA ; [.0997.0058.0008.01EA] # LATIN CAPITAL LETTER O WITH OGONEK; CANONSEQ +01ED ; [.0997.0059.0002.01ED] # LATIN SMALL LETTER O WITH OGONEK AND 
MACRON; CANONSEQ +01EC ; [.0997.0059.0008.01EC] # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON; CANONSEQ +014D ; [.0997.005A.0002.014D] # LATIN SMALL LETTER O WITH MACRON; CANONSEQ +014C ; [.0997.005A.0008.014C] # LATIN CAPITAL LETTER O WITH MACRON; CANONSEQ +1E53 ; [.0997.005B.0002.1E53] # LATIN SMALL LETTER O WITH MACRON AND ACUTE; CANONSEQ +1E52 ; [.0997.005B.0008.1E52] # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE; CANONSEQ +1E51 ; [.0997.005C.0002.1E51] # LATIN SMALL LETTER O WITH MACRON AND GRAVE; CANONSEQ +1E50 ; [.0997.005C.0008.1E50] # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE; CANONSEQ +1ECF ; [.0997.005F.0002.1ECF] # LATIN SMALL LETTER O WITH HOOK ABOVE; CANONSEQ +1ECE ; [.0997.005F.0008.1ECE] # LATIN CAPITAL LETTER O WITH HOOK ABOVE; CANONSEQ +020D ; [.0997.0062.0002.020D] # LATIN SMALL LETTER O WITH DOUBLE GRAVE; CANONSEQ +020C ; [.0997.0062.0008.020C] # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE; CANONSEQ +020F ; [.0997.0064.0002.020F] # LATIN SMALL LETTER O WITH INVERTED BREVE; CANONSEQ +020E ; [.0997.0064.0008.020E] # LATIN CAPITAL LETTER O WITH INVERTED BREVE; CANONSEQ +01A1 ; [.0997.006C.0002.01A1] # LATIN SMALL LETTER O WITH HORN; CANONSEQ +01A0 ; [.0997.006C.0008.01A0] # LATIN CAPITAL LETTER O WITH HORN; CANONSEQ +1EDB ; [.0997.006D.0002.1EDB] # LATIN SMALL LETTER O WITH HORN AND ACUTE; CANONSEQ +1EDA ; [.0997.006D.0008.1EDA] # LATIN CAPITAL LETTER O WITH HORN AND ACUTE; CANONSEQ +1EDD ; [.0997.006E.0002.1EDD] # LATIN SMALL LETTER O WITH HORN AND GRAVE; CANONSEQ +1EDC ; [.0997.006E.0008.1EDC] # LATIN CAPITAL LETTER O WITH HORN AND GRAVE; CANONSEQ +1EE1 ; [.0997.006F.0002.1EE1] # LATIN SMALL LETTER O WITH HORN AND TILDE; CANONSEQ +1EE0 ; [.0997.006F.0008.1EE0] # LATIN CAPITAL LETTER O WITH HORN AND TILDE; CANONSEQ +1EDF ; [.0997.0070.0002.1EDF] # LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE; CANONSEQ +1EDE ; [.0997.0070.0008.1EDE] # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE; CANONSEQ +1EE3 ; [.0997.0071.0002.1EE3] # LATIN SMALL LETTER O 
WITH HORN AND DOT BELOW; CANONSEQ +1EE2 ; [.0997.0071.0008.1EE2] # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW; CANONSEQ +1ECD ; [.0997.0079.0002.1ECD] # LATIN SMALL LETTER O WITH DOT BELOW; CANONSEQ +1ECC ; [.0997.0079.0008.1ECC] # LATIN CAPITAL LETTER O WITH DOT BELOW; CANONSEQ +1ED9 ; [.0997.007B.0002.1ED9] # LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW; CANONSEQ +1ED8 ; [.0997.007B.0008.1ED8] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW; CANONSEQ +0276 ; [.099B.0020.0002.0276] # LATIN LETTER SMALL CAPITAL OE +00F8 ; [.099F.0020.0002.00F8] # LATIN SMALL LETTER O WITH STROKE +00D8 ; [.099F.0020.0008.00D8] # LATIN CAPITAL LETTER O WITH STROKE +01FF ; [.099F.0032.0002.01FF] # LATIN SMALL LETTER O WITH STROKE AND ACUTE; CANONSEQ +01FE ; [.099F.0032.0008.01FE] # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE; CANONSEQ +0254 ; [.09A3.0020.0002.0254] # LATIN SMALL LETTER OPEN O +0186 ; [.09A3.0020.0008.0186] # LATIN CAPITAL LETTER OPEN O +0275 ; [.09A7.0020.0002.0275] # LATIN SMALL LETTER BARRED O +019F ; [.09A7.0020.0008.019F] # LATIN CAPITAL LETTER O WITH MIDDLE TILDE +0277 ; [.09AB.0020.0002.0277] # LATIN SMALL LETTER CLOSED OMEGA +0223 ; [.09AF.0020.0002.0223] # LATIN SMALL LETTER OU +0222 ; [.09AF.0020.0008.0222] # LATIN CAPITAL LETTER OU +0070 ; [.09B3.0020.0002.0070] # LATIN SMALL LETTER P +0050 ; [.09B3.0020.0008.0050] # LATIN CAPITAL LETTER P +1E55 ; [.09B3.0032.0002.1E55] # LATIN SMALL LETTER P WITH ACUTE; CANONSEQ +1E54 ; [.09B3.0032.0008.1E54] # LATIN CAPITAL LETTER P WITH ACUTE; CANONSEQ +1E57 ; [.09B3.0052.0002.1E57] # LATIN SMALL LETTER P WITH DOT ABOVE; CANONSEQ +1E56 ; [.09B3.0052.0008.1E56] # LATIN CAPITAL LETTER P WITH DOT ABOVE; CANONSEQ +01A5 ; [.09B7.0020.0002.01A5] # LATIN SMALL LETTER P WITH HOOK +01A4 ; [.09B7.0020.0008.01A4] # LATIN CAPITAL LETTER P WITH HOOK +0278 ; [.09BB.0020.0002.0278] # LATIN SMALL LETTER PHI +0071 ; [.09BF.0020.0002.0071] # LATIN SMALL LETTER Q +0051 ; [.09BF.0020.0008.0051] # LATIN CAPITAL LETTER Q 
+02A0 ; [.09C3.0020.0002.02A0] # LATIN SMALL LETTER Q WITH HOOK +0138 ; [.09C7.0020.0002.0138] # LATIN SMALL LETTER KRA +0072 ; [.09CB.0020.0002.0072] # LATIN SMALL LETTER R +0052 ; [.09CB.0020.0008.0052] # LATIN CAPITAL LETTER R +0155 ; [.09CB.0032.0002.0155] # LATIN SMALL LETTER R WITH ACUTE; CANONSEQ +0154 ; [.09CB.0032.0008.0154] # LATIN CAPITAL LETTER R WITH ACUTE; CANONSEQ +0159 ; [.09CB.0041.0002.0159] # LATIN SMALL LETTER R WITH CARON; CANONSEQ +0158 ; [.09CB.0041.0008.0158] # LATIN CAPITAL LETTER R WITH CARON; CANONSEQ +1E59 ; [.09CB.0052.0002.1E59] # LATIN SMALL LETTER R WITH DOT ABOVE; CANONSEQ +1E58 ; [.09CB.0052.0008.1E58] # LATIN CAPITAL LETTER R WITH DOT ABOVE; CANONSEQ +0157 ; [.09CB.0055.0002.0157] # LATIN SMALL LETTER R WITH CEDILLA; CANONSEQ +0156 ; [.09CB.0055.0008.0156] # LATIN CAPITAL LETTER R WITH CEDILLA; CANONSEQ +0211 ; [.09CB.0062.0002.0211] # LATIN SMALL LETTER R WITH DOUBLE GRAVE; CANONSEQ +0210 ; [.09CB.0062.0008.0210] # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE; CANONSEQ +0213 ; [.09CB.0064.0002.0213] # LATIN SMALL LETTER R WITH INVERTED BREVE; CANONSEQ +0212 ; [.09CB.0064.0008.0212] # LATIN CAPITAL LETTER R WITH INVERTED BREVE; CANONSEQ +1E5B ; [.09CB.0079.0002.1E5B] # LATIN SMALL LETTER R WITH DOT BELOW; CANONSEQ +1E5A ; [.09CB.0079.0008.1E5A] # LATIN CAPITAL LETTER R WITH DOT BELOW; CANONSEQ +1E5D ; [.09CB.007D.0002.1E5D] # LATIN SMALL LETTER R WITH DOT BELOW AND MACRON; CANONSEQ +1E5C ; [.09CB.007D.0008.1E5C] # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON; CANONSEQ +1E5F ; [.09CB.0089.0002.1E5F] # LATIN SMALL LETTER R WITH LINE BELOW; CANONSEQ +1E5E ; [.09CB.0089.0008.1E5E] # LATIN CAPITAL LETTER R WITH LINE BELOW; CANONSEQ +0280 ; [.09CF.0020.0002.0280] # LATIN LETTER SMALL CAPITAL R +01A6 ; [.09CF.0020.0008.01A6] # LATIN LETTER YR +0279 ; [.09D3.0020.0002.0279] # LATIN SMALL LETTER TURNED R +027A ; [.09D7.0020.0002.027A] # LATIN SMALL LETTER TURNED R WITH LONG LEG +027B ; [.09DB.0020.0002.027B] # LATIN SMALL LETTER TURNED R 
WITH HOOK +027C ; [.09DF.0020.0002.027C] # LATIN SMALL LETTER R WITH LONG LEG +027D ; [.09E3.0020.0002.027D] # LATIN SMALL LETTER R WITH TAIL +027E ; [.09E7.0020.0002.027E] # LATIN SMALL LETTER R WITH FISHHOOK +027F ; [.09EB.0020.0002.027F] # LATIN SMALL LETTER REVERSED R WITH FISHHOOK +0281 ; [.09EF.0020.0002.0281] # LATIN LETTER SMALL CAPITAL INVERTED R +0073 ; [.09F3.0020.0002.0073] # LATIN SMALL LETTER S +0053 ; [.09F3.0020.0008.0053] # LATIN CAPITAL LETTER S +015B ; [.09F3.0032.0002.015B] # LATIN SMALL LETTER S WITH ACUTE; CANONSEQ +015A ; [.09F3.0032.0008.015A] # LATIN CAPITAL LETTER S WITH ACUTE; CANONSEQ +1E65 ; [.09F3.0033.0002.1E65] # LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE; CANONSEQ +1E64 ; [.09F3.0033.0008.1E64] # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE; CANONSEQ +015D ; [.09F3.003C.0002.015D] # LATIN SMALL LETTER S WITH CIRCUMFLEX; CANONSEQ +015C ; [.09F3.003C.0008.015C] # LATIN CAPITAL LETTER S WITH CIRCUMFLEX; CANONSEQ +0161 ; [.09F3.0041.0002.0161] # LATIN SMALL LETTER S WITH CARON; CANONSEQ +0160 ; [.09F3.0041.0008.0160] # LATIN CAPITAL LETTER S WITH CARON; CANONSEQ +1E67 ; [.09F3.0042.0002.1E67] # LATIN SMALL LETTER S WITH CARON AND DOT ABOVE; CANONSEQ +1E66 ; [.09F3.0042.0008.1E66] # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE; CANONSEQ +1E61 ; [.09F3.0052.0002.1E61] # LATIN SMALL LETTER S WITH DOT ABOVE; CANONSEQ +1E60 ; [.09F3.0052.0008.1E60] # LATIN CAPITAL LETTER S WITH DOT ABOVE; CANONSEQ +015F ; [.09F3.0055.0002.015F] # LATIN SMALL LETTER S WITH CEDILLA; CANONSEQ +015E ; [.09F3.0055.0008.015E] # LATIN CAPITAL LETTER S WITH CEDILLA; CANONSEQ +1E63 ; [.09F3.0079.0002.1E63] # LATIN SMALL LETTER S WITH DOT BELOW; CANONSEQ +1E62 ; [.09F3.0079.0008.1E62] # LATIN CAPITAL LETTER S WITH DOT BELOW; CANONSEQ +1E69 ; [.09F3.007C.0002.1E69] # LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE; CANONSEQ +1E68 ; [.09F3.007C.0008.1E68] # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE; CANONSEQ +0219 ; [.09F3.0080.0002.0219] # LATIN 
SMALL LETTER S WITH COMMA BELOW; CANONSEQ +0218 ; [.09F3.0080.0008.0218] # LATIN CAPITAL LETTER S WITH COMMA BELOW; CANONSEQ +0282 ; [.09F7.0020.0002.0282] # LATIN SMALL LETTER S WITH HOOK +0283 ; [.09FB.0020.0002.0283] # LATIN SMALL LETTER ESH +01A9 ; [.09FB.0020.0008.01A9] # LATIN CAPITAL LETTER ESH +01AA ; [.09FF.0020.0002.01AA] # LATIN LETTER REVERSED ESH LOOP +0285 ; [.0A03.0020.0002.0285] # LATIN SMALL LETTER SQUAT REVERSED ESH +0286 ; [.0A07.0020.0002.0286] # LATIN SMALL LETTER ESH WITH CURL +0074 ; [.0A0B.0020.0002.0074] # LATIN SMALL LETTER T +0054 ; [.0A0B.0020.0008.0054] # LATIN CAPITAL LETTER T +0165 ; [.0A0B.0041.0002.0165] # LATIN SMALL LETTER T WITH CARON; CANONSEQ +0164 ; [.0A0B.0041.0008.0164] # LATIN CAPITAL LETTER T WITH CARON; CANONSEQ +1E97 ; [.0A0B.0047.0002.1E97] # LATIN SMALL LETTER T WITH DIAERESIS; CANONSEQ +1E6B ; [.0A0B.0052.0002.1E6B] # LATIN SMALL LETTER T WITH DOT ABOVE; CANONSEQ +1E6A ; [.0A0B.0052.0008.1E6A] # LATIN CAPITAL LETTER T WITH DOT ABOVE; CANONSEQ +0163 ; [.0A0B.0055.0002.0163] # LATIN SMALL LETTER T WITH CEDILLA; CANONSEQ +0162 ; [.0A0B.0055.0008.0162] # LATIN CAPITAL LETTER T WITH CEDILLA; CANONSEQ +1E6D ; [.0A0B.0079.0002.1E6D] # LATIN SMALL LETTER T WITH DOT BELOW; CANONSEQ +1E6C ; [.0A0B.0079.0008.1E6C] # LATIN CAPITAL LETTER T WITH DOT BELOW; CANONSEQ +021B ; [.0A0B.0080.0002.021B] # LATIN SMALL LETTER T WITH COMMA BELOW; CANONSEQ +021A ; [.0A0B.0080.0008.021A] # LATIN CAPITAL LETTER T WITH COMMA BELOW; CANONSEQ +1E71 ; [.0A0B.0085.0002.1E71] # LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW; CANONSEQ +1E70 ; [.0A0B.0085.0008.1E70] # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW; CANONSEQ +1E6F ; [.0A0B.0089.0002.1E6F] # LATIN SMALL LETTER T WITH LINE BELOW; CANONSEQ +1E6E ; [.0A0B.0089.0008.1E6E] # LATIN CAPITAL LETTER T WITH LINE BELOW; CANONSEQ +0167 ; [.0A0F.0020.0002.0167] # LATIN SMALL LETTER T WITH STROKE +0166 ; [.0A0F.0020.0008.0166] # LATIN CAPITAL LETTER T WITH STROKE +01AB ; [.0A13.0020.0002.01AB] # LATIN SMALL 
LETTER T WITH PALATAL HOOK +01AD ; [.0A17.0020.0002.01AD] # LATIN SMALL LETTER T WITH HOOK +01AC ; [.0A17.0020.0008.01AC] # LATIN CAPITAL LETTER T WITH HOOK +0288 ; [.0A1B.0020.0002.0288] # LATIN SMALL LETTER T WITH RETROFLEX HOOK +01AE ; [.0A1B.0020.0008.01AE] # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK +0287 ; [.0A1F.0020.0002.0287] # LATIN SMALL LETTER TURNED T +0075 ; [.0A23.0020.0002.0075] # LATIN SMALL LETTER U +0055 ; [.0A23.0020.0008.0055] # LATIN CAPITAL LETTER U +00FA ; [.0A23.0032.0002.00FA] # LATIN SMALL LETTER U WITH ACUTE; CANONSEQ +00DA ; [.0A23.0032.0008.00DA] # LATIN CAPITAL LETTER U WITH ACUTE; CANONSEQ +00F9 ; [.0A23.0035.0002.00F9] # LATIN SMALL LETTER U WITH GRAVE; CANONSEQ +00D9 ; [.0A23.0035.0008.00D9] # LATIN CAPITAL LETTER U WITH GRAVE; CANONSEQ +016D ; [.0A23.0037.0002.016D] # LATIN SMALL LETTER U WITH BREVE; CANONSEQ +016C ; [.0A23.0037.0008.016C] # LATIN CAPITAL LETTER U WITH BREVE; CANONSEQ +00FB ; [.0A23.003C.0002.00FB] # LATIN SMALL LETTER U WITH CIRCUMFLEX; CANONSEQ +00DB ; [.0A23.003C.0008.00DB] # LATIN CAPITAL LETTER U WITH CIRCUMFLEX; CANONSEQ +01D4 ; [.0A23.0041.0002.01D4] # LATIN SMALL LETTER U WITH CARON; CANONSEQ +01D3 ; [.0A23.0041.0008.01D3] # LATIN CAPITAL LETTER U WITH CARON; CANONSEQ +016F ; [.0A23.0043.0002.016F] # LATIN SMALL LETTER U WITH RING ABOVE; CANONSEQ +016E ; [.0A23.0043.0008.016E] # LATIN CAPITAL LETTER U WITH RING ABOVE; CANONSEQ +00FC ; [.0A23.0047.0002.00FC] # LATIN SMALL LETTER U WITH DIAERESIS; CANONSEQ +00DC ; [.0A23.0047.0008.00DC] # LATIN CAPITAL LETTER U WITH DIAERESIS; CANONSEQ +01D8 ; [.0A23.0048.0002.01D8] # LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE; CANONSEQ +01D7 ; [.0A23.0048.0008.01D7] # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE; CANONSEQ +01DC ; [.0A23.0049.0002.01DC] # LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE; CANONSEQ +01DB ; [.0A23.0049.0008.01DB] # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE; CANONSEQ +01DA ; [.0A23.004A.0002.01DA] # LATIN SMALL LETTER U WITH DIAERESIS 
AND CARON; CANONSEQ +01D9 ; [.0A23.004A.0008.01D9] # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON; CANONSEQ +01D6 ; [.0A23.004B.0002.01D6] # LATIN SMALL LETTER U WITH DIAERESIS AND MACRON; CANONSEQ +01D5 ; [.0A23.004B.0008.01D5] # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON; CANONSEQ +0171 ; [.0A23.004D.0002.0171] # LATIN SMALL LETTER U WITH DOUBLE ACUTE; CANONSEQ +0170 ; [.0A23.004D.0008.0170] # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE; CANONSEQ +0169 ; [.0A23.004E.0002.0169] # LATIN SMALL LETTER U WITH TILDE; CANONSEQ +0168 ; [.0A23.004E.0008.0168] # LATIN CAPITAL LETTER U WITH TILDE; CANONSEQ +1E79 ; [.0A23.004F.0002.1E79] # LATIN SMALL LETTER U WITH TILDE AND ACUTE; CANONSEQ +1E78 ; [.0A23.004F.0008.1E78] # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE; CANONSEQ +0173 ; [.0A23.0058.0002.0173] # LATIN SMALL LETTER U WITH OGONEK; CANONSEQ +0172 ; [.0A23.0058.0008.0172] # LATIN CAPITAL LETTER U WITH OGONEK; CANONSEQ +016B ; [.0A23.005A.0002.016B] # LATIN SMALL LETTER U WITH MACRON; CANONSEQ +016A ; [.0A23.005A.0008.016A] # LATIN CAPITAL LETTER U WITH MACRON; CANONSEQ +1E7B ; [.0A23.005D.0002.1E7B] # LATIN SMALL LETTER U WITH MACRON AND DIAERESIS; CANONSEQ +1E7A ; [.0A23.005D.0008.1E7A] # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS; CANONSEQ +1EE7 ; [.0A23.005F.0002.1EE7] # LATIN SMALL LETTER U WITH HOOK ABOVE; CANONSEQ +1EE6 ; [.0A23.005F.0008.1EE6] # LATIN CAPITAL LETTER U WITH HOOK ABOVE; CANONSEQ +0215 ; [.0A23.0062.0002.0215] # LATIN SMALL LETTER U WITH DOUBLE GRAVE; CANONSEQ +0214 ; [.0A23.0062.0008.0214] # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE; CANONSEQ +0217 ; [.0A23.0064.0002.0217] # LATIN SMALL LETTER U WITH INVERTED BREVE; CANONSEQ +0216 ; [.0A23.0064.0008.0216] # LATIN CAPITAL LETTER U WITH INVERTED BREVE; CANONSEQ +01B0 ; [.0A23.006C.0002.01B0] # LATIN SMALL LETTER U WITH HORN; CANONSEQ +01AF ; [.0A23.006C.0008.01AF] # LATIN CAPITAL LETTER U WITH HORN; CANONSEQ +1EE9 ; [.0A23.006D.0002.1EE9] # LATIN SMALL LETTER U WITH HORN AND ACUTE; 
CANONSEQ +1EE8 ; [.0A23.006D.0008.1EE8] # LATIN CAPITAL LETTER U WITH HORN AND ACUTE; CANONSEQ +1EEB ; [.0A23.006E.0002.1EEB] # LATIN SMALL LETTER U WITH HORN AND GRAVE; CANONSEQ +1EEA ; [.0A23.006E.0008.1EEA] # LATIN CAPITAL LETTER U WITH HORN AND GRAVE; CANONSEQ +1EEF ; [.0A23.006F.0002.1EEF] # LATIN SMALL LETTER U WITH HORN AND TILDE; CANONSEQ +1EEE ; [.0A23.006F.0008.1EEE] # LATIN CAPITAL LETTER U WITH HORN AND TILDE; CANONSEQ +1EED ; [.0A23.0070.0002.1EED] # LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE; CANONSEQ +1EEC ; [.0A23.0070.0008.1EEC] # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE; CANONSEQ +1EF1 ; [.0A23.0071.0002.1EF1] # LATIN SMALL LETTER U WITH HORN AND DOT BELOW; CANONSEQ +1EF0 ; [.0A23.0071.0008.1EF0] # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW; CANONSEQ +1EE5 ; [.0A23.0079.0002.1EE5] # LATIN SMALL LETTER U WITH DOT BELOW; CANONSEQ +1EE4 ; [.0A23.0079.0008.1EE4] # LATIN CAPITAL LETTER U WITH DOT BELOW; CANONSEQ +1E73 ; [.0A23.007E.0002.1E73] # LATIN SMALL LETTER U WITH DIAERESIS BELOW; CANONSEQ +1E72 ; [.0A23.007E.0008.1E72] # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW; CANONSEQ +1E77 ; [.0A23.0085.0002.1E77] # LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW; CANONSEQ +1E76 ; [.0A23.0085.0008.1E76] # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW; CANONSEQ +1E75 ; [.0A23.0088.0002.1E75] # LATIN SMALL LETTER U WITH TILDE BELOW; CANONSEQ +1E74 ; [.0A23.0088.0008.1E74] # LATIN CAPITAL LETTER U WITH TILDE BELOW; CANONSEQ +0289 ; [.0A27.0020.0002.0289] # LATIN SMALL LETTER U BAR +0265 ; [.0A2B.0020.0002.0265] # LATIN SMALL LETTER TURNED H +026F ; [.0A2F.0020.0002.026F] # LATIN SMALL LETTER TURNED M +019C ; [.0A2F.0020.0008.019C] # LATIN CAPITAL LETTER TURNED M +0270 ; [.0A33.0020.0002.0270] # LATIN SMALL LETTER TURNED M WITH LONG LEG +028A ; [.0A37.0020.0002.028A] # LATIN SMALL LETTER UPSILON +01B1 ; [.0A37.0020.0008.01B1] # LATIN CAPITAL LETTER UPSILON +0076 ; [.0A3B.0020.0002.0076] # LATIN SMALL LETTER V +0056 ; [.0A3B.0020.0008.0056] # LATIN 
CAPITAL LETTER V +1E7D ; [.0A3B.004E.0002.1E7D] # LATIN SMALL LETTER V WITH TILDE; CANONSEQ +1E7C ; [.0A3B.004E.0008.1E7C] # LATIN CAPITAL LETTER V WITH TILDE; CANONSEQ +1E7F ; [.0A3B.0079.0002.1E7F] # LATIN SMALL LETTER V WITH DOT BELOW; CANONSEQ +1E7E ; [.0A3B.0079.0008.1E7E] # LATIN CAPITAL LETTER V WITH DOT BELOW; CANONSEQ +028B ; [.0A3F.0020.0002.028B] # LATIN SMALL LETTER V WITH HOOK +01B2 ; [.0A3F.0020.0008.01B2] # LATIN CAPITAL LETTER V WITH HOOK +028C ; [.0A43.0020.0002.028C] # LATIN SMALL LETTER TURNED V +0077 ; [.0A47.0020.0002.0077] # LATIN SMALL LETTER W +0057 ; [.0A47.0020.0008.0057] # LATIN CAPITAL LETTER W +1E83 ; [.0A47.0032.0002.1E83] # LATIN SMALL LETTER W WITH ACUTE; CANONSEQ +1E82 ; [.0A47.0032.0008.1E82] # LATIN CAPITAL LETTER W WITH ACUTE; CANONSEQ +1E81 ; [.0A47.0035.0002.1E81] # LATIN SMALL LETTER W WITH GRAVE; CANONSEQ +1E80 ; [.0A47.0035.0008.1E80] # LATIN CAPITAL LETTER W WITH GRAVE; CANONSEQ +0175 ; [.0A47.003C.0002.0175] # LATIN SMALL LETTER W WITH CIRCUMFLEX; CANONSEQ +0174 ; [.0A47.003C.0008.0174] # LATIN CAPITAL LETTER W WITH CIRCUMFLEX; CANONSEQ +1E98 ; [.0A47.0043.0002.1E98] # LATIN SMALL LETTER W WITH RING ABOVE; CANONSEQ +1E85 ; [.0A47.0047.0002.1E85] # LATIN SMALL LETTER W WITH DIAERESIS; CANONSEQ +1E84 ; [.0A47.0047.0008.1E84] # LATIN CAPITAL LETTER W WITH DIAERESIS; CANONSEQ +1E87 ; [.0A47.0052.0002.1E87] # LATIN SMALL LETTER W WITH DOT ABOVE; CANONSEQ +1E86 ; [.0A47.0052.0008.1E86] # LATIN CAPITAL LETTER W WITH DOT ABOVE; CANONSEQ +1E89 ; [.0A47.0079.0002.1E89] # LATIN SMALL LETTER W WITH DOT BELOW; CANONSEQ +1E88 ; [.0A47.0079.0008.1E88] # LATIN CAPITAL LETTER W WITH DOT BELOW; CANONSEQ +028D ; [.0A4B.0020.0002.028D] # LATIN SMALL LETTER TURNED W +0078 ; [.0A4F.0020.0002.0078] # LATIN SMALL LETTER X +0058 ; [.0A4F.0020.0008.0058] # LATIN CAPITAL LETTER X +1E8D ; [.0A4F.0047.0002.1E8D] # LATIN SMALL LETTER X WITH DIAERESIS; CANONSEQ +1E8C ; [.0A4F.0047.0008.1E8C] # LATIN CAPITAL LETTER X WITH DIAERESIS; CANONSEQ +1E8B ; 
[.0A4F.0052.0002.1E8B] # LATIN SMALL LETTER X WITH DOT ABOVE; CANONSEQ +1E8A ; [.0A4F.0052.0008.1E8A] # LATIN CAPITAL LETTER X WITH DOT ABOVE; CANONSEQ +0079 ; [.0A53.0020.0002.0079] # LATIN SMALL LETTER Y +0059 ; [.0A53.0020.0008.0059] # LATIN CAPITAL LETTER Y +00FD ; [.0A53.0032.0002.00FD] # LATIN SMALL LETTER Y WITH ACUTE; CANONSEQ +00DD ; [.0A53.0032.0008.00DD] # LATIN CAPITAL LETTER Y WITH ACUTE; CANONSEQ +1EF3 ; [.0A53.0035.0002.1EF3] # LATIN SMALL LETTER Y WITH GRAVE; CANONSEQ +1EF2 ; [.0A53.0035.0008.1EF2] # LATIN CAPITAL LETTER Y WITH GRAVE; CANONSEQ +0177 ; [.0A53.003C.0002.0177] # LATIN SMALL LETTER Y WITH CIRCUMFLEX; CANONSEQ +0176 ; [.0A53.003C.0008.0176] # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX; CANONSEQ +1E99 ; [.0A53.0043.0002.1E99] # LATIN SMALL LETTER Y WITH RING ABOVE; CANONSEQ +00FF ; [.0A53.0047.0002.00FF] # LATIN SMALL LETTER Y WITH DIAERESIS; CANONSEQ +0178 ; [.0A53.0047.0008.0178] # LATIN CAPITAL LETTER Y WITH DIAERESIS; CANONSEQ +1EF9 ; [.0A53.004E.0002.1EF9] # LATIN SMALL LETTER Y WITH TILDE; CANONSEQ +1EF8 ; [.0A53.004E.0008.1EF8] # LATIN CAPITAL LETTER Y WITH TILDE; CANONSEQ +1E8F ; [.0A53.0052.0002.1E8F] # LATIN SMALL LETTER Y WITH DOT ABOVE; CANONSEQ +1E8E ; [.0A53.0052.0008.1E8E] # LATIN CAPITAL LETTER Y WITH DOT ABOVE; CANONSEQ +0233 ; [.0A53.005A.0002.0233] # LATIN SMALL LETTER Y WITH MACRON; CANONSEQ +0232 ; [.0A53.005A.0008.0232] # LATIN CAPITAL LETTER Y WITH MACRON; CANONSEQ +1EF7 ; [.0A53.005F.0002.1EF7] # LATIN SMALL LETTER Y WITH HOOK ABOVE; CANONSEQ +1EF6 ; [.0A53.005F.0008.1EF6] # LATIN CAPITAL LETTER Y WITH HOOK ABOVE; CANONSEQ +1EF5 ; [.0A53.0079.0002.1EF5] # LATIN SMALL LETTER Y WITH DOT BELOW; CANONSEQ +1EF4 ; [.0A53.0079.0008.1EF4] # LATIN CAPITAL LETTER Y WITH DOT BELOW; CANONSEQ +028F ; [.0A57.0020.0002.028F] # LATIN LETTER SMALL CAPITAL Y +01B4 ; [.0A5B.0020.0002.01B4] # LATIN SMALL LETTER Y WITH HOOK +01B3 ; [.0A5B.0020.0008.01B3] # LATIN CAPITAL LETTER Y WITH HOOK +007A ; [.0A5F.0020.0002.007A] # LATIN SMALL LETTER 
Z +005A ; [.0A5F.0020.0008.005A] # LATIN CAPITAL LETTER Z +017A ; [.0A5F.0032.0002.017A] # LATIN SMALL LETTER Z WITH ACUTE; CANONSEQ +0179 ; [.0A5F.0032.0008.0179] # LATIN CAPITAL LETTER Z WITH ACUTE; CANONSEQ +1E91 ; [.0A5F.003C.0002.1E91] # LATIN SMALL LETTER Z WITH CIRCUMFLEX; CANONSEQ +1E90 ; [.0A5F.003C.0008.1E90] # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX; CANONSEQ +017E ; [.0A5F.0041.0002.017E] # LATIN SMALL LETTER Z WITH CARON; CANONSEQ +017D ; [.0A5F.0041.0008.017D] # LATIN CAPITAL LETTER Z WITH CARON; CANONSEQ +017C ; [.0A5F.0052.0002.017C] # LATIN SMALL LETTER Z WITH DOT ABOVE; CANONSEQ +017B ; [.0A5F.0052.0008.017B] # LATIN CAPITAL LETTER Z WITH DOT ABOVE; CANONSEQ +1E93 ; [.0A5F.0079.0002.1E93] # LATIN SMALL LETTER Z WITH DOT BELOW; CANONSEQ +1E92 ; [.0A5F.0079.0008.1E92] # LATIN CAPITAL LETTER Z WITH DOT BELOW; CANONSEQ +1E95 ; [.0A5F.0089.0002.1E95] # LATIN SMALL LETTER Z WITH LINE BELOW; CANONSEQ +1E94 ; [.0A5F.0089.0008.1E94] # LATIN CAPITAL LETTER Z WITH LINE BELOW; CANONSEQ +01B6 ; [.0A63.0020.0002.01B6] # LATIN SMALL LETTER Z WITH STROKE +01B5 ; [.0A63.0020.0008.01B5] # LATIN CAPITAL LETTER Z WITH STROKE +0225 ; [.0A67.0020.0002.0225] # LATIN SMALL LETTER Z WITH HOOK +0224 ; [.0A67.0020.0008.0224] # LATIN CAPITAL LETTER Z WITH HOOK +0290 ; [.0A6B.0020.0002.0290] # LATIN SMALL LETTER Z WITH RETROFLEX HOOK +0291 ; [.0A6F.0020.0002.0291] # LATIN SMALL LETTER Z WITH CURL +0292 ; [.0A73.0020.0002.0292] # LATIN SMALL LETTER EZH +01B7 ; [.0A73.0020.0008.01B7] # LATIN CAPITAL LETTER EZH +01EF ; [.0A73.0041.0002.01EF] # LATIN SMALL LETTER EZH WITH CARON; CANONSEQ +01EE ; [.0A73.0041.0008.01EE] # LATIN CAPITAL LETTER EZH WITH CARON; CANONSEQ +01B9 ; [.0A77.0020.0002.01B9] # LATIN SMALL LETTER EZH REVERSED +01B8 ; [.0A77.0020.0008.01B8] # LATIN CAPITAL LETTER EZH REVERSED +01BA ; [.0A7B.0020.0002.01BA] # LATIN SMALL LETTER EZH WITH TAIL +0293 ; [.0A7F.0020.0002.0293] # LATIN SMALL LETTER EZH WITH CURL +021D ; [.0A83.0020.0002.021D] # LATIN SMALL LETTER YOGH 
+021C ; [.0A83.0020.0008.021C] # LATIN CAPITAL LETTER YOGH +00FE ; [.0A87.0020.0002.00FE] # LATIN SMALL LETTER THORN +00DE ; [.0A87.0020.0008.00DE] # LATIN CAPITAL LETTER THORN +01BF ; [.0A8B.0020.0002.01BF] # LATIN LETTER WYNN +01F7 ; [.0A8B.0020.0008.01F7] # LATIN CAPITAL LETTER WYNN +01BB ; [.0A8F.0020.0002.01BB] # LATIN LETTER TWO WITH STROKE +01A8 ; [.0A93.0020.0002.01A8] # LATIN SMALL LETTER TONE TWO +01A7 ; [.0A93.0020.0008.01A7] # LATIN CAPITAL LETTER TONE TWO +01BD ; [.0A97.0020.0002.01BD] # LATIN SMALL LETTER TONE FIVE +01BC ; [.0A97.0020.0008.01BC] # LATIN CAPITAL LETTER TONE FIVE +0185 ; [.0A9B.0020.0002.0185] # LATIN SMALL LETTER TONE SIX +0184 ; [.0A9B.0020.0008.0184] # LATIN CAPITAL LETTER TONE SIX +0294 ; [.0A9F.0020.0002.0294] # LATIN LETTER GLOTTAL STOP +02C0 ; [.0AA3.0020.0002.02C0] # MODIFIER LETTER GLOTTAL STOP +02BC ; [.0AA4.0020.0002.02BC] # MODIFIER LETTER APOSTROPHE +02EE ; [.0AA5.0020.0002.02EE] # MODIFIER LETTER DOUBLE APOSTROPHE +02BE ; [.0AA6.0020.0002.02BE] # MODIFIER LETTER RIGHT HALF RING +0295 ; [.0AA7.0020.0002.0295] # LATIN LETTER PHARYNGEAL VOICED FRICATIVE +02BF ; [.0AAB.0020.0002.02BF] # MODIFIER LETTER LEFT HALF RING +02C1 ; [.0AAC.0020.0002.02C1] # MODIFIER LETTER REVERSED GLOTTAL STOP +01BE ; [.0AAD.0020.0002.01BE] # LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE +02A1 ; [.0AB1.0020.0002.02A1] # LATIN LETTER GLOTTAL STOP WITH STROKE +02A2 ; [.0AB5.0020.0002.02A2] # LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE +0296 ; [.0AB9.0020.0002.0296] # LATIN LETTER INVERTED GLOTTAL STOP +01C0 ; [.0ABD.0020.0002.01C0] # LATIN LETTER DENTAL CLICK +01C1 ; [.0AC1.0020.0002.01C1] # LATIN LETTER LATERAL CLICK +01C2 ; [.0AC5.0020.0002.01C2] # LATIN LETTER ALVEOLAR CLICK +01C3 ; [.0AC9.0020.0002.01C3] # LATIN LETTER RETROFLEX CLICK +0297 ; [.0ACD.0020.0002.0297] # LATIN LETTER STRETCHED C +0298 ; [.0AD1.0020.0002.0298] # LATIN LETTER BILABIAL CLICK +02AC ; [.0AD5.0020.0002.02AC] # LATIN LETTER BILABIAL PERCUSSIVE +02AD ; 
[.0AD9.0020.0002.02AD] # LATIN LETTER BIDENTAL PERCUSSIVE +2126 ; [.0AF8.0020.0008.2126] # OHM SIGN; CANON +1100 ; [.1684.0020.0002.1100] # HANGUL CHOSEONG KIYEOK +1101 ; [.1685.0020.0002.1101] # HANGUL CHOSEONG SSANGKIYEOK +1102 ; [.1686.0020.0002.1102] # HANGUL CHOSEONG NIEUN +1103 ; [.1687.0020.0002.1103] # HANGUL CHOSEONG TIKEUT +1104 ; [.1688.0020.0002.1104] # HANGUL CHOSEONG SSANGTIKEUT +1105 ; [.1689.0020.0002.1105] # HANGUL CHOSEONG RIEUL +1106 ; [.168A.0020.0002.1106] # HANGUL CHOSEONG MIEUM +1107 ; [.168B.0020.0002.1107] # HANGUL CHOSEONG PIEUP +1108 ; [.168C.0020.0002.1108] # HANGUL CHOSEONG SSANGPIEUP +1109 ; [.168D.0020.0002.1109] # HANGUL CHOSEONG SIOS +110A ; [.168E.0020.0002.110A] # HANGUL CHOSEONG SSANGSIOS +110B ; [.168F.0020.0002.110B] # HANGUL CHOSEONG IEUNG +110C ; [.1690.0020.0002.110C] # HANGUL CHOSEONG CIEUC +110D ; [.1691.0020.0002.110D] # HANGUL CHOSEONG SSANGCIEUC +110E ; [.1692.0020.0002.110E] # HANGUL CHOSEONG CHIEUCH +110F ; [.1693.0020.0002.110F] # HANGUL CHOSEONG KHIEUKH +1110 ; [.1694.0020.0002.1110] # HANGUL CHOSEONG THIEUTH +1111 ; [.1695.0020.0002.1111] # HANGUL CHOSEONG PHIEUPH +1112 ; [.1696.0020.0002.1112] # HANGUL CHOSEONG HIEUH +1113 ; [.1697.0020.0002.1113] # HANGUL CHOSEONG NIEUN-KIYEOK +1114 ; [.1698.0020.0002.1114] # HANGUL CHOSEONG SSANGNIEUN +1115 ; [.1699.0020.0002.1115] # HANGUL CHOSEONG NIEUN-TIKEUT +1116 ; [.169A.0020.0002.1116] # HANGUL CHOSEONG NIEUN-PIEUP +1117 ; [.169B.0020.0002.1117] # HANGUL CHOSEONG TIKEUT-KIYEOK +1118 ; [.169C.0020.0002.1118] # HANGUL CHOSEONG RIEUL-NIEUN +1119 ; [.169D.0020.0002.1119] # HANGUL CHOSEONG SSANGRIEUL +111A ; [.169E.0020.0002.111A] # HANGUL CHOSEONG RIEUL-HIEUH +111B ; [.169F.0020.0002.111B] # HANGUL CHOSEONG KAPYEOUNRIEUL +111C ; [.16A0.0020.0002.111C] # HANGUL CHOSEONG MIEUM-PIEUP +111D ; [.16A1.0020.0002.111D] # HANGUL CHOSEONG KAPYEOUNMIEUM +111E ; [.16A2.0020.0002.111E] # HANGUL CHOSEONG PIEUP-KIYEOK +111F ; [.16A3.0020.0002.111F] # HANGUL CHOSEONG PIEUP-NIEUN +1120 ; 
[.16A4.0020.0002.1120] # HANGUL CHOSEONG PIEUP-TIKEUT +1121 ; [.16A5.0020.0002.1121] # HANGUL CHOSEONG PIEUP-SIOS +1122 ; [.16A6.0020.0002.1122] # HANGUL CHOSEONG PIEUP-SIOS-KIYEOK +1123 ; [.16A7.0020.0002.1123] # HANGUL CHOSEONG PIEUP-SIOS-TIKEUT +1124 ; [.16A8.0020.0002.1124] # HANGUL CHOSEONG PIEUP-SIOS-PIEUP +1125 ; [.16A9.0020.0002.1125] # HANGUL CHOSEONG PIEUP-SSANGSIOS +1126 ; [.16AA.0020.0002.1126] # HANGUL CHOSEONG PIEUP-SIOS-CIEUC +1127 ; [.16AB.0020.0002.1127] # HANGUL CHOSEONG PIEUP-CIEUC +1128 ; [.16AC.0020.0002.1128] # HANGUL CHOSEONG PIEUP-CHIEUCH +1129 ; [.16AD.0020.0002.1129] # HANGUL CHOSEONG PIEUP-THIEUTH +112A ; [.16AE.0020.0002.112A] # HANGUL CHOSEONG PIEUP-PHIEUPH +112B ; [.16AF.0020.0002.112B] # HANGUL CHOSEONG KAPYEOUNPIEUP +112C ; [.16B0.0020.0002.112C] # HANGUL CHOSEONG KAPYEOUNSSANGPIEUP +112D ; [.16B1.0020.0002.112D] # HANGUL CHOSEONG SIOS-KIYEOK +112E ; [.16B2.0020.0002.112E] # HANGUL CHOSEONG SIOS-NIEUN +112F ; [.16B3.0020.0002.112F] # HANGUL CHOSEONG SIOS-TIKEUT +1130 ; [.16B4.0020.0002.1130] # HANGUL CHOSEONG SIOS-RIEUL +1131 ; [.16B5.0020.0002.1131] # HANGUL CHOSEONG SIOS-MIEUM +1132 ; [.16B6.0020.0002.1132] # HANGUL CHOSEONG SIOS-PIEUP +1133 ; [.16B7.0020.0002.1133] # HANGUL CHOSEONG SIOS-PIEUP-KIYEOK +1134 ; [.16B8.0020.0002.1134] # HANGUL CHOSEONG SIOS-SSANGSIOS +1135 ; [.16B9.0020.0002.1135] # HANGUL CHOSEONG SIOS-IEUNG +1136 ; [.16BA.0020.0002.1136] # HANGUL CHOSEONG SIOS-CIEUC +1137 ; [.16BB.0020.0002.1137] # HANGUL CHOSEONG SIOS-CHIEUCH +1138 ; [.16BC.0020.0002.1138] # HANGUL CHOSEONG SIOS-KHIEUKH +1139 ; [.16BD.0020.0002.1139] # HANGUL CHOSEONG SIOS-THIEUTH +113A ; [.16BE.0020.0002.113A] # HANGUL CHOSEONG SIOS-PHIEUPH +113B ; [.16BF.0020.0002.113B] # HANGUL CHOSEONG SIOS-HIEUH +113C ; [.16C0.0020.0002.113C] # HANGUL CHOSEONG CHITUEUMSIOS +113D ; [.16C1.0020.0002.113D] # HANGUL CHOSEONG CHITUEUMSSANGSIOS +113E ; [.16C2.0020.0002.113E] # HANGUL CHOSEONG CEONGCHIEUMSIOS +113F ; [.16C3.0020.0002.113F] # HANGUL CHOSEONG 
CEONGCHIEUMSSANGSIOS +1140 ; [.16C4.0020.0002.1140] # HANGUL CHOSEONG PANSIOS +1141 ; [.16C5.0020.0002.1141] # HANGUL CHOSEONG IEUNG-KIYEOK +1142 ; [.16C6.0020.0002.1142] # HANGUL CHOSEONG IEUNG-TIKEUT +1143 ; [.16C7.0020.0002.1143] # HANGUL CHOSEONG IEUNG-MIEUM +1144 ; [.16C8.0020.0002.1144] # HANGUL CHOSEONG IEUNG-PIEUP +1145 ; [.16C9.0020.0002.1145] # HANGUL CHOSEONG IEUNG-SIOS +1146 ; [.16CA.0020.0002.1146] # HANGUL CHOSEONG IEUNG-PANSIOS +1147 ; [.16CB.0020.0002.1147] # HANGUL CHOSEONG SSANGIEUNG +1148 ; [.16CC.0020.0002.1148] # HANGUL CHOSEONG IEUNG-CIEUC +1149 ; [.16CD.0020.0002.1149] # HANGUL CHOSEONG IEUNG-CHIEUCH +114A ; [.16CE.0020.0002.114A] # HANGUL CHOSEONG IEUNG-THIEUTH +114B ; [.16CF.0020.0002.114B] # HANGUL CHOSEONG IEUNG-PHIEUPH +114C ; [.16D0.0020.0002.114C] # HANGUL CHOSEONG YESIEUNG +114D ; [.16D1.0020.0002.114D] # HANGUL CHOSEONG CIEUC-IEUNG +114E ; [.16D2.0020.0002.114E] # HANGUL CHOSEONG CHITUEUMCIEUC +114F ; [.16D3.0020.0002.114F] # HANGUL CHOSEONG CHITUEUMSSANGCIEUC +1150 ; [.16D4.0020.0002.1150] # HANGUL CHOSEONG CEONGCHIEUMCIEUC +1151 ; [.16D5.0020.0002.1151] # HANGUL CHOSEONG CEONGCHIEUMSSANGCIEUC +1152 ; [.16D6.0020.0002.1152] # HANGUL CHOSEONG CHIEUCH-KHIEUKH +1153 ; [.16D7.0020.0002.1153] # HANGUL CHOSEONG CHIEUCH-HIEUH +1154 ; [.16D8.0020.0002.1154] # HANGUL CHOSEONG CHITUEUMCHIEUCH +1155 ; [.16D9.0020.0002.1155] # HANGUL CHOSEONG CEONGCHIEUMCHIEUCH +1156 ; [.16DA.0020.0002.1156] # HANGUL CHOSEONG PHIEUPH-PIEUP +1157 ; [.16DB.0020.0002.1157] # HANGUL CHOSEONG KAPYEOUNPHIEUPH +1158 ; [.16DC.0020.0002.1158] # HANGUL CHOSEONG SSANGHIEUH +1159 ; [.16DD.0020.0002.1159] # HANGUL CHOSEONG YEORINHIEUH +115F ; [.16DE.0020.0002.115F] # HANGUL CHOSEONG FILLER +1160 ; [.16DF.0020.0002.1160] # HANGUL JUNGSEONG FILLER +1161 ; [.16E0.0020.0002.1161] # HANGUL JUNGSEONG A +1162 ; [.16E1.0020.0002.1162] # HANGUL JUNGSEONG AE +1163 ; [.16E2.0020.0002.1163] # HANGUL JUNGSEONG YA +1164 ; [.16E3.0020.0002.1164] # HANGUL JUNGSEONG YAE +1165 ; 
[.16E4.0020.0002.1165] # HANGUL JUNGSEONG EO +1166 ; [.16E5.0020.0002.1166] # HANGUL JUNGSEONG E +1167 ; [.16E6.0020.0002.1167] # HANGUL JUNGSEONG YEO +1168 ; [.16E7.0020.0002.1168] # HANGUL JUNGSEONG YE +1169 ; [.16E8.0020.0002.1169] # HANGUL JUNGSEONG O +116A ; [.16E9.0020.0002.116A] # HANGUL JUNGSEONG WA +116B ; [.16EA.0020.0002.116B] # HANGUL JUNGSEONG WAE +116C ; [.16EB.0020.0002.116C] # HANGUL JUNGSEONG OE +116D ; [.16EC.0020.0002.116D] # HANGUL JUNGSEONG YO +116E ; [.16ED.0020.0002.116E] # HANGUL JUNGSEONG U +116F ; [.16EE.0020.0002.116F] # HANGUL JUNGSEONG WEO +1170 ; [.16EF.0020.0002.1170] # HANGUL JUNGSEONG WE +1171 ; [.16F0.0020.0002.1171] # HANGUL JUNGSEONG WI +1172 ; [.16F1.0020.0002.1172] # HANGUL JUNGSEONG YU +1173 ; [.16F2.0020.0002.1173] # HANGUL JUNGSEONG EU +1174 ; [.16F3.0020.0002.1174] # HANGUL JUNGSEONG YI +1175 ; [.16F4.0020.0002.1175] # HANGUL JUNGSEONG I +1176 ; [.16F5.0020.0002.1176] # HANGUL JUNGSEONG A-O +1177 ; [.16F6.0020.0002.1177] # HANGUL JUNGSEONG A-U +1178 ; [.16F7.0020.0002.1178] # HANGUL JUNGSEONG YA-O +1179 ; [.16F8.0020.0002.1179] # HANGUL JUNGSEONG YA-YO +117A ; [.16F9.0020.0002.117A] # HANGUL JUNGSEONG EO-O +117B ; [.16FA.0020.0002.117B] # HANGUL JUNGSEONG EO-U +117C ; [.16FB.0020.0002.117C] # HANGUL JUNGSEONG EO-EU +117D ; [.16FC.0020.0002.117D] # HANGUL JUNGSEONG YEO-O +117E ; [.16FD.0020.0002.117E] # HANGUL JUNGSEONG YEO-U +117F ; [.16FE.0020.0002.117F] # HANGUL JUNGSEONG O-EO +1180 ; [.16FF.0020.0002.1180] # HANGUL JUNGSEONG O-E +1181 ; [.1700.0020.0002.1181] # HANGUL JUNGSEONG O-YE +1182 ; [.1701.0020.0002.1182] # HANGUL JUNGSEONG O-O +1183 ; [.1702.0020.0002.1183] # HANGUL JUNGSEONG O-U +1184 ; [.1703.0020.0002.1184] # HANGUL JUNGSEONG YO-YA +1185 ; [.1704.0020.0002.1185] # HANGUL JUNGSEONG YO-YAE +1186 ; [.1705.0020.0002.1186] # HANGUL JUNGSEONG YO-YEO +1187 ; [.1706.0020.0002.1187] # HANGUL JUNGSEONG YO-O +1188 ; [.1707.0020.0002.1188] # HANGUL JUNGSEONG YO-I +1189 ; [.1708.0020.0002.1189] # HANGUL JUNGSEONG U-A 
+118A ; [.1709.0020.0002.118A] # HANGUL JUNGSEONG U-AE +118B ; [.170A.0020.0002.118B] # HANGUL JUNGSEONG U-EO-EU +118C ; [.170B.0020.0002.118C] # HANGUL JUNGSEONG U-YE +118D ; [.170C.0020.0002.118D] # HANGUL JUNGSEONG U-U +118E ; [.170D.0020.0002.118E] # HANGUL JUNGSEONG YU-A +118F ; [.170E.0020.0002.118F] # HANGUL JUNGSEONG YU-EO +1190 ; [.170F.0020.0002.1190] # HANGUL JUNGSEONG YU-E +1191 ; [.1710.0020.0002.1191] # HANGUL JUNGSEONG YU-YEO +1192 ; [.1711.0020.0002.1192] # HANGUL JUNGSEONG YU-YE +1193 ; [.1712.0020.0002.1193] # HANGUL JUNGSEONG YU-U +1194 ; [.1713.0020.0002.1194] # HANGUL JUNGSEONG YU-I +1195 ; [.1714.0020.0002.1195] # HANGUL JUNGSEONG EU-U +1196 ; [.1715.0020.0002.1196] # HANGUL JUNGSEONG EU-EU +1197 ; [.1716.0020.0002.1197] # HANGUL JUNGSEONG YI-U +1198 ; [.1717.0020.0002.1198] # HANGUL JUNGSEONG I-A +1199 ; [.1718.0020.0002.1199] # HANGUL JUNGSEONG I-YA +119A ; [.1719.0020.0002.119A] # HANGUL JUNGSEONG I-O +119B ; [.171A.0020.0002.119B] # HANGUL JUNGSEONG I-U +119C ; [.171B.0020.0002.119C] # HANGUL JUNGSEONG I-EU +119D ; [.171C.0020.0002.119D] # HANGUL JUNGSEONG I-ARAEA +119E ; [.171D.0020.0002.119E] # HANGUL JUNGSEONG ARAEA +119F ; [.171E.0020.0002.119F] # HANGUL JUNGSEONG ARAEA-EO +11A0 ; [.171F.0020.0002.11A0] # HANGUL JUNGSEONG ARAEA-U +11A1 ; [.1720.0020.0002.11A1] # HANGUL JUNGSEONG ARAEA-I +11A2 ; [.1721.0020.0002.11A2] # HANGUL JUNGSEONG SSANGARAEA +11A8 ; [.1722.0020.0002.11A8] # HANGUL JONGSEONG KIYEOK +11A9 ; [.1723.0020.0002.11A9] # HANGUL JONGSEONG SSANGKIYEOK +11AA ; [.1724.0020.0002.11AA] # HANGUL JONGSEONG KIYEOK-SIOS +11AB ; [.1725.0020.0002.11AB] # HANGUL JONGSEONG NIEUN +11AC ; [.1726.0020.0002.11AC] # HANGUL JONGSEONG NIEUN-CIEUC +11AD ; [.1727.0020.0002.11AD] # HANGUL JONGSEONG NIEUN-HIEUH +11AE ; [.1728.0020.0002.11AE] # HANGUL JONGSEONG TIKEUT +11AF ; [.1729.0020.0002.11AF] # HANGUL JONGSEONG RIEUL +11B0 ; [.172A.0020.0002.11B0] # HANGUL JONGSEONG RIEUL-KIYEOK +11B1 ; [.172B.0020.0002.11B1] # HANGUL JONGSEONG RIEUL-MIEUM 
+11B2 ; [.172C.0020.0002.11B2] # HANGUL JONGSEONG RIEUL-PIEUP +11B3 ; [.172D.0020.0002.11B3] # HANGUL JONGSEONG RIEUL-SIOS +11B4 ; [.172E.0020.0002.11B4] # HANGUL JONGSEONG RIEUL-THIEUTH +11B5 ; [.172F.0020.0002.11B5] # HANGUL JONGSEONG RIEUL-PHIEUPH +11B6 ; [.1730.0020.0002.11B6] # HANGUL JONGSEONG RIEUL-HIEUH +11B7 ; [.1731.0020.0002.11B7] # HANGUL JONGSEONG MIEUM +11B8 ; [.1732.0020.0002.11B8] # HANGUL JONGSEONG PIEUP +11B9 ; [.1733.0020.0002.11B9] # HANGUL JONGSEONG PIEUP-SIOS +11BA ; [.1734.0020.0002.11BA] # HANGUL JONGSEONG SIOS +11BB ; [.1735.0020.0002.11BB] # HANGUL JONGSEONG SSANGSIOS +11BC ; [.1736.0020.0002.11BC] # HANGUL JONGSEONG IEUNG +11BD ; [.1737.0020.0002.11BD] # HANGUL JONGSEONG CIEUC +11BE ; [.1738.0020.0002.11BE] # HANGUL JONGSEONG CHIEUCH +11BF ; [.1739.0020.0002.11BF] # HANGUL JONGSEONG KHIEUKH +11C0 ; [.173A.0020.0002.11C0] # HANGUL JONGSEONG THIEUTH +11C1 ; [.173B.0020.0002.11C1] # HANGUL JONGSEONG PHIEUPH +11C2 ; [.173C.0020.0002.11C2] # HANGUL JONGSEONG HIEUH +11C3 ; [.173D.0020.0002.11C3] # HANGUL JONGSEONG KIYEOK-RIEUL +11C4 ; [.173E.0020.0002.11C4] # HANGUL JONGSEONG KIYEOK-SIOS-KIYEOK +11C5 ; [.173F.0020.0002.11C5] # HANGUL JONGSEONG NIEUN-KIYEOK +11C6 ; [.1740.0020.0002.11C6] # HANGUL JONGSEONG NIEUN-TIKEUT +11C7 ; [.1741.0020.0002.11C7] # HANGUL JONGSEONG NIEUN-SIOS +11C8 ; [.1742.0020.0002.11C8] # HANGUL JONGSEONG NIEUN-PANSIOS +11C9 ; [.1743.0020.0002.11C9] # HANGUL JONGSEONG NIEUN-THIEUTH +11CA ; [.1744.0020.0002.11CA] # HANGUL JONGSEONG TIKEUT-KIYEOK +11CB ; [.1745.0020.0002.11CB] # HANGUL JONGSEONG TIKEUT-RIEUL +11CC ; [.1746.0020.0002.11CC] # HANGUL JONGSEONG RIEUL-KIYEOK-SIOS +11CD ; [.1747.0020.0002.11CD] # HANGUL JONGSEONG RIEUL-NIEUN +11CE ; [.1748.0020.0002.11CE] # HANGUL JONGSEONG RIEUL-TIKEUT +11CF ; [.1749.0020.0002.11CF] # HANGUL JONGSEONG RIEUL-TIKEUT-HIEUH +11D0 ; [.174A.0020.0002.11D0] # HANGUL JONGSEONG SSANGRIEUL +11D1 ; [.174B.0020.0002.11D1] # HANGUL JONGSEONG RIEUL-MIEUM-KIYEOK +11D2 ; [.174C.0020.0002.11D2] # 
HANGUL JONGSEONG RIEUL-MIEUM-SIOS +11D3 ; [.174D.0020.0002.11D3] # HANGUL JONGSEONG RIEUL-PIEUP-SIOS +11D4 ; [.174E.0020.0002.11D4] # HANGUL JONGSEONG RIEUL-PIEUP-HIEUH +11D5 ; [.174F.0020.0002.11D5] # HANGUL JONGSEONG RIEUL-KAPYEOUNPIEUP +11D6 ; [.1750.0020.0002.11D6] # HANGUL JONGSEONG RIEUL-SSANGSIOS +11D7 ; [.1751.0020.0002.11D7] # HANGUL JONGSEONG RIEUL-PANSIOS +11D8 ; [.1752.0020.0002.11D8] # HANGUL JONGSEONG RIEUL-KHIEUKH +11D9 ; [.1753.0020.0002.11D9] # HANGUL JONGSEONG RIEUL-YEORINHIEUH +11DA ; [.1754.0020.0002.11DA] # HANGUL JONGSEONG MIEUM-KIYEOK +11DB ; [.1755.0020.0002.11DB] # HANGUL JONGSEONG MIEUM-RIEUL +11DC ; [.1756.0020.0002.11DC] # HANGUL JONGSEONG MIEUM-PIEUP +11DD ; [.1757.0020.0002.11DD] # HANGUL JONGSEONG MIEUM-SIOS +11DE ; [.1758.0020.0002.11DE] # HANGUL JONGSEONG MIEUM-SSANGSIOS +11DF ; [.1759.0020.0002.11DF] # HANGUL JONGSEONG MIEUM-PANSIOS +11E0 ; [.175A.0020.0002.11E0] # HANGUL JONGSEONG MIEUM-CHIEUCH +11E1 ; [.175B.0020.0002.11E1] # HANGUL JONGSEONG MIEUM-HIEUH +11E2 ; [.175C.0020.0002.11E2] # HANGUL JONGSEONG KAPYEOUNMIEUM +11E3 ; [.175D.0020.0002.11E3] # HANGUL JONGSEONG PIEUP-RIEUL +11E4 ; [.175E.0020.0002.11E4] # HANGUL JONGSEONG PIEUP-PHIEUPH +11E5 ; [.175F.0020.0002.11E5] # HANGUL JONGSEONG PIEUP-HIEUH +11E6 ; [.1760.0020.0002.11E6] # HANGUL JONGSEONG KAPYEOUNPIEUP +11E7 ; [.1761.0020.0002.11E7] # HANGUL JONGSEONG SIOS-KIYEOK +11E8 ; [.1762.0020.0002.11E8] # HANGUL JONGSEONG SIOS-TIKEUT +11E9 ; [.1763.0020.0002.11E9] # HANGUL JONGSEONG SIOS-RIEUL +11EA ; [.1764.0020.0002.11EA] # HANGUL JONGSEONG SIOS-PIEUP +11EB ; [.1765.0020.0002.11EB] # HANGUL JONGSEONG PANSIOS +11EC ; [.1766.0020.0002.11EC] # HANGUL JONGSEONG IEUNG-KIYEOK +11ED ; [.1767.0020.0002.11ED] # HANGUL JONGSEONG IEUNG-SSANGKIYEOK +11EE ; [.1768.0020.0002.11EE] # HANGUL JONGSEONG SSANGIEUNG +11EF ; [.1769.0020.0002.11EF] # HANGUL JONGSEONG IEUNG-KHIEUKH +11F0 ; [.176A.0020.0002.11F0] # HANGUL JONGSEONG YESIEUNG +11F1 ; [.176B.0020.0002.11F1] # HANGUL JONGSEONG 
YESIEUNG-SIOS +11F2 ; [.176C.0020.0002.11F2] # HANGUL JONGSEONG YESIEUNG-PANSIOS +11F3 ; [.176D.0020.0002.11F3] # HANGUL JONGSEONG PHIEUPH-PIEUP +11F4 ; [.176E.0020.0002.11F4] # HANGUL JONGSEONG KAPYEOUNPHIEUPH +11F5 ; [.176F.0020.0002.11F5] # HANGUL JONGSEONG HIEUH-NIEUN +11F6 ; [.1770.0020.0002.11F6] # HANGUL JONGSEONG HIEUH-RIEUL +11F7 ; [.1771.0020.0002.11F7] # HANGUL JONGSEONG HIEUH-MIEUM +11F8 ; [.1772.0020.0002.11F8] # HANGUL JONGSEONG HIEUH-PIEUP +11F9 ; [.1773.0020.0002.11F9] # HANGUL JONGSEONG YEORINHIEUH +3041 ; [.1774.0020.000D.3041] # HIRAGANA LETTER SMALL A +3042 ; [.1774.0020.000E.3042] # HIRAGANA LETTER A +30A1 ; [.1774.0020.000F.30A1] # KATAKANA LETTER SMALL A +30A2 ; [.1774.0020.0011.30A2] # KATAKANA LETTER A +3043 ; [.1775.0020.000D.3043] # HIRAGANA LETTER SMALL I +3044 ; [.1775.0020.000E.3044] # HIRAGANA LETTER I +30A3 ; [.1775.0020.000F.30A3] # KATAKANA LETTER SMALL I +30A4 ; [.1775.0020.0011.30A4] # KATAKANA LETTER I +3045 ; [.1776.0020.000D.3045] # HIRAGANA LETTER SMALL U +3046 ; [.1776.0020.000E.3046] # HIRAGANA LETTER U +30A5 ; [.1776.0020.000F.30A5] # KATAKANA LETTER SMALL U +30A6 ; [.1776.0020.0011.30A6] # KATAKANA LETTER U +3094 ; [.1776.013E.000E.3094] # HIRAGANA LETTER VU; CANONSEQ +30F4 ; [.1776.013E.0011.30F4] # KATAKANA LETTER VU; CANONSEQ +3047 ; [.1777.0020.000D.3047] # HIRAGANA LETTER SMALL E +3048 ; [.1777.0020.000E.3048] # HIRAGANA LETTER E +30A7 ; [.1777.0020.000F.30A7] # KATAKANA LETTER SMALL E +30A8 ; [.1777.0020.0011.30A8] # KATAKANA LETTER E +3049 ; [.1778.0020.000D.3049] # HIRAGANA LETTER SMALL O +304A ; [.1778.0020.000E.304A] # HIRAGANA LETTER O +30A9 ; [.1778.0020.000F.30A9] # KATAKANA LETTER SMALL O +30AA ; [.1778.0020.0011.30AA] # KATAKANA LETTER O +304B ; [.1779.0020.000E.304B] # HIRAGANA LETTER KA +30F5 ; [.1779.0020.000F.30F5] # KATAKANA LETTER SMALL KA +30AB ; [.1779.0020.0011.30AB] # KATAKANA LETTER KA +304C ; [.1779.013E.000E.304C] # HIRAGANA LETTER GA; CANONSEQ +30AC ; [.1779.013E.0011.30AC] # KATAKANA LETTER 
GA; CANONSEQ +304D ; [.177A.0020.000E.304D] # HIRAGANA LETTER KI +30AD ; [.177A.0020.0011.30AD] # KATAKANA LETTER KI +304E ; [.177A.013E.000E.304E] # HIRAGANA LETTER GI; CANONSEQ +30AE ; [.177A.013E.0011.30AE] # KATAKANA LETTER GI; CANONSEQ +304F ; [.177B.0020.000E.304F] # HIRAGANA LETTER KU +30AF ; [.177B.0020.0011.30AF] # KATAKANA LETTER KU +3050 ; [.177B.013E.000E.3050] # HIRAGANA LETTER GU; CANONSEQ +30B0 ; [.177B.013E.0011.30B0] # KATAKANA LETTER GU; CANONSEQ +3051 ; [.177C.0020.000E.3051] # HIRAGANA LETTER KE +30F6 ; [.177C.0020.000F.30F6] # KATAKANA LETTER SMALL KE +30B1 ; [.177C.0020.0011.30B1] # KATAKANA LETTER KE +3052 ; [.177C.013E.000E.3052] # HIRAGANA LETTER GE; CANONSEQ +30B2 ; [.177C.013E.0011.30B2] # KATAKANA LETTER GE; CANONSEQ +3053 ; [.177D.0020.000E.3053] # HIRAGANA LETTER KO +30B3 ; [.177D.0020.0011.30B3] # KATAKANA LETTER KO +3054 ; [.177D.013E.000E.3054] # HIRAGANA LETTER GO; CANONSEQ +30B4 ; [.177D.013E.0011.30B4] # KATAKANA LETTER GO; CANONSEQ +3055 ; [.177E.0020.000E.3055] # HIRAGANA LETTER SA +30B5 ; [.177E.0020.0011.30B5] # KATAKANA LETTER SA +3056 ; [.177E.013E.000E.3056] # HIRAGANA LETTER ZA; CANONSEQ +30B6 ; [.177E.013E.0011.30B6] # KATAKANA LETTER ZA; CANONSEQ +3057 ; [.177F.0020.000E.3057] # HIRAGANA LETTER SI +30B7 ; [.177F.0020.0011.30B7] # KATAKANA LETTER SI +3058 ; [.177F.013E.000E.3058] # HIRAGANA LETTER ZI; CANONSEQ +30B8 ; [.177F.013E.0011.30B8] # KATAKANA LETTER ZI; CANONSEQ +3059 ; [.1780.0020.000E.3059] # HIRAGANA LETTER SU +30B9 ; [.1780.0020.0011.30B9] # KATAKANA LETTER SU +305A ; [.1780.013E.000E.305A] # HIRAGANA LETTER ZU; CANONSEQ +30BA ; [.1780.013E.0011.30BA] # KATAKANA LETTER ZU; CANONSEQ +305B ; [.1781.0020.000E.305B] # HIRAGANA LETTER SE +30BB ; [.1781.0020.0011.30BB] # KATAKANA LETTER SE +305C ; [.1781.013E.000E.305C] # HIRAGANA LETTER ZE; CANONSEQ +30BC ; [.1781.013E.0011.30BC] # KATAKANA LETTER ZE; CANONSEQ +305D ; [.1782.0020.000E.305D] # HIRAGANA LETTER SO +30BD ; [.1782.0020.0011.30BD] # KATAKANA LETTER SO 
+305E ; [.1782.013E.000E.305E] # HIRAGANA LETTER ZO; CANONSEQ +30BE ; [.1782.013E.0011.30BE] # KATAKANA LETTER ZO; CANONSEQ +305F ; [.1783.0020.000E.305F] # HIRAGANA LETTER TA +30BF ; [.1783.0020.0011.30BF] # KATAKANA LETTER TA +3060 ; [.1783.013E.000E.3060] # HIRAGANA LETTER DA; CANONSEQ +30C0 ; [.1783.013E.0011.30C0] # KATAKANA LETTER DA; CANONSEQ +3061 ; [.1784.0020.000E.3061] # HIRAGANA LETTER TI +30C1 ; [.1784.0020.0011.30C1] # KATAKANA LETTER TI +3062 ; [.1784.013E.000E.3062] # HIRAGANA LETTER DI; CANONSEQ +30C2 ; [.1784.013E.0011.30C2] # KATAKANA LETTER DI; CANONSEQ +3063 ; [.1785.0020.000D.3063] # HIRAGANA LETTER SMALL TU +3064 ; [.1785.0020.000E.3064] # HIRAGANA LETTER TU +30C3 ; [.1785.0020.000F.30C3] # KATAKANA LETTER SMALL TU +30C4 ; [.1785.0020.0011.30C4] # KATAKANA LETTER TU +3065 ; [.1785.013E.000E.3065] # HIRAGANA LETTER DU; CANONSEQ +30C5 ; [.1785.013E.0011.30C5] # KATAKANA LETTER DU; CANONSEQ +3066 ; [.1786.0020.000E.3066] # HIRAGANA LETTER TE +30C6 ; [.1786.0020.0011.30C6] # KATAKANA LETTER TE +3067 ; [.1786.013E.000E.3067] # HIRAGANA LETTER DE; CANONSEQ +30C7 ; [.1786.013E.0011.30C7] # KATAKANA LETTER DE; CANONSEQ +3068 ; [.1787.0020.000E.3068] # HIRAGANA LETTER TO +30C8 ; [.1787.0020.0011.30C8] # KATAKANA LETTER TO +3069 ; [.1787.013E.000E.3069] # HIRAGANA LETTER DO; CANONSEQ +30C9 ; [.1787.013E.0011.30C9] # KATAKANA LETTER DO; CANONSEQ +306A ; [.1788.0020.000E.306A] # HIRAGANA LETTER NA +30CA ; [.1788.0020.0011.30CA] # KATAKANA LETTER NA +306B ; [.1789.0020.000E.306B] # HIRAGANA LETTER NI +30CB ; [.1789.0020.0011.30CB] # KATAKANA LETTER NI +306C ; [.178A.0020.000E.306C] # HIRAGANA LETTER NU +30CC ; [.178A.0020.0011.30CC] # KATAKANA LETTER NU +306D ; [.178B.0020.000E.306D] # HIRAGANA LETTER NE +30CD ; [.178B.0020.0011.30CD] # KATAKANA LETTER NE +306E ; [.178C.0020.000E.306E] # HIRAGANA LETTER NO +30CE ; [.178C.0020.0011.30CE] # KATAKANA LETTER NO +306F ; [.178D.0020.000E.306F] # HIRAGANA LETTER HA +30CF ; [.178D.0020.0011.30CF] # KATAKANA 
LETTER HA +3070 ; [.178D.013E.000E.3070] # HIRAGANA LETTER BA; CANONSEQ +30D0 ; [.178D.013E.0011.30D0] # KATAKANA LETTER BA; CANONSEQ +3071 ; [.178D.013F.000E.3071] # HIRAGANA LETTER PA; CANONSEQ +30D1 ; [.178D.013F.0011.30D1] # KATAKANA LETTER PA; CANONSEQ +3072 ; [.178E.0020.000E.3072] # HIRAGANA LETTER HI +30D2 ; [.178E.0020.0011.30D2] # KATAKANA LETTER HI +3073 ; [.178E.013E.000E.3073] # HIRAGANA LETTER BI; CANONSEQ +30D3 ; [.178E.013E.0011.30D3] # KATAKANA LETTER BI; CANONSEQ +3074 ; [.178E.013F.000E.3074] # HIRAGANA LETTER PI; CANONSEQ +30D4 ; [.178E.013F.0011.30D4] # KATAKANA LETTER PI; CANONSEQ +3075 ; [.178F.0020.000E.3075] # HIRAGANA LETTER HU +30D5 ; [.178F.0020.0011.30D5] # KATAKANA LETTER HU +3076 ; [.178F.013E.000E.3076] # HIRAGANA LETTER BU; CANONSEQ +30D6 ; [.178F.013E.0011.30D6] # KATAKANA LETTER BU; CANONSEQ +3077 ; [.178F.013F.000E.3077] # HIRAGANA LETTER PU; CANONSEQ +30D7 ; [.178F.013F.0011.30D7] # KATAKANA LETTER PU; CANONSEQ +3078 ; [.1790.0020.000E.3078] # HIRAGANA LETTER HE +30D8 ; [.1790.0020.0011.30D8] # KATAKANA LETTER HE +3079 ; [.1790.013E.000E.3079] # HIRAGANA LETTER BE; CANONSEQ +30D9 ; [.1790.013E.0011.30D9] # KATAKANA LETTER BE; CANONSEQ +307A ; [.1790.013F.000E.307A] # HIRAGANA LETTER PE; CANONSEQ +30DA ; [.1790.013F.0011.30DA] # KATAKANA LETTER PE; CANONSEQ +307B ; [.1791.0020.000E.307B] # HIRAGANA LETTER HO +30DB ; [.1791.0020.0011.30DB] # KATAKANA LETTER HO +307C ; [.1791.013E.000E.307C] # HIRAGANA LETTER BO; CANONSEQ +30DC ; [.1791.013E.0011.30DC] # KATAKANA LETTER BO; CANONSEQ +307D ; [.1791.013F.000E.307D] # HIRAGANA LETTER PO; CANONSEQ +30DD ; [.1791.013F.0011.30DD] # KATAKANA LETTER PO; CANONSEQ +307E ; [.1792.0020.000E.307E] # HIRAGANA LETTER MA +30DE ; [.1792.0020.0011.30DE] # KATAKANA LETTER MA +307F ; [.1793.0020.000E.307F] # HIRAGANA LETTER MI +30DF ; [.1793.0020.0011.30DF] # KATAKANA LETTER MI +3080 ; [.1794.0020.000E.3080] # HIRAGANA LETTER MU +30E0 ; [.1794.0020.0011.30E0] # KATAKANA LETTER MU +3081 ; 
[.1795.0020.000E.3081] # HIRAGANA LETTER ME +30E1 ; [.1795.0020.0011.30E1] # KATAKANA LETTER ME +3082 ; [.1796.0020.000E.3082] # HIRAGANA LETTER MO +30E2 ; [.1796.0020.0011.30E2] # KATAKANA LETTER MO +3083 ; [.1797.0020.000D.3083] # HIRAGANA LETTER SMALL YA +3084 ; [.1797.0020.000E.3084] # HIRAGANA LETTER YA +30E3 ; [.1797.0020.000F.30E3] # KATAKANA LETTER SMALL YA +30E4 ; [.1797.0020.0011.30E4] # KATAKANA LETTER YA +3085 ; [.1798.0020.000D.3085] # HIRAGANA LETTER SMALL YU +3086 ; [.1798.0020.000E.3086] # HIRAGANA LETTER YU +30E5 ; [.1798.0020.000F.30E5] # KATAKANA LETTER SMALL YU +30E6 ; [.1798.0020.0011.30E6] # KATAKANA LETTER YU +3087 ; [.1799.0020.000D.3087] # HIRAGANA LETTER SMALL YO +3088 ; [.1799.0020.000E.3088] # HIRAGANA LETTER YO +30E7 ; [.1799.0020.000F.30E7] # KATAKANA LETTER SMALL YO +30E8 ; [.1799.0020.0011.30E8] # KATAKANA LETTER YO +3089 ; [.179A.0020.000E.3089] # HIRAGANA LETTER RA +30E9 ; [.179A.0020.0011.30E9] # KATAKANA LETTER RA +308A ; [.179B.0020.000E.308A] # HIRAGANA LETTER RI +30EA ; [.179B.0020.0011.30EA] # KATAKANA LETTER RI +308B ; [.179C.0020.000E.308B] # HIRAGANA LETTER RU +30EB ; [.179C.0020.0011.30EB] # KATAKANA LETTER RU +308C ; [.179D.0020.000E.308C] # HIRAGANA LETTER RE +30EC ; [.179D.0020.0011.30EC] # KATAKANA LETTER RE +308D ; [.179E.0020.000E.308D] # HIRAGANA LETTER RO +30ED ; [.179E.0020.0011.30ED] # KATAKANA LETTER RO +308E ; [.179F.0020.000D.308E] # HIRAGANA LETTER SMALL WA +308F ; [.179F.0020.000E.308F] # HIRAGANA LETTER WA +30EE ; [.179F.0020.000F.30EE] # KATAKANA LETTER SMALL WA +30EF ; [.179F.0020.0011.30EF] # KATAKANA LETTER WA +30F7 ; [.179F.013E.0011.30F7] # KATAKANA LETTER VA; CANONSEQ +3090 ; [.17A0.0020.000E.3090] # HIRAGANA LETTER WI +30F0 ; [.17A0.0020.0011.30F0] # KATAKANA LETTER WI +30F8 ; [.17A0.013E.0011.30F8] # KATAKANA LETTER VI; CANONSEQ +3091 ; [.17A1.0020.000E.3091] # HIRAGANA LETTER WE +30F1 ; [.17A1.0020.0011.30F1] # KATAKANA LETTER WE +30F9 ; [.17A1.013E.0011.30F9] # KATAKANA LETTER VE; CANONSEQ +3092 
; [.17A2.0020.000E.3092] # HIRAGANA LETTER WO +30F2 ; [.17A2.0020.0011.30F2] # KATAKANA LETTER WO +30FA ; [.17A2.013E.0011.30FA] # KATAKANA LETTER VO; CANONSEQ +3093 ; [.17A3.0020.000E.3093] # HIRAGANA LETTER N +30F3 ; [.17A3.0020.0011.30F3] # KATAKANA LETTER N +3105 ; [.17A4.0020.0002.3105] # BOPOMOFO LETTER B +3106 ; [.17A5.0020.0002.3106] # BOPOMOFO LETTER P +3107 ; [.17A6.0020.0002.3107] # BOPOMOFO LETTER M +3108 ; [.17A7.0020.0002.3108] # BOPOMOFO LETTER F +312A ; [.17A8.0020.0002.312A] # BOPOMOFO LETTER V +3109 ; [.17A9.0020.0002.3109] # BOPOMOFO LETTER D +310A ; [.17AA.0020.0002.310A] # BOPOMOFO LETTER T +310B ; [.17AB.0020.0002.310B] # BOPOMOFO LETTER N +310C ; [.17AC.0020.0002.310C] # BOPOMOFO LETTER L +310D ; [.17AD.0020.0002.310D] # BOPOMOFO LETTER G +310E ; [.17AE.0020.0002.310E] # BOPOMOFO LETTER K +312B ; [.17AF.0020.0002.312B] # BOPOMOFO LETTER NG +31AD ; [.17B0.0020.0002.31AD] # BOPOMOFO LETTER NGG +310F ; [.17B1.0020.0002.310F] # BOPOMOFO LETTER H +3110 ; [.17B2.0020.0002.3110] # BOPOMOFO LETTER J +3111 ; [.17B3.0020.0002.3111] # BOPOMOFO LETTER Q +3112 ; [.17B4.0020.0002.3112] # BOPOMOFO LETTER X +312C ; [.17B5.0020.0002.312C] # BOPOMOFO LETTER GN +3113 ; [.17B6.0020.0002.3113] # BOPOMOFO LETTER ZH +3114 ; [.17B7.0020.0002.3114] # BOPOMOFO LETTER CH +3115 ; [.17B8.0020.0002.3115] # BOPOMOFO LETTER SH +3116 ; [.17B9.0020.0002.3116] # BOPOMOFO LETTER R +3117 ; [.17BA.0020.0002.3117] # BOPOMOFO LETTER Z +3118 ; [.17BB.0020.0002.3118] # BOPOMOFO LETTER C +3119 ; [.17BC.0020.0002.3119] # BOPOMOFO LETTER S +311A ; [.17BD.0020.0002.311A] # BOPOMOFO LETTER A +311B ; [.17BE.0020.0002.311B] # BOPOMOFO LETTER O +31A6 ; [.17BF.0020.0002.31A6] # BOPOMOFO LETTER OO +311C ; [.17C0.0020.0002.311C] # BOPOMOFO LETTER E +311D ; [.17C1.0020.0002.311D] # BOPOMOFO LETTER EH +31A4 ; [.17C2.0020.0002.31A4] # BOPOMOFO LETTER EE +311E ; [.17C3.0020.0002.311E] # BOPOMOFO LETTER AI +311F ; [.17C4.0020.0002.311F] # BOPOMOFO LETTER EI +3120 ; [.17C5.0020.0002.3120] # BOPOMOFO 
LETTER AU +3121 ; [.17C6.0020.0002.3121] # BOPOMOFO LETTER OU +3122 ; [.17C7.0020.0002.3122] # BOPOMOFO LETTER AN +3123 ; [.17C8.0020.0002.3123] # BOPOMOFO LETTER EN +3124 ; [.17C9.0020.0002.3124] # BOPOMOFO LETTER ANG +31B2 ; [.17CA.0020.0002.31B2] # BOPOMOFO LETTER ONG +3125 ; [.17CB.0020.0002.3125] # BOPOMOFO LETTER ENG +31B0 ; [.17CC.0020.0002.31B0] # BOPOMOFO LETTER AM +31B1 ; [.17CD.0020.0002.31B1] # BOPOMOFO LETTER OM +31AC ; [.17CE.0020.0002.31AC] # BOPOMOFO LETTER IM +3126 ; [.17CF.0020.0002.3126] # BOPOMOFO LETTER ER +3127 ; [.17D0.0020.0002.3127] # BOPOMOFO LETTER I +3128 ; [.17D1.0020.0002.3128] # BOPOMOFO LETTER U +3129 ; [.17D2.0020.0002.3129] # BOPOMOFO LETTER IU diff --git a/gnu/usr.bin/perl/lib/Unicode/Collate/t/test.t b/gnu/usr.bin/perl/lib/Unicode/Collate/t/test.t new file mode 100644 index 00000000000..f5a7012ea95 --- /dev/null +++ b/gnu/usr.bin/perl/lib/Unicode/Collate/t/test.t @@ -0,0 +1,605 @@ +# Before `make install' is performed this script should be runnable with +# `make test'. After `make install' it should work as `perl test.pl' + +######################### + +BEGIN { + if (ord("A") == 193) { + print "1..0 # Unicode::Collate not ported to EBCDIC\n"; + exit 0; + } +} + +use Test; +BEGIN { plan tests => 160 }; +use Unicode::Collate; +ok(1); # If we made it this far, we're ok. 
+ +######################### + +my $UCA_Version = "8.0"; + +ok(Unicode::Collate::UCA_Version, $UCA_Version); +ok(Unicode::Collate->UCA_Version, $UCA_Version); + +my $Collator = Unicode::Collate->new( + table => 'keys.txt', + normalization => undef, +); + +ok(ref $Collator, "Unicode::Collate"); + +ok($Collator->UCA_Version, $UCA_Version); +ok($Collator->UCA_Version(), $UCA_Version); + +ok( + join(':', $Collator->sort( + qw/ lib strict Carp ExtUtils CGI Time warnings Math overload Pod CPAN / + ) ), + join(':', + qw/ Carp CGI CPAN ExtUtils lib Math overload Pod strict Time warnings / + ), +); + +my $A_acute = pack('U', 0x00C1); +my $acute = pack('U', 0x0301); + +ok($Collator->cmp("A$acute", $A_acute), -1); +ok($Collator->cmp("", ""), 0); +ok(! $Collator->ne("", "") ); +ok( $Collator->eq("", "") ); +ok($Collator->cmp("", "perl"), -1); + +############## + +eval { require Unicode::Normalize }; + +if (!$@) { + my $NFD = Unicode::Collate->new( + table => 'keys.txt', + entry => <<'ENTRIES', +0430 ; [.0B01.0020.0002.0430] # CYRILLIC SMALL LETTER A +0410 ; [.0B01.0020.0008.0410] # CYRILLIC CAPITAL LETTER A +04D3 ; [.0B09.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS +0430 0308 ; [.0B09.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS +04D3 ; [.0B09.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS +0430 0308 ; [.0B09.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS +04D2 ; [.0B09.0020.0008.04D2] # CYRILLIC CAPITAL LETTER A WITH DIAERESIS +0410 0308 ; [.0B09.0020.0008.04D2] # CYRILLIC CAPITAL LETTER A WITH DIAERESIS +0430 3099 ; [.0B10.0020.0002.04D3] # A WITH KATAKANA VOICED +0430 3099 0308 ; [.0B11.0020.0002.04D3] # A WITH KATAKANA VOICED, DIAERESIS +ENTRIES + ); + ok($NFD->eq("A$acute", $A_acute)); + ok($NFD->eq("\x{4D3}\x{325}", "\x{430}\x{308}\x{325}")); + ok($NFD->lt("\x{430}\x{308}A", "\x{430}\x{308}B")); + ok($NFD->lt("\x{430}\x{3099}B", "\x{430}\x{308}\x{3099}A")); + ok($NFD->eq("\x{0430}\x{3099}\x{309A}\x{0308}", + 
"\x{0430}\x{309A}\x{3099}\x{0308}") ); +} +else { + ok(1); + ok(1); + ok(1); + ok(1); + ok(1); +} + +############## + +my $trad = Unicode::Collate->new( + table => 'keys.txt', + normalization => undef, + ignoreName => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/, + level => 4, + entry => << 'ENTRIES', + 0063 0068 ; [.0893.0020.0002.0063] % "ch" in traditional Spanish + 0043 0068 ; [.0893.0020.0008.0043] # "Ch" in traditional Spanish + 00DF ; [.09F3.0154.0004.00DF] [.09F3.0020.0004.00DF] # eszet in Germany +ENTRIES +); + +ok( + join(':', $trad->sort( qw/ acha aca ada acia acka / ) ), + join(':', qw/ aca acia acka acha ada / ), +); + +ok( + join(':', $Collator->sort( qw/ acha aca ada acia acka / ) ), + join(':', qw/ aca acha acia acka ada / ), +); + +my $hiragana = "\x{3042}\x{3044}"; +my $katakana = "\x{30A2}\x{30A4}"; + +# HIRAGANA and KATAKANA are ignorable via ignoreName +ok($trad->eq($hiragana, "")); +ok($trad->eq("", $katakana)); +ok($trad->eq($hiragana, $katakana)); +ok($trad->eq($katakana, $hiragana)); + +############## + +my $old_level = $Collator->{level}; + +$Collator->{level} = 2; + +ok( $Collator->cmp("ABC","abc"), 0); +ok( $Collator->eq("ABC","abc") ); +ok( $Collator->le("ABC","abc") ); +ok( $Collator->cmp($hiragana, $katakana), 0); +ok( $Collator->eq($hiragana, $katakana) ); +ok( $Collator->ge($hiragana, $katakana) ); + +# hangul +ok( $Collator->eq("a\x{AC00}b", "a\x{1100}\x{1161}b") ); +ok( $Collator->eq("a\x{AE00}b", "a\x{1100}\x{1173}\x{11AF}b") ); +ok( $Collator->gt("a\x{AE00}b", "a\x{1100}\x{1173}b\x{11AF}") ); +ok( $Collator->lt("a\x{AC00}b", "a\x{AE00}b") ); +ok( $Collator->gt("a\x{D7A3}b", "a\x{C544}b") ); +ok( $Collator->lt("a\x{C544}b", "a\x{30A2}b") ); # hangul < hiragana + +$Collator->{level} = $old_level; + +$Collator->{katakana_before_hiragana} = 1; + +ok( $Collator->cmp("abc", "ABC"), -1); +ok( $Collator->ne("abc", "ABC") ); +ok( $Collator->lt("abc", "ABC") ); +ok( $Collator->le("abc", "ABC") ); +ok( $Collator->cmp($hiragana, $katakana), 1); 
+ok( $Collator->ne($hiragana, $katakana) ); +ok( $Collator->gt($hiragana, $katakana) ); +ok( $Collator->ge($hiragana, $katakana) ); + +$Collator->{upper_before_lower} = 1; + +ok( $Collator->cmp("abc", "ABC"), 1); +ok( $Collator->ge("abc", "ABC"), 1); +ok( $Collator->gt("abc", "ABC"), 1); +ok( $Collator->cmp($hiragana, $katakana), 1); +ok( $Collator->ge($hiragana, $katakana), 1); +ok( $Collator->gt($hiragana, $katakana), 1); + +$Collator->{katakana_before_hiragana} = 0; + +ok( $Collator->cmp("abc", "ABC"), 1); +ok( $Collator->cmp($hiragana, $katakana), -1); + +$Collator->{upper_before_lower} = 0; + +ok( $Collator->cmp("abc", "ABC"), -1); +ok( $Collator->le("abc", "ABC") ); +ok( $Collator->cmp($hiragana, $katakana), -1); +ok( $Collator->lt($hiragana, $katakana) ); + +############## + +my $ignoreAE = Unicode::Collate->new( + table => 'keys.txt', + normalization => undef, + ignoreChar => qr/^[aAeE]$/, +); + +ok($ignoreAE->eq("element","lament")); +ok($ignoreAE->eq("Perl","ePrl")); + +############## + +my $onlyABC = Unicode::Collate->new( + table => undef, + normalization => undef, + entry => << 'ENTRIES', +0061 ; [.0101.0020.0002.0061] # LATIN SMALL LETTER A +0041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A +0062 ; [.0102.0020.0002.0062] # LATIN SMALL LETTER B +0042 ; [.0102.0020.0008.0042] # LATIN CAPITAL LETTER B +0063 ; [.0103.0020.0002.0063] # LATIN SMALL LETTER C +0043 ; [.0103.0020.0008.0043] # LATIN CAPITAL LETTER C +ENTRIES +); + +ok( + join(':', $onlyABC->sort( qw/ ABA BAC cc A Ab cAc aB / ) ), + join(':', qw/ A aB Ab ABA BAC cAc cc / ), +); + +############## + +my $undefAE = Unicode::Collate->new( + table => 'keys.txt', + normalization => undef, + undefChar => qr/^[aAeE]$/, +); + +ok($undefAE ->gt("edge","fog")); +ok($Collator->lt("edge","fog")); +ok($undefAE ->gt("lake","like")); +ok($Collator->lt("lake","like")); + +############## + +$Collator->{level} = 2; + +my $str; + +my $orig = "This is a Perl book."; +my $sub = "PERL"; +my $rep = "camel"; +my 
$ret = "This is a camel book."; + +$str = $orig; +if (my($pos,$len) = $Collator->index($str, $sub)) { + substr($str, $pos, $len, $rep); +} + +ok($str, $ret); + +$Collator->{level} = $old_level; + +$str = $orig; +if (my($pos,$len) = $Collator->index($str, $sub)) { + substr($str, $pos, $len, $rep); +} + +ok($str, $orig); + +############## + +my $match; + +$Collator->{level} = 1; + +$str = "Pe\x{300}rl"; +$sub = "pe"; +$match = undef; +if (my($pos, $len) = $Collator->index($str, $sub)) { + $match = substr($str, $pos, $len); +} +ok($match, "Pe\x{300}"); + +$str = "P\x{300}e\x{300}\x{301}\x{303}rl"; +$sub = "pE"; +$match = undef; +if (my($pos, $len) = $Collator->index($str, $sub)) { + $match = substr($str, $pos, $len); +} +ok($match, "P\x{300}e\x{300}\x{301}\x{303}"); + +$Collator->{level} = $old_level; + +############## + +$trad->{level} = 1; + +$str = "Ich mu\x{00DF} studieren."; +$sub = "m\x{00FC}ss"; +$match = undef; +if (my($pos, $len) = $trad->index($str, $sub)) { + $match = substr($str, $pos, $len); +} +ok($match, "mu\x{00DF}"); + +$trad->{level} = $old_level; + +$str = "Ich mu\x{00DF} studieren."; +$sub = "m\x{00FC}ss"; +$match = undef; + +if (my($pos, $len) = $trad->index($str, $sub)) { + $match = substr($str, $pos, $len); +} +ok($match, undef); + +$match = undef; +if (my($pos,$len) = $Collator->index("", "")) { + $match = substr("", $pos, $len); +} +ok($match, ""); + +$match = undef; +if (my($pos,$len) = $Collator->index("", "abc")) { + $match = substr("", $pos, $len); +} +ok($match, undef); + +############## + +# Table is undefined, then no entry is defined. + +my $undef_table = Unicode::Collate->new( + table => undef, + normalization => undef, + level => 1, +); + +# in the Unicode code point order +ok($undef_table->lt('', 'A')); +ok($undef_table->lt('ABC', 'B')); + +# Hangul should be decomposed (even w/o Unicode::Normalize). 
+ +ok($undef_table->lt("Perl", "\x{AC00}")); +ok($undef_table->eq("\x{AC00}", "\x{1100}\x{1161}")); +ok($undef_table->eq("\x{AE00}", "\x{1100}\x{1173}\x{11AF}")); +ok($undef_table->lt("\x{AE00}", "\x{3042}")); + # U+AC00: Hangul GA + # U+AE00: Hangul GEUL + # U+3042: Hiragana A + +# Weight for CJK Ideographs is defined, though. + +ok($undef_table->lt("", "\x{4E00}")); +ok($undef_table->lt("\x{4E8C}","ABC")); +ok($undef_table->lt("\x{4E00}","\x{3042}")); +ok($undef_table->lt("\x{4E00}","\x{4E8C}")); + # U+4E00: Ideograph "ONE" + # U+4E8C: Ideograph "TWO" + + +############## + +my $few_entries = Unicode::Collate->new( + entry => <<'ENTRIES', +0050 ; [.0101.0020.0002.0050] # P +0045 ; [.0102.0020.0002.0045] # E +0052 ; [.0103.0020.0002.0052] # R +004C ; [.0104.0020.0002.004C] # L +1100 ; [.0105.0020.0002.1100] # Hangul Jamo initial G +1175 ; [.0106.0020.0002.1175] # Hangul Jamo middle I +5B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter" +ENTRIES + table => undef, + normalization => undef, +); + +# defined before undefined + +my $sortABC = join '', + $few_entries->sort(split //, "ABCDEFGHIJKLMNOPQRSTUVWXYZ "); + +ok($sortABC eq "PERL ABCDFGHIJKMNOQSTUVWXYZ"); + +ok($few_entries->lt('E', 'D')); +ok($few_entries->lt("\x{5B57}", "\x{4E00}")); +ok($few_entries->lt("\x{AE30}", "\x{AC00}")); + +# Hangul must be decomposed. + +ok($few_entries->eq("\x{AC00}", "\x{1100}\x{1161}")); + +############## + +my $all_undef = Unicode::Collate->new( + table => undef, + normalization => undef, + overrideCJK => undef, + overrideHangul => undef, +); + +# All in the Unicode code point order. +# No hangul decomposition. 
+ +ok($all_undef->lt("\x{3042}", "\x{4E00}")); +ok($all_undef->lt("\x{4DFF}", "\x{4E00}")); +ok($all_undef->lt("\x{4E00}", "\x{AC00}")); +ok($all_undef->gt("\x{AC00}", "\x{1100}\x{1161}")); +ok($all_undef->gt("\x{AC00}", "\x{ABFF}")); + +############## + +my $ignoreCJK = Unicode::Collate->new( + table => undef, + normalization => undef, + overrideCJK => sub {()}, + entry => <<'ENTRIES', +5B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter" +ENTRIES +); + +# All CJK Unified Ideographs except U+5B57 are ignored. + +ok($ignoreCJK->eq("\x{4E00}", "")); +ok($ignoreCJK->lt("\x{4E00}", "\0")); +ok($ignoreCJK->eq("Pe\x{4E00}rl", "Perl")); # U+4E00 is a CJK. +ok($ignoreCJK->gt("\x{4DFF}", "\x{4E00}")); # U+4DFF is not CJK. +ok($ignoreCJK->lt("Pe\x{5B57}rl", "Perl")); # 'r' is unassigned. + +############## + +my $ignoreHangul = Unicode::Collate->new( + table => undef, + normalization => undef, + overrideHangul => sub {()}, + entry => <<'ENTRIES', +AE00 ; [.0100.0020.0002.AE00] # Hangul GEUL +ENTRIES +); + +# All Hangul Syllables except U+AE00 are ignored. + +ok($ignoreHangul->eq("\x{AC00}", "")); +ok($ignoreHangul->lt("\x{AC00}", "\0")); +ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}")); +ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored. +ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned. 
+ +############## + +my $blanked = Unicode::Collate->new( + table => 'keys.txt', + normalization => undef, + alternate => 'Blanked', +); + +ok($blanked->lt("death", "de luge")); +ok($blanked->lt("de luge", "de-luge")); +ok($blanked->lt("de-luge", "deluge")); +ok($blanked->lt("deluge", "de\x{2010}luge")); +ok($blanked->lt("deluge", "de Luge")); + +############## + +my $nonIgn = Unicode::Collate->new( + table => 'keys.txt', + normalization => undef, + alternate => 'Non-ignorable', +); + +ok($nonIgn->lt("de luge", "de Luge")); +ok($nonIgn->lt("de Luge", "de-luge")); +ok($nonIgn->lt("de-Luge", "de\x{2010}luge")); +ok($nonIgn->lt("de-luge", "death")); +ok($nonIgn->lt("death", "deluge")); + +############## + +my $shifted = Unicode::Collate->new( + table => 'keys.txt', + normalization => undef, + alternate => 'Shifted', +); + +ok($shifted->lt("death", "de luge")); +ok($shifted->lt("de luge", "de-luge")); +ok($shifted->lt("de-luge", "deluge")); +ok($shifted->lt("deluge", "de Luge")); +ok($shifted->lt("de Luge", "deLuge")); + +############## + +my $shTrim = Unicode::Collate->new( + table => 'keys.txt', + normalization => undef, + alternate => 'Shift-Trimmed', +); + +ok($shTrim->lt("death", "deluge")); +ok($shTrim->lt("deluge", "de luge")); +ok($shTrim->lt("de luge", "de-luge")); +ok($shTrim->lt("de-luge", "deLuge")); +ok($shTrim->lt("deLuge", "de Luge")); + +############## + +my $overCJK = Unicode::Collate->new( + table => undef, + normalization => undef, + entry => <<'ENTRIES', +0061 ; [.0101.0020.0002.0061] # latin a +0041 ; [.0101.0020.0008.0041] # LATIN A +4E00 ; [.B1FC.0030.0004.4E00] # Ideograph; B1FC = FFFF - 4E03. +ENTRIES + overrideCJK => sub { + my $u = 0xFFFF - $_[0]; # reversed + [$u, 0x20, 0x2, $u]; + }, +); + +ok($overCJK->lt("a", "A")); # diff. at level 3. +ok($overCJK->lt( "\x{4E03}", "\x{4E00}")); # diff. at level 2. 
+ok($overCJK->lt("A\x{4E03}", "A\x{4E00}")); +ok($overCJK->lt("A\x{4E03}", "a\x{4E00}")); +ok($overCJK->lt("a\x{4E03}", "A\x{4E00}")); + +############## + +# rearranged : 0x0E40..0x0E44, 0x0EC0..0x0EC4 + +ok($Collator->lt("A", "B")); +ok($Collator->lt("\x{0E40}", "\x{0E41}")); +ok($Collator->lt("\x{0E40}A", "\x{0E41}B")); +ok($Collator->lt("\x{0E41}A", "\x{0E40}B")); +ok($Collator->lt("A\x{0E41}A", "A\x{0E40}B")); + +ok($all_undef->lt("A", "B")); +ok($all_undef->lt("\x{0E40}", "\x{0E41}")); +ok($all_undef->lt("\x{0E40}A", "\x{0E41}B")); +ok($all_undef->lt("\x{0E41}A", "\x{0E40}B")); +ok($all_undef->lt("A\x{0E41}A", "A\x{0E40}B")); + +############## + +my $no_rearrange = Unicode::Collate->new( + table => undef, + normalization => undef, + rearrange => [], +); + +ok($no_rearrange->lt("A", "B")); +ok($no_rearrange->lt("\x{0E40}", "\x{0E41}")); +ok($no_rearrange->lt("\x{0E40}A", "\x{0E41}B")); +ok($no_rearrange->gt("\x{0E41}A", "\x{0E40}B")); +ok($no_rearrange->gt("A\x{0E41}A", "A\x{0E40}B")); + +############## + +# equivalent to $no_rearrange + +my $undef_rearrange = Unicode::Collate->new( + table => undef, + normalization => undef, + rearrange => undef, +); + +ok($undef_rearrange->lt("A", "B")); +ok($undef_rearrange->lt("\x{0E40}", "\x{0E41}")); +ok($undef_rearrange->lt("\x{0E40}A", "\x{0E41}B")); +ok($undef_rearrange->gt("\x{0E41}A", "\x{0E40}B")); +ok($undef_rearrange->gt("A\x{0E41}A", "A\x{0E40}B")); + +############## + +my $dropArticles = Unicode::Collate->new( + table => "keys.txt", + normalization => undef, + preprocess => sub { + my $string = shift; + $string =~ s/\b(?:an?|the)\s+//ig; + $string; + }, +); + +ok($dropArticles->eq("camel", "a camel")); +ok($dropArticles->eq("Perl", "The Perl")); +ok($dropArticles->lt("the pen", "a pencil")); +ok($Collator->lt("Perl", "The Perl")); +ok($Collator->gt("the pen", "a pencil")); + +############## + +my $backLevel1 = Unicode::Collate->new( + table => undef, + normalization => undef, + backwards => [ 1 ], +); + +# all 
strings are reversed at level 1. + +ok($backLevel1->gt("AB", "BA")); +ok($backLevel1->gt("\x{3042}\x{3044}", "\x{3044}\x{3042}")); + +############## + +my $backLevel2 = Unicode::Collate->new( + table => "keys.txt", + normalization => undef, + undefName => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/, + backwards => 2, +); + +ok($backLevel2->gt("Ca\x{300}ca\x{302}", "ca\x{302}ca\x{300}")); +ok($backLevel2->gt("ca\x{300}ca\x{302}", "Ca\x{302}ca\x{300}")); +ok($Collator ->lt("Ca\x{300}ca\x{302}", "ca\x{302}ca\x{300}")); +ok($Collator ->lt("ca\x{300}ca\x{302}", "Ca\x{302}ca\x{300}")); + + +# HIRAGANA and KATAKANA are made undefined via undefName. +# So they are after CJK Unified Ideographs. + +ok($backLevel2->lt("\x{4E00}", $hiragana)); +ok($backLevel2->lt("\x{4E03}", $katakana)); +ok($Collator ->gt("\x{4E00}", $hiragana)); +ok($Collator ->gt("\x{4E03}", $katakana)); + +############## diff --git a/gnu/usr.bin/perl/lib/Unicode/README b/gnu/usr.bin/perl/lib/Unicode/README new file mode 100644 index 00000000000..c5518612b4d --- /dev/null +++ b/gnu/usr.bin/perl/lib/Unicode/README @@ -0,0 +1,8 @@ +There used to be a directory called lib/unicode but everything that +used to be here is now in the lib/unicore directory. + +The renaming was done to avoid naming conflicts with the Perl core +Unicode files and modules in the Unicode:: space in case-ignoring +filesystems. The lib/Unicode directory now contains various +Unicode-related modules. 
+ diff --git a/gnu/usr.bin/perl/lib/Unicode/UCD.pm b/gnu/usr.bin/perl/lib/Unicode/UCD.pm new file mode 100644 index 00000000000..96dee9a8164 --- /dev/null +++ b/gnu/usr.bin/perl/lib/Unicode/UCD.pm @@ -0,0 +1,746 @@ +package Unicode::UCD; + +use strict; +use warnings; + +our $VERSION = '0.2'; + +require Exporter; + +our @ISA = qw(Exporter); + +our @EXPORT_OK = qw(charinfo + charblock charscript + charblocks charscripts + charinrange + compexcl + casefold casespec); + +use Carp; + +=head1 NAME + +Unicode::UCD - Unicode character database + +=head1 SYNOPSIS + + use Unicode::UCD 'charinfo'; + my $charinfo = charinfo($codepoint); + + use Unicode::UCD 'charblock'; + my $charblock = charblock($codepoint); + + use Unicode::UCD 'charscript'; + my $charscript = charscript($codepoint); + + use Unicode::UCD 'charblocks'; + my $charblocks = charblocks(); + + use Unicode::UCD 'charscripts'; + my $charscripts = charscripts(); + + use Unicode::UCD qw(charscript charinrange); + my $range = charscript($script); + print "looks like $script\n" if charinrange($range, $codepoint); + + use Unicode::UCD 'compexcl'; + my $compexcl = compexcl($codepoint); + + my $unicode_version = Unicode::UCD::UnicodeVersion(); + +=head1 DESCRIPTION + +The Unicode::UCD module offers a simple interface to the Unicode +Character Database. 
=pod

=cut

# Filehandles onto the individual Unicode data files.  Each one is
# filled in by openunicode() the first time its file is needed.
my $UNICODEFH;
my $BLOCKSFH;
my $SCRIPTSFH;
my $VERSIONFH;
my $COMPEXCLFH;
my $CASEFOLDFH;
my $CASESPECFH;

# openunicode(\$fh, @path)
#
# Opens the file "unicore/@path" found under the first directory of @INC
# that has it, storing the handle through the scalar reference $rfh.
#
# Returns the path that was opened.  If the referenced scalar is already
# defined nothing is opened and undef is returned.  Croaks when the file
# cannot be opened under any @INC directory.
sub openunicode {
    my ($rfh, @path) = @_;
    use File::Spec;
    my $f;
    unless (defined $$rfh) {
        for my $d (@INC) {
            $f = File::Spec->catfile($d, "unicore", @path);
            # Three-argument open: the file is only ever read, and no
            # character in an @INC entry can be taken for an open mode.
            last if open($$rfh, '<', $f);
            undef $f;
        }
        croak __PACKAGE__, ": failed to find ",
              File::Spec->catfile(@path), " in @INC"
            unless defined $f;
    }
    return $f;
}

=head2 charinfo

    use Unicode::UCD 'charinfo';

    my $charinfo = charinfo(0x41);

charinfo() returns a reference to a hash that has the following fields
as defined by the Unicode standard:

    key

    code             code point with at least four hexdigits
    name             name of the character IN UPPER CASE
    category         general category of the character
    combining        classes used in the Canonical Ordering Algorithm
    bidi             bidirectional category
    decomposition    character decomposition mapping
    decimal          if decimal digit this is the integer numeric value
    digit            if digit this is the numeric value
    numeric          if numeric is the integer or rational numeric value
    mirrored         if mirrored in bidirectional text
    unicode10        Unicode 1.0 name if existed and different
    comment          ISO 10646 comment field
    upper            uppercase equivalent mapping
    lower            lowercase equivalent mapping
    title            titlecase equivalent mapping

    block            block the character belongs to (used in \p{In...})
    script           script the character belongs to

If no match is found, C<undef> is returned (an empty list in list
context).

The C<block> property is the same as returned by charblock().  It is
not defined in the Unicode Character Database proper (Chapter 4 of the
Unicode 3.0 Standard, aka TUS3) but instead in an auxiliary database
(Chapter 14 of TUS3).  Similarly for the C<script> property.

=cut
=pod

Note that you cannot do (de)composition and casing based solely on the
above C<decomposition> and C<lower>, C<upper>, C<title>, properties,
you will need also the compexcl(), casefold(), and casespec() functions.

=cut

# Turns a code point argument into a number: a string of decimal digits
# is returned unchanged, hexadecimal digits (optionally prefixed by "U+"
# or "0x") are converted with hex().  Returns nothing for anything else.
sub _getcode {
    my $arg = shift;

    if ($arg =~ /^\d+$/) {
        return $arg;
    } elsif ($arg =~ /^(?:U\+|0x)?([[:xdigit:]]+)$/) {
        return hex($1);
    }

    return;
}

# Lingua::KO::Hangul::Util not part of the standard distribution
# but it will be used if available.

eval { require Lingua::KO::Hangul::Util };
my $hasHangulUtil = ! $@;
if ($hasHangulUtil) {
    Lingua::KO::Hangul::Util->import();
}

# Formats the two or three code points produced by decomposeHangul() as
# space separated four-digit hex.  Returns nothing when
# Lingua::KO::Hangul::Util could not be loaded.
sub hangul_decomp { # internal: called from charinfo
    if ($hasHangulUtil) {
        my @tmp = decomposeHangul(shift);
        return sprintf("%04X %04X",      @tmp) if @tmp == 2;
        return sprintf("%04X %04X %04X", @tmp) if @tmp == 3;
    }
    return;
}

# Fallback name for a Hangul syllable, built from the code point alone.
sub hangul_charname { # internal: called from charinfo
    return sprintf("HANGUL SYLLABLE-%04X", shift);
}

# Name for a CJK ideograph, built from the code point alone.
sub han_charname { # internal: called from charinfo
    return sprintf("CJK UNIFIED IDEOGRAPH-%04X", shift);
}

# Code point ranges that charinfo() looks up by the first code point of
# the range instead of by the code point itself.
my @CharinfoRanges = (
# block name
# [ first, last, coderef to name, coderef to decompose ],
# CJK Ideographs Extension A
  [ 0x3400,   0x4DB5,   \&han_charname,   undef  ],
# CJK Ideographs
  [ 0x4E00,   0x9FA5,   \&han_charname,   undef  ],
# Hangul Syllables
  [ 0xAC00,   0xD7A3,   $hasHangulUtil ? \&getHangulName : \&hangul_charname,  \&hangul_decomp ],
# Non-Private Use High Surrogates
  [ 0xD800,   0xDB7F,   undef,   undef  ],
# Private Use High Surrogates
  [ 0xDB80,   0xDBFF,   undef,   undef  ],
# Low Surrogates
  [ 0xDC00,   0xDFFF,   undef,   undef  ],
# The Private Use Area
  [ 0xE000,   0xF8FF,   undef,   undef  ],
# CJK Ideographs Extension B
  [ 0x20000,  0x2A6D6,  \&han_charname,   undef  ],
# Plane 15 Private Use Area
  [ 0xF0000,  0xFFFFD,  undef,   undef  ],
# Plane 16 Private Use Area
  [ 0x100000, 0x10FFFD, undef,   undef  ],
);

# charinfo($codepoint)
#
# Looks the code point up in UnicodeData.txt and returns a reference to a
# hash of its fields plus "block" and "script".  Returns nothing when no
# matching record is found; croaks when the argument is not a code point.
sub charinfo {
    my $arg  = shift;
    my $code = _getcode($arg);
    croak __PACKAGE__, "::charinfo: unknown code '$arg'"
        unless defined $code;
    my $hexk = sprintf("%06X", $code);
    my($rcode,$rname,$rdec);
    foreach my $range (@CharinfoRanges){
        if ($range->[0] <= $code && $code <= $range->[1]) {
            $rcode = $hexk;
            $rcode =~ s/^0+//;
            $rcode =  sprintf("%04X", hex($rcode));
            $rname = $range->[2] ? $range->[2]->($code) : '';
            $rdec  = $range->[3] ? $range->[3]->($code) : '';
            $hexk  = sprintf("%06X", $range->[0]); # replace by the first
            last;
        }
    }
    openunicode(\$UNICODEFH, "UnicodeData.txt");
    if (defined $UNICODEFH) {
        use Search::Dict 1.02;
        # The xfrm callback pads each record's code to six hex digits so
        # that it compares against the six-digit key in $hexk.
        if (look($UNICODEFH, "$hexk;", { xfrm => sub { $_[0] =~ /^([^;]+);(.+)/; sprintf "%06X;$2", hex($1) } } ) >= 0) {
            my $line = <$UNICODEFH>;
            # look() can leave the handle at end-of-file when the key
            # sorts after every record; there is then nothing to parse.
            return unless defined $line;
            chomp $line;
            my %prop;
            @prop{qw(
                     code name category
                     combining bidi decomposition
                     decimal digit numeric
                     mirrored unicode10 comment
                     upper lower title
                    )} = split(/;/, $line, -1);
            $hexk =~ s/^0+//;
            $hexk =  sprintf("%04X", hex($hexk));
            if ($prop{code} eq $hexk) {
                $prop{block}  = charblock($code);
                $prop{script} = charscript($code);
                # Inside one of @CharinfoRanges the record read belongs
                # to the first code point of the range, so substitute the
                # values computed for the requested code point.
                if(defined $rname){
                    $prop{code} = $rcode;
                    $prop{name} = $rname;
                    $prop{decomposition} = $rdec;
                }
                return \%prop;
            }
        }
    }
    return;
}

# Returns the third element of the [lo, hi, prop] entry whose lo..hi
# interval contains $code, searching $table between indexes $lo and $hi.
# Returns nothing when no entry matches.
sub _search { # Binary search in a [[lo,hi,prop],[...],...] table.
    my ($table, $lo, $hi, $code) = @_;

    return if $lo > $hi;

    my $mid = int(($lo+$hi) / 2);

    if ($table->[$mid]->[0] < $code) {
        if ($table->[$mid]->[1] >= $code) {
            return $table->[$mid]->[2];
        } else {
            return _search($table, $mid + 1, $hi, $code);
        }
    } elsif ($table->[$mid]->[0] > $code) {
        return _search($table, $lo, $mid - 1, $code);
    } else {
        return $table->[$mid]->[2];
    }
}

# charinrange($range, $codepoint)
#
# Searches $range, a reference to an array of [lo, hi, prop] entries, for
# the code point and returns the matching entry's prop, or nothing.
# Croaks when the argument is not a code point.
sub charinrange {
    my ($range, $arg) = @_;
    my $code = _getcode($arg);
    croak __PACKAGE__, "::charinrange: unknown code '$arg'"
        unless defined $code;
    return _search($range, 0, $#$range, $code);
}

=head2 charblock

    use Unicode::UCD 'charblock';

    my $charblock = charblock(0x41);
    my $charblock = charblock(1234);
    my $charblock = charblock("0x263a");
    my $charblock = charblock("U+263a");

    my $range     = charblock('Armenian');

With a B<code point argument> charblock() returns the I<block> the character
belongs to, e.g.  C<Basic Latin>.  Note that not all the character
positions within all blocks are defined.

See also L</Blocks versus Scripts>.

If supplied with an argument that can't be a code point, charblock() tries
to do the opposite and interpret the argument as a character block.  The
return value is a I<range>: an anonymous list of lists that contain
I<start-of-range>, I<end-of-range> code point pairs.  You can test whether a
code point is in a range using the L</charinrange> function.  If the
argument is not a known character block, C<undef> is returned.

=cut
=pod

=cut

# Block table: every [lo, hi, name] entry read from Blocks.txt, in file
# order, plus the same entries grouped by block name.
my @BLOCKS;
my %BLOCKS;

# Fills @BLOCKS and %BLOCKS from Blocks.txt; does nothing once @BLOCKS
# has entries.
sub _charblocks {
    unless (@BLOCKS) {
        if (openunicode(\$BLOCKSFH, "Blocks.txt")) {
            # A lexical line variable keeps the caller's $_ intact.
            while (my $line = <$BLOCKSFH>) {
                if ($line =~ /^([0-9A-F]+)\.\.([0-9A-F]+);\s+(.+)/) {
                    my ($lo, $hi) = (hex($1), hex($2));
                    my $name = $3;
                    my $subrange = [ $lo, $hi, $name ];
                    push @BLOCKS, $subrange;
                    push @{$BLOCKS{$name}}, $subrange;
                }
            }
            close($BLOCKSFH);
        }
    }
}

# charblock($arg)
#
# For a code point argument returns the name of the block containing it
# (nothing when no block does).  For any other argument returns the list
# of [lo, hi, name] entries stored under that block name, or nothing when
# the name is unknown.
sub charblock {
    my $arg = shift;

    _charblocks() unless @BLOCKS;

    my $code = _getcode($arg);

    if (defined $code) {
        return _search(\@BLOCKS, 0, $#BLOCKS, $code);
    } else {
        if (exists $BLOCKS{$arg}) {
            return $BLOCKS{$arg};
        } else {
            return;
        }
    }
}

=head2 charscript

    use Unicode::UCD 'charscript';

    my $charscript = charscript(0x41);
    my $charscript = charscript(1234);
    my $charscript = charscript("U+263a");

    my $range      = charscript('Thai');

With a B<code point argument> charscript() returns the I<script> the
character belongs to, e.g.  C<Latin>, C<Greek>, C<Han>.

See also L</Blocks versus Scripts>.

If supplied with an argument that can't be a code point, charscript() tries
to do the opposite and interpret the argument as a character script.  The
return value is a I<range>: an anonymous list of lists that contain
I<start-of-range>, I<end-of-range> code point pairs.  You can test whether a
code point is in a range using the L</charinrange> function.  If the
argument is not a known character script, C<undef> is returned.

=cut

# Script table: every [lo, hi, name] entry read from Scripts.txt, sorted
# by lo, plus the same entries grouped by script name.
my @SCRIPTS;
my %SCRIPTS;

# Fills @SCRIPTS and %SCRIPTS from Scripts.txt; does nothing once
# @SCRIPTS has entries.
sub _charscripts {
    unless (@SCRIPTS) {
        if (openunicode(\$SCRIPTSFH, "Scripts.txt")) {
            # A lexical line variable keeps the caller's $_ intact.
            while (my $line = <$SCRIPTSFH>) {
                if ($line =~ /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+)/) {
                    # A single code point has no "..hi" part; test the
                    # capture for definedness, not for truth.
                    my ($lo, $hi) = (hex($1), defined $2 ? hex($2) : hex($1));
                    my $script = lc($3);
                    $script =~ s/\b(\w)/uc($1)/ge;
                    my $subrange = [ $lo, $hi, $script ];
                    push @SCRIPTS, $subrange;
                    push @{$SCRIPTS{$script}}, $subrange;
                }
            }
            close($SCRIPTSFH);
            # _search() needs the entries in ascending order of lo.
            @SCRIPTS = sort { $a->[0] <=> $b->[0] } @SCRIPTS;
        }
    }
}

# charscript($arg)
#
# For a code point argument returns the name of the script containing it
# (nothing when no script does).  For any other argument returns the list
# of [lo, hi, name] entries stored under that script name, or nothing
# when the name is unknown.
sub charscript {
    my $arg = shift;

    _charscripts() unless @SCRIPTS;

    my $code = _getcode($arg);

    if (defined $code) {
        return _search(\@SCRIPTS, 0, $#SCRIPTS, $code);
    } else {
        if (exists $SCRIPTS{$arg}) {
            return $SCRIPTS{$arg};
        } else {
            return;
        }
    }
}

=head2 charblocks

    use Unicode::UCD 'charblocks';

    my $charblocks = charblocks();

charblocks() returns a reference to a hash with the known block names
as the keys, and the code point ranges (see L</charblock>) as the values.

See also L</Blocks versus Scripts>.

=cut

sub charblocks {
    _charblocks() unless %BLOCKS;
    return \%BLOCKS;
}

=head2 charscripts

    use Unicode::UCD 'charscripts';

    my $charscripts = charscripts();

charscripts() returns a reference to a hash with the known script names
as the keys, and the code point ranges (see L</charscript>) as the values.

See also L</Blocks versus Scripts>.

=cut

sub charscripts {
    _charscripts() unless %SCRIPTS;
    return \%SCRIPTS;
}

=head2 Blocks versus Scripts

The difference between a block and a script is that scripts are closer
to the linguistic notion of a set of characters required to present
languages, while block is more of an artifact of the Unicode character
numbering and separation into blocks of (mostly) 256 characters.

For example the Latin B<script> is spread over several B<blocks>, such
as C<Basic Latin>, C<Latin 1 Supplement>, C<Latin Extended-A>, and
C<Latin Extended-B>.  On the other hand, the Latin script does not
contain all the characters of the C<Basic Latin> block (also known as
the ASCII): it includes only the letters, and not, for example, the digits
or the punctuation.

=cut
+ +For blocks see http://www.unicode.org/Public/UNIDATA/Blocks.txt + +For scripts see UTR #24: http://www.unicode.org/unicode/reports/tr24/ + +=head2 Matching Scripts and Blocks + +Scripts are matched with the regular-expression construct +C<\p{...}> (e.g. C<\p{Tibetan}> matches characters of the Tibetan script), +while C<\p{In...}> is used for blocks (e.g. C<\p{InTibetan}> matches +any of the 256 code points in the Tibetan block). + +=head2 Code Point Arguments + +A I<code point argument> is either a decimal or a hexadecimal scalar +designating a Unicode character, or C<U+> followed by hexadecimals +designating a Unicode character. Note that Unicode is B<not> limited +to 16 bits (the number of Unicode characters is open-ended, in theory +unlimited): you may have more than 4 hexdigits. + +=head2 charinrange + +In addition to using the C<\p{In...}> and C<\P{In...}> constructs, you +can also test whether a code point is in the I<range> as returned by +L</charblock> and L</charscript> or as the values of the hash returned +by L</charblocks> and L</charscripts> by using charinrange(): + + use Unicode::UCD qw(charscript charinrange); + + $range = charscript('Hiragana'); + print "looks like hiragana\n" if charinrange($range, $codepoint); + +=cut + +=head2 compexcl + + use Unicode::UCD 'compexcl'; + + my $compexcl = compexcl("09dc"); + +The compexcl() returns the composition exclusion (that is, if the +character should not be produced during a precomposition) of the +character specified by a B<code point argument>. + +If there is a composition exclusion for the character, true is +returned. Otherwise, false is returned. 
=pod

=cut

# Code points listed in CompositionExclusions.txt.  Only the keys
# matter; every value is undef.
my %COMPEXCL;

# Fills %COMPEXCL from CompositionExclusions.txt; does nothing once the
# hash has entries.
sub _compexcl {
    unless (%COMPEXCL) {
        if (openunicode(\$COMPEXCLFH, "CompositionExclusions.txt")) {
            # A lexical line variable keeps the caller's $_ intact.
            while (my $line = <$COMPEXCLFH>) {
                if ($line =~ /^([0-9A-F]+)\s+\#\s+/) {
                    my $code = hex($1);
                    $COMPEXCL{$code} = undef;
                }
            }
            close($COMPEXCLFH);
        }
    }
}

# compexcl($codepoint)
#
# Returns true when the code point is listed in
# CompositionExclusions.txt, false otherwise.  Croaks when the argument
# is not a code point.
sub compexcl {
    my $arg  = shift;
    my $code = _getcode($arg);
    croak __PACKAGE__, "::compexcl: unknown code '$arg'"
        unless defined $code;

    _compexcl() unless %COMPEXCL;

    return exists $COMPEXCL{$code};
}

=head2 casefold

    use Unicode::UCD 'casefold';

    my $casefold = casefold("09dc");

The casefold() returns the locale-independent case folding of the
character specified by a B<code point argument>.

If there is a case folding for that character, a reference to a hash
with the following fields is returned:

    key

    code             code point with at least four hexdigits
    status           "C", "F", "S", or "I"
    mapping          one or more codes separated by spaces

The meaning of the I<status> is as follows:

   C                 common case folding, common mappings shared
                     by both simple and full mappings
   F                 full case folding, mappings that cause strings
                     to grow in length. Multiple characters are separated
                     by spaces
   S                 simple case folding, mappings to single characters
                     where different from F
   I                 special case for dotted uppercase I and
                     dotless lowercase i
                     - If this mapping is included, the result is
                       case-insensitive, but dotless and dotted I's
                       are not distinguished
                     - If this mapping is excluded, the result is not
                       fully case-insensitive, but dotless and dotted
                       I's are distinguished

If there is no case folding for that character, C<undef> is returned.

=cut
=pod

For more information about case mappings see
http://www.unicode.org/unicode/reports/tr21/

=cut

# Case folding table keyed by numeric code point; each value is a hash
# with the keys code, status and mapping as read from CaseFolding.txt.
my %CASEFOLD;

# Fills %CASEFOLD from CaseFolding.txt; does nothing once the hash has
# entries.  A later record for a code point replaces an earlier one.
sub _casefold {
    unless (%CASEFOLD) {
        if (openunicode(\$CASEFOLDFH, "CaseFolding.txt")) {
            # A lexical line variable keeps the caller's $_ intact.
            while (my $line = <$CASEFOLDFH>) {
                if ($line =~ /^([0-9A-F]+); ([CFSI]); ([0-9A-F]+(?: [0-9A-F]+)*);/) {
                    my $code = hex($1);
                    $CASEFOLD{$code} = { code    => $1,
                                         status  => $2,
                                         mapping => $3 };
                }
            }
            close($CASEFOLDFH);
        }
    }
}

# casefold($codepoint)
#
# Returns the %CASEFOLD entry for the code point, or undef when there is
# none.  Croaks when the argument is not a code point.
sub casefold {
    my $arg  = shift;
    my $code = _getcode($arg);
    croak __PACKAGE__, "::casefold: unknown code '$arg'"
        unless defined $code;

    _casefold() unless %CASEFOLD;

    return $CASEFOLD{$code};
}

=head2 casespec

    use Unicode::UCD 'casespec';

    my $casespec = casespec("09dc");

The casespec() returns the potentially locale-dependent case mapping
of the character specified by a B<code point argument>.  The mapping
may change the length of the string (which the basic Unicode case
mappings as returned by charinfo() never do).

If there is a special case mapping for that character, a reference to
a hash with the following fields is returned:

    key

    code             code point with at least four hexdigits
    lower            lowercase
    title            titlecase
    upper            uppercase
    condition        condition list (may be undef)

The C<condition> is optional.  Where present, it consists of one or
more I<locales> or I<contexts>, separated by spaces (other than as
used to separate elements, spaces are to be ignored).  A condition
list overrides the normal behavior if all of the listed conditions are
true.  Case distinctions in the condition list are not significant.
Conditions preceded by "NON_" represent the negation of the condition.

Note that when there are multiple case mapping definitions for a
single code point because of different locales, the value returned by
casespec() is a hash reference which has the locales as the keys and
hash references as described above as the values.

=cut
=pod

A I<locale> is defined as a 2-letter ISO 3166 country code, possibly
followed by a "_" and a 2-letter ISO language code (possibly followed
by a "_" and a variant code).  You can find the lists of those codes,
see L<Locale::Country> and L<Locale::Language>.

A I<context> is one of the following choices:

    FINAL            The letter is not followed by a letter of
                     general category L (e.g. Ll, Lt, Lu, Lm, or Lo)
    MODERN           The mapping is only used for modern text
    AFTER_i          The last base character was "i" (U+0069)

For more information about case mappings see
http://www.unicode.org/unicode/reports/tr21/

=cut

# Special casing table keyed by numeric code point.  A value is either a
# single record (keys code, lower, title, upper, condition) or, once a
# code point has been seen more than once, a hash of such records keyed
# by the locale that starts each record's condition.
my %CASESPEC;

# Fills %CASESPEC from SpecialCasing.txt; does nothing once the hash has
# entries.
sub _casespec {
    unless (%CASESPEC) {
        if (openunicode(\$CASESPECFH, "SpecialCasing.txt")) {
            # A lexical line variable keeps the caller's $_ intact.
            while (my $line = <$CASESPECFH>) {
                if ($line =~ /^([0-9A-F]+); ([0-9A-F]+(?: [0-9A-F]+)*)?; ([0-9A-F]+(?: [0-9A-F]+)*)?; ([0-9A-F]+(?: [0-9A-F]+)*)?; (\w+(?: \w+)*)?/) {
                    my ($hexcode, $lower, $title, $upper, $condition) =
                        ($1, $2, $3, $4, $5);
                    my $code = hex($hexcode);
                    if (exists $CASESPEC{$code}) {
                        # A "code" key means the stored value is still a
                        # single record rather than a per-locale hash.
                        if (exists $CASESPEC{$code}->{code}) {
                            my ($oldlower,
                                $oldtitle,
                                $oldupper,
                                $oldcondition) =
                                    @{$CASESPEC{$code}}{qw(lower
                                                           title
                                                           upper
                                                           condition)};
                            if (defined $oldcondition) {
                                # Re-file the earlier record under the
                                # locale that starts its condition.
                                my ($oldlocale) =
                                    ($oldcondition =~ /^([a-z][a-z](?:_\S+)?)/);
                                delete $CASESPEC{$code};
                                $CASESPEC{$code}->{$oldlocale} =
                                    { code      => $hexcode,
                                      lower     => $oldlower,
                                      title     => $oldtitle,
                                      upper     => $oldupper,
                                      condition => $oldcondition };
                            }
                        }
                        # NOTE(review): if the earlier record had no
                        # condition it is left in place and the locale
                        # key below is added beside its fields; confirm
                        # that this mixed shape is intended.
                        my ($locale) =
                            ($condition =~ /^([a-z][a-z](?:_\S+)?)/);
                        $CASESPEC{$code}->{$locale} =
                            { code      => $hexcode,
                              lower     => $lower,
                              title     => $title,
                              upper     => $upper,
                              condition => $condition };
                    } else {
                        $CASESPEC{$code} =
                            { code      => $hexcode,
                              lower     => $lower,
                              title     => $title,
                              upper     => $upper,
                              condition => $condition };
                    }
                }
            }
            close($CASESPECFH);
        }
    }
}

# casespec($codepoint)
#
# Returns the %CASESPEC entry for the code point, or undef when there is
# none.  Croaks when the argument is not a code point.
sub casespec {
    my $arg  = shift;
    my $code = _getcode($arg);
    croak __PACKAGE__, "::casespec: unknown code '$arg'"
        unless defined $code;

    _casespec() unless %CASESPEC;

    return $CASESPEC{$code};
}

=head2 Unicode::UCD::UnicodeVersion

Unicode::UCD::UnicodeVersion() returns the version of the Unicode
Character Database, in other words, the version of the Unicode
standard the database implements.  The version is a string
of numbers delimited by dots (C<'.'>).

=cut

# Version string read from the "version" file; set only after it has
# passed validation.
my $UNICODEVERSION;

# Reads, validates and caches the first line of the "version" file.
# Croaks when that line is not dot-separated numbers.
sub UnicodeVersion {
    unless (defined $UNICODEVERSION) {
        openunicode(\$VERSIONFH, "version");
        my $version = <$VERSIONFH>;
        close($VERSIONFH);
        # An empty file yields undef; treat it as an empty version so
        # that the check below reports it.
        $version = '' unless defined $version;
        chomp $version;
        croak __PACKAGE__, "::VERSION: strange version '$version'"
            unless $version =~ /^\d+(?:\.\d+)+$/;
        # Cache only a validated value, so a bad file keeps croaking on
        # every call instead of being returned by the second one.
        $UNICODEVERSION = $version;
    }
    return $UNICODEVERSION;
}

=head2 Implementation Note

The first use of charinfo() opens a read-only filehandle to the Unicode
Character Database (the database is included in the Perl distribution).
The filehandle is then kept open for further queries.  In other words,
if you are wondering where one of your filehandles went, that's where.

=head1 BUGS

Does not yet support EBCDIC platforms.

=cut
+ +=head1 AUTHOR + +Jarkko Hietaniemi + +=cut + +1; diff --git a/gnu/usr.bin/perl/lib/Unicode/UCD.t b/gnu/usr.bin/perl/lib/Unicode/UCD.t new file mode 100644 index 00000000000..9082057bbd1 --- /dev/null +++ b/gnu/usr.bin/perl/lib/Unicode/UCD.t @@ -0,0 +1,281 @@ +BEGIN { + if (ord("A") == 193) { + print "1..0 # Skip: EBCDIC\n"; + exit 0; + } + chdir 't' if -d 't'; + @INC = '../lib'; + @INC = "::lib" if $^O eq 'MacOS'; # module parses @INC itself +} + +use strict; +use Unicode::UCD; +use Test::More; + +BEGIN { plan tests => 162 }; + +use Unicode::UCD 'charinfo'; + +my $charinfo; + +$charinfo = charinfo(0x41); + +is($charinfo->{code}, '0041', 'LATIN CAPITAL LETTER A'); +is($charinfo->{name}, 'LATIN CAPITAL LETTER A'); +is($charinfo->{category}, 'Lu'); +is($charinfo->{combining}, '0'); +is($charinfo->{bidi}, 'L'); +is($charinfo->{decomposition}, ''); +is($charinfo->{decimal}, ''); +is($charinfo->{digit}, ''); +is($charinfo->{numeric}, ''); +is($charinfo->{mirrored}, 'N'); +is($charinfo->{unicode10}, ''); +is($charinfo->{comment}, ''); +is($charinfo->{upper}, ''); +is($charinfo->{lower}, '0061'); +is($charinfo->{title}, ''); +is($charinfo->{block}, 'Basic Latin'); +is($charinfo->{script}, 'Latin'); + +$charinfo = charinfo(0x100); + +is($charinfo->{code}, '0100', 'LATIN CAPITAL LETTER A WITH MACRON'); +is($charinfo->{name}, 'LATIN CAPITAL LETTER A WITH MACRON'); +is($charinfo->{category}, 'Lu'); +is($charinfo->{combining}, '0'); +is($charinfo->{bidi}, 'L'); +is($charinfo->{decomposition}, '0041 0304'); +is($charinfo->{decimal}, ''); +is($charinfo->{digit}, ''); +is($charinfo->{numeric}, ''); +is($charinfo->{mirrored}, 'N'); +is($charinfo->{unicode10}, 'LATIN CAPITAL LETTER A MACRON'); +is($charinfo->{comment}, ''); +is($charinfo->{upper}, ''); +is($charinfo->{lower}, '0101'); +is($charinfo->{title}, ''); +is($charinfo->{block}, 'Latin Extended-A'); +is($charinfo->{script}, 'Latin'); + +# 0x0590 is in the Hebrew block but unused. 

$charinfo = charinfo(0x590);

is($charinfo->{code},          undef, '0x0590 - unused Hebrew');
is($charinfo->{name},          undef);
is($charinfo->{category},      undef);
is($charinfo->{combining},     undef);
is($charinfo->{bidi},          undef);
is($charinfo->{decomposition}, undef);
is($charinfo->{decimal},       undef);
is($charinfo->{digit},         undef);
is($charinfo->{numeric},       undef);
is($charinfo->{mirrored},      undef);
is($charinfo->{unicode10},     undef);
is($charinfo->{comment},       undef);
is($charinfo->{upper},         undef);
is($charinfo->{lower},         undef);
is($charinfo->{title},         undef);
is($charinfo->{block},         undef);
is($charinfo->{script},        undef);

# 0x05d0 is in the Hebrew block and used.

$charinfo = charinfo(0x5d0);

is($charinfo->{code},          '05D0', '05D0 - used Hebrew');
is($charinfo->{name},          'HEBREW LETTER ALEF');
is($charinfo->{category},      'Lo');
is($charinfo->{combining},     '0');
is($charinfo->{bidi},          'R');
is($charinfo->{decomposition}, '');
is($charinfo->{decimal},       '');
is($charinfo->{digit},         '');
is($charinfo->{numeric},       '');
is($charinfo->{mirrored},      'N');
is($charinfo->{unicode10},     '');
is($charinfo->{comment},       '');
is($charinfo->{upper},         '');
is($charinfo->{lower},         '');
is($charinfo->{title},         '');
is($charinfo->{block},         'Hebrew');
is($charinfo->{script},        'Hebrew');

# An open syllable in Hangul.

$charinfo = charinfo(0xAC00);

is($charinfo->{code},          'AC00', 'HANGUL SYLLABLE-AC00');
is($charinfo->{name},          'HANGUL SYLLABLE-AC00');
is($charinfo->{category},      'Lo');
is($charinfo->{combining},     '0');
is($charinfo->{bidi},          'L');
is($charinfo->{decomposition}, undef);
is($charinfo->{decimal},       '');
is($charinfo->{digit},         '');
is($charinfo->{numeric},       '');
is($charinfo->{mirrored},      'N');
is($charinfo->{unicode10},     '');
is($charinfo->{comment},       '');
is($charinfo->{upper},         '');
is($charinfo->{lower},         '');
is($charinfo->{title},         '');
is($charinfo->{block},         'Hangul Syllables');
is($charinfo->{script},        'Hangul');

# A closed syllable in Hangul.

$charinfo = charinfo(0xAE00);

is($charinfo->{code},          'AE00', 'HANGUL SYLLABLE-AE00');
is($charinfo->{name},          'HANGUL SYLLABLE-AE00');
is($charinfo->{category},      'Lo');
is($charinfo->{combining},     '0');
is($charinfo->{bidi},          'L');
is($charinfo->{decomposition}, undef);
is($charinfo->{decimal},       '');
is($charinfo->{digit},         '');
is($charinfo->{numeric},       '');
is($charinfo->{mirrored},      'N');
is($charinfo->{unicode10},     '');
is($charinfo->{comment},       '');
is($charinfo->{upper},         '');
is($charinfo->{lower},         '');
is($charinfo->{title},         '');
is($charinfo->{block},         'Hangul Syllables');
is($charinfo->{script},        'Hangul');

$charinfo = charinfo(0x1D400);

is($charinfo->{code},          '1D400', 'MATHEMATICAL BOLD CAPITAL A');
is($charinfo->{name},          'MATHEMATICAL BOLD CAPITAL A');
is($charinfo->{category},      'Lu');
is($charinfo->{combining},     '0');
is($charinfo->{bidi},          'L');
is($charinfo->{decomposition}, '<font> 0041');
is($charinfo->{decimal},       '');
is($charinfo->{digit},         '');
is($charinfo->{numeric},       '');
is($charinfo->{mirrored},      'N');
is($charinfo->{unicode10},     '');
is($charinfo->{comment},       '');
is($charinfo->{upper},         '');
is($charinfo->{lower},         '');
is($charinfo->{title},         '');
is($charinfo->{block},         'Mathematical Alphanumeric Symbols');
is($charinfo->{script},        undef);

use Unicode::UCD qw(charblock charscript);

# 0x0590 is in the Hebrew block but unused.

is(charblock(0x590),          'Hebrew', '0x0590 - Hebrew unused charblock');
is(charscript(0x590),         undef,    '0x0590 - Hebrew unused charscript');

$charinfo = charinfo(0xbe);

is($charinfo->{code},          '00BE', 'VULGAR FRACTION THREE QUARTERS');
is($charinfo->{name},          'VULGAR FRACTION THREE QUARTERS');
is($charinfo->{category},      'No');
is($charinfo->{combining},     '0');
is($charinfo->{bidi},          'ON');
is($charinfo->{decomposition}, '<fraction> 0033 2044 0034');
is($charinfo->{decimal},       '');
is($charinfo->{digit},         '');
is($charinfo->{numeric},       '3/4');
is($charinfo->{mirrored},      'N');
is($charinfo->{unicode10},     'FRACTION THREE QUARTERS');
is($charinfo->{comment},       '');
is($charinfo->{upper},         '');
is($charinfo->{lower},         '');
is($charinfo->{title},         '');
is($charinfo->{block},         'Latin-1 Supplement');
is($charinfo->{script},        undef);

use Unicode::UCD qw(charblocks charscripts);

my $charblocks = charblocks();

ok(exists $charblocks->{Thai}, 'Thai charblock exists');
is($charblocks->{Thai}->[0]->[0], hex('0e00'));
ok(!exists $charblocks->{PigLatin}, 'PigLatin charblock does not exist');

my $charscripts = charscripts();

ok(exists $charscripts->{Armenian}, 'Armenian charscript exists');
is($charscripts->{Armenian}->[0]->[0], hex('0531'));
ok(!exists $charscripts->{PigLatin}, 'PigLatin charscript does not exist');

my $charscript;

$charscript = charscript("12ab");
is($charscript, 'Ethiopic', 'Ethiopic charscript');

$charscript = charscript("0x12ab");
is($charscript, 'Ethiopic');

$charscript = charscript("U+12ab");
is($charscript, 'Ethiopic');

my $ranges;

$ranges = charscript('Ogham');
is($ranges->[0]->[0], hex('1681'), 'Ogham charscript');
is($ranges->[0]->[1], hex('169a'));

use Unicode::UCD qw(charinrange);

$ranges = charscript('Cherokee');
ok(!charinrange($ranges, "139f"), 'Cherokee charscript');
ok( charinrange($ranges, "13a0"));
ok( charinrange($ranges, "13f4"));
ok(!charinrange($ranges, "13f5"));

is(Unicode::UCD::UnicodeVersion, '3.2.0', 'UnicodeVersion');

use Unicode::UCD qw(compexcl);

ok(!compexcl(0x0100), 'compexcl');
ok( compexcl(0x0958));

use Unicode::UCD qw(casefold);

my $casefold;

$casefold = casefold(0x41);

ok($casefold->{code} eq '0041' &&
   $casefold->{status} eq 'C'  &&
   $casefold->{mapping} eq '0061', 'casefold 0x41');

$casefold = casefold(0xdf);

ok($casefold->{code} eq '00DF' &&
   $casefold->{status} eq 'F'  &&
   $casefold->{mapping} eq '0073 0073', 'casefold 0xDF');

ok(!casefold(0x20));

use Unicode::UCD qw(casespec);

my $casespec;

ok(!casespec(0x41));

$casespec = casespec(0xdf);

# An unconditional mapping has no condition list at all, so check for
# undef directly; "eq undef" is a string comparison with the empty
# string and would also accept ''.
ok($casespec->{code} eq '00DF' &&
   $casespec->{lower} eq '00DF'  &&
   $casespec->{title} eq '0053 0073'  &&
   $casespec->{upper} eq '0053 0053' &&
   !defined $casespec->{condition}, 'casespec 0xDF');

$casespec = casespec(0x307);

ok($casespec->{az}->{code} eq '0307' &&
   $casespec->{az}->{lower} eq ''  &&
   $casespec->{az}->{title} eq '0307'  &&
   $casespec->{az}->{upper} eq '0307' &&
   $casespec->{az}->{condition} eq 'az After_Soft_Dotted',
   'casespec 0x307');