diff options
Diffstat (limited to 'gnu/usr.bin/perl/lib')
109 files changed, 7650 insertions, 2540 deletions
diff --git a/gnu/usr.bin/perl/lib/AnyDBM_File.t b/gnu/usr.bin/perl/lib/AnyDBM_File.t index d3c1c312791..63fb2d8fb98 100644 --- a/gnu/usr.bin/perl/lib/AnyDBM_File.t +++ b/gnu/usr.bin/perl/lib/AnyDBM_File.t @@ -14,7 +14,7 @@ use Fcntl; $Is_Dosish = ($^O eq 'amigaos' || $^O eq 'MSWin32' || $^O eq 'NetWare' || $^O eq 'dos' || - $^O eq 'os2' || $^O eq 'mint' || + $^O eq 'os2' || $^O eq 'cygwin'); my $filename = "Any_dbmx$$"; diff --git a/gnu/usr.bin/perl/lib/CORE.pod b/gnu/usr.bin/perl/lib/CORE.pod index ec6a8047a4d..ce5feb5908a 100644 --- a/gnu/usr.bin/perl/lib/CORE.pod +++ b/gnu/usr.bin/perl/lib/CORE.pod @@ -34,21 +34,31 @@ For many Perl functions, the CORE package contains real subroutines. This feature is new in Perl 5.16. You can take references to these and make aliases. However, some can only be called as barewords; i.e., you cannot use ampersand syntax (C<&foo>) or call them through references. See the -C<shove> example above. These subroutines exist for all overridable -keywords, except for C<dump> and the infix operators. Calling with +C<shove> example above. 
These subroutines exist for all keywords except the following: + +C<__DATA__>, C<__END__>, C<and>, C<cmp>, C<default>, C<do>, C<dump>, +C<else>, C<elsif>, C<eq>, C<eval>, C<for>, C<foreach>, C<format>, C<ge>, +C<given>, C<goto>, C<grep>, C<gt>, C<if>, C<last>, C<le>, C<local>, C<lt>, +C<m>, C<map>, C<my>, C<ne>, C<next>, C<no>, C<or>, C<our>, C<package>, +C<print>, C<printf>, C<q>, C<qq>, C<qr>, C<qw>, C<qx>, C<redo>, C<require>, +C<return>, C<s>, C<say>, C<sort>, C<state>, C<sub>, C<tr>, C<unless>, +C<until>, C<use>, C<when>, C<while>, C<x>, C<xor>, C<y> + +Calling with ampersand syntax and through references does not work for the following functions, as they have special syntax that cannot always be translated into a simple list (e.g., C<eof> vs C<eof()>): -C<chdir>, C<chomp>, C<chop>, C<each>, C<eof>, C<exec>, C<keys>, C<lstat>, -C<pop>, C<push>, C<shift>, C<splice>, C<stat>, C<system>, C<truncate>, +C<chdir>, C<chomp>, C<chop>, C<defined>, C<delete>, C<each>, +C<eof>, C<exec>, C<exists>, C<keys>, C<lstat>, C<pop>, C<push>, +C<shift>, C<splice>, C<split>, C<stat>, C<system>, C<truncate>, C<unlink>, C<unshift>, C<values> =head1 OVERRIDING CORE FUNCTIONS To override a Perl built-in routine with your own version, you need to -import it at compile-time. This can be conveniently achieved with the -C<subs> pragma. This will affect only the package in which you've imported +import it at compile-time. This can be conveniently achieved with the +C<subs> pragma. 
This will affect only the package in which you've imported the said subroutine: use subs 'chdir'; diff --git a/gnu/usr.bin/perl/lib/Class/Struct.t b/gnu/usr.bin/perl/lib/Class/Struct.t index 694d622d4d2..fb1eb0b545a 100644 --- a/gnu/usr.bin/perl/lib/Class/Struct.t +++ b/gnu/usr.bin/perl/lib/Class/Struct.t @@ -33,11 +33,32 @@ package MyOther; use Class::Struct s => '$', a => '@', h => '%', c => 'aClass'; # +# test overridden accessors +# +package OverrideAccessor; +use Class::Struct; + +{ + no warnings qw(Class::Struct); + struct( 'OverrideAccessor', { count => '$' } ); +} + +sub count { + my ($self,$count) = @_; + + if ( @_ >= 2 ) { + $self->{'OverrideAccessor::count'} = $count + 9; + } + + return $self->{'OverrideAccessor::count'}; +} + +# # back to main... # package main; -use Test::More tests => 24; +use Test::More; my $obj = MyObj->new; isa_ok $obj, 'MyObj'; @@ -101,3 +122,133 @@ is $obk->SomeElem(), 123; my $recobj = RecClass->new(); isa_ok $recobj, 'RecClass'; +my $override_obj = OverrideAccessor->new( count => 3 ); +is $override_obj->count, 12; + +$override_obj->count( 1 ); +is $override_obj->count, 10; + + +use Class::Struct Kapow => { z_zwap => 'Regexp', sploosh => 'MyObj' }; + +is eval { main->new(); }, undef, + 'No new method injected into current package'; + +my $obj3 = Kapow->new(); + +isa_ok $obj3, 'Kapow'; +is $obj3->z_zwap, undef, 'No z_zwap member by default'; +is $obj3->sploosh, undef, 'No sploosh member by default'; +$obj3->z_zwap(qr//); +isa_ok $obj3->z_zwap, 'Regexp', 'Can set z_zwap member'; +$obj3->sploosh(MyObj->new(s => 'pie')); +isa_ok $obj3->sploosh, 'MyObj', + 'Can set sploosh member to object of correct class'; +is $obj3->sploosh->s, 'pie', 'Can set sploosh member to correct object'; + +my $obj4 = Kapow->new( z_zwap => qr//, sploosh => MyObj->new(a => ['Good']) ); + +isa_ok $obj4, 'Kapow'; +isa_ok $obj4->z_zwap, 'Regexp', 'Initialised z_zwap member'; +isa_ok $obj4->sploosh, 'MyObj', 'Initialised sploosh member'; +is_deeply 
$obj4->sploosh->a, ['Good'], 'with correct object'; + +my $obj5 = Kapow->new( sploosh => { h => {perl => 'rules'} } ); + +isa_ok $obj5, 'Kapow'; +is $obj5->z_zwap, undef, 'No z_zwap member by default'; +isa_ok $obj5->sploosh, 'MyObj', 'Initialised sploosh member from hash'; +is_deeply $obj5->sploosh->h, { perl => 'rules'} , 'with correct object'; + +is eval { + package MyObj; + struct( s => '$', a => '@', h => '%', c => 'aClass' ); +}, undef, 'Calling struct a second time fails'; + +like $@, qr/^function 'new' already defined in package MyObj/, + 'fails with the expected error'; + +is eval { MyObj->new( a => {} ) }, undef, + 'Using a hash where an array reference is expected'; +like $@, qr/^Initializer for a must be array reference/, + 'fails with the expected error'; + +is eval { MyObj->new( h => [] ) }, undef, + 'Using an array where a hash reference is expected'; +like $@, qr/^Initializer for h must be hash reference/, + 'fails with the expected error'; + +is eval { Kapow->new( sploosh => { h => [perl => 'rules'] } ); }, undef, + 'Using an array where a hash reference is expected in an initialiser list'; +like $@, qr/^Initializer for h must be hash reference/, + 'fails with the expected error'; + +is eval { Kapow->new( sploosh => [ h => {perl => 'rules'} ] ); }, undef, + "Using an array for a member object's initialiser list"; +like $@, qr/^Initializer for sploosh must be hash or MyObj reference/, + 'fails with the expected error'; + +is eval { + package Crraack; + use Class::Struct 'struct'; + struct( 'pow' => '@$%!' ); +}, undef, 'Bad type fails'; +like $@, qr/^'\@\$\%\!' 
is not a valid struct element type/, + 'with the expected error'; + +is eval { + $obj3->sploosh(MyOther->new(s => 3.14)); +}, undef, 'Setting member to the wrong class of object fails'; +like $@, qr/^sploosh argument is wrong class/, + 'with the expected error'; +is $obj3->sploosh->s, 'pie', 'Object is unchanged'; + +is eval { + $obj3->sploosh(MyObj->new(s => 3.14), 'plop'); +}, undef, 'Too many arguments to setter fails'; +like $@, qr/^Too many args to sploosh/, + 'with the expected error'; +is $obj3->sploosh->s, 'pie', 'Object is unchanged'; + +is eval { + package Blurp; + use Class::Struct 'struct'; + struct( Blurp => {}, 'Bonus!' ); +}, undef, 'hash based class with extra argument fails'; +like $@, qr/\Astruct usage error.*\n.*\n/, + 'with the expected confession'; + +is eval { + package Zamm; + use Class::Struct 'struct'; + struct( Zamm => [], 'Bonus!' ); +}, undef, 'array based class with extra argument fails'; +like $@, qr/\Astruct usage error.*\n.*\n/, + 'with the expected confession'; + +is eval { + package Thwapp; + use Class::Struct 'struct'; + struct( Thwapp => ['Bonus!'] ); +}, undef, 'array based class with extra constructor argument fails'; +like $@, qr/\Astruct usage error.*\n.*\n/, + 'with the expected confession'; + +is eval { + package Rakkk; + use Class::Struct 'struct'; + struct( z_zwap => 'Regexp', sploosh => 'MyObj', 'Bonus' ); +}, undef, 'default array based class with extra constructor argument fails'; +like $@, qr/\Astruct usage error.*\n.*\n/, + 'with the expected confession'; + +is eval { + package Awk; + use parent -norequire, 'Urkkk'; + use Class::Struct 'struct'; + struct( beer => 'foamy' ); +}, undef, '@ISA is not allowed'; +like $@, qr/^struct class cannot be a subclass \(\@ISA not allowed\)/, + 'with the expected error'; + +done_testing; diff --git a/gnu/usr.bin/perl/lib/DB.t b/gnu/usr.bin/perl/lib/DB.t index a1fadf331a8..b4b6ecb1e5f 100644 --- a/gnu/usr.bin/perl/lib/DB.t +++ b/gnu/usr.bin/perl/lib/DB.t @@ -126,7 +126,7 @@ is( 
DB::_clientname('bar'), undef, my @ret = eval { DB->backtrace() }; like( $ret[0], qr/file.+\Q$0\E/, 'DB::backtrace() should report current file'); like( $ret[0], qr/line $line/, '... should report calling line number' ); - like( $ret[0], qr/eval {...}/, '... should catch eval BLOCK' ); + like( $ret[0], qr/eval\Q {...}/, '... should catch eval BLOCK' ); @ret = eval "one(2)"; is( scalar @ret, 1, '... should report from provided stack frame number' ); diff --git a/gnu/usr.bin/perl/lib/DBM_Filter.pm b/gnu/usr.bin/perl/lib/DBM_Filter.pm index a5f4a17fd56..3421848eca4 100644 --- a/gnu/usr.bin/perl/lib/DBM_Filter.pm +++ b/gnu/usr.bin/perl/lib/DBM_Filter.pm @@ -2,7 +2,7 @@ package DBM_Filter ; use strict; use warnings; -our $VERSION = '0.04'; +our $VERSION = '0.05'; package Tie::Hash ; @@ -247,7 +247,7 @@ DBM_Filter -- Filter DBM keys/values $db->Filtered(); package DBM_Filter::my_filter1; - + sub Store { ... } sub Fetch { ... } diff --git a/gnu/usr.bin/perl/lib/DBM_Filter/t/int32.t b/gnu/usr.bin/perl/lib/DBM_Filter/t/int32.t index 96d4d9e8e1c..a74d49dfbe6 100644 --- a/gnu/usr.bin/perl/lib/DBM_Filter/t/int32.t +++ b/gnu/usr.bin/perl/lib/DBM_Filter/t/int32.t @@ -55,9 +55,8 @@ is $@, '', "push an 'int32' filter" ; no warnings 'uninitialized'; StoreData(\%h1, { - undef() => undef(), "400" => "500", - 0 => 1, + undef() => 1, 1 => 0, -47 => -6, }); diff --git a/gnu/usr.bin/perl/lib/ExtUtils/t/Embed.t b/gnu/usr.bin/perl/lib/ExtUtils/t/Embed.t index 269b20ac3a8..7a83c98f767 100644 --- a/gnu/usr.bin/perl/lib/ExtUtils/t/Embed.t +++ b/gnu/usr.bin/perl/lib/ExtUtils/t/Embed.t @@ -204,13 +204,13 @@ int main(int argc, char **argv, char **env) { perl_free(my_perl); -#ifdef PERL_GLOBAL_STRUCT - free_global_struct(plvarsp); -#endif /* PERL_GLOBAL_STRUCT */ - my_puts("ok 8"); PERL_SYS_TERM(); +#ifdef PERL_GLOBAL_STRUCT + free_global_struct(plvarsp); +#endif /* PERL_GLOBAL_STRUCT */ + return 0; } diff --git a/gnu/usr.bin/perl/lib/File/Basename.t b/gnu/usr.bin/perl/lib/File/Basename.t index 
0d3b633669d..6ff3121ec93 100644 --- a/gnu/usr.bin/perl/lib/File/Basename.t +++ b/gnu/usr.bin/perl/lib/File/Basename.t @@ -154,7 +154,9 @@ can_ok( __PACKAGE__, qw( basename fileparse dirname fileparse_set_fstype ) ); ### Test tainting -{ +SKIP: { + skip "A perl without taint support", 2 + if not ${^TAINT}; # The empty tainted value, for tainting strings my $TAINT = substr($^X, 0, 0); diff --git a/gnu/usr.bin/perl/lib/File/Copy.t b/gnu/usr.bin/perl/lib/File/Copy.t index ffd3d59db78..1e6c9cb4a12 100644 --- a/gnu/usr.bin/perl/lib/File/Copy.t +++ b/gnu/usr.bin/perl/lib/File/Copy.t @@ -14,7 +14,7 @@ use Test::More; my $TB = Test::More->builder; -plan tests => 463; +plan tests => 465; # We're going to override rename() later on but Perl has to see an override # at compile time to honor it. @@ -139,7 +139,7 @@ for my $cross_partition_test (0..1) { { my $warnings = ''; local $SIG{__WARN__} = sub { $warnings .= join '', @_ }; - ok copy("file-$$", "file-$$"), 'copy(fn, fn) succeeds'; + ok !copy("file-$$", "file-$$"), 'copy to itself fails'; like $warnings, qr/are identical/, 'but warns'; ok -s "file-$$", 'contents preserved'; @@ -267,6 +267,9 @@ SKIP: { if $^O eq "MSWin32"; skip "Copy maps POSIX permissions to VOS permissions.", $skips if $^O eq "vos"; + skip "There be dragons here with DragonflyBSD.", $skips + if $^O eq 'dragonfly'; + # Just a sub to get better failure messages. sub __ ($) { @@ -411,7 +414,7 @@ SKIP: { foreach my $right (qw(plain object1 object2)) { @warnings = (); $! 
= 0; - is eval {copy $what{$left}, $what{$right}}, 1, "copy $left $right"; + is eval {copy $what{$left}, $what{$right}}, 0, "copy $left $right"; is $@, '', 'No croaking'; is $!, '', 'No system call errors'; is @warnings, 1, 'Exactly 1 warning'; @@ -472,6 +475,31 @@ SKIP: { close($IN); } +use File::Temp qw(tempdir); +use File::Spec; + +SKIP: { + # RT #111126: File::Copy copy() zeros file when copying a file + # into the same directory it is stored in + + my $temp_dir = tempdir( CLEANUP => 1 ); + my $temp_file = File::Spec->catfile($temp_dir, "somefile"); + + open my $fh, ">", $temp_file + or skip "Cannot create $temp_file: $!", 2; + print $fh "Just some data"; + close $fh + or skip "Cannot close $temp_file: $!", 2; + + my $warn_message = ""; + local $SIG{__WARN__} = sub { $warn_message .= "@_" }; + ok(!copy($temp_file, $temp_dir), + "Copy of foo/file to foo/ should fail"); + like($warn_message, qr/^\Q'$temp_file' and '$temp_file'\E are identical.*Copy\.t/i, + "error message should describe the problem"); + 1 while unlink $temp_file; +} + END { 1 while unlink "file-$$"; 1 while unlink "lib/file-$$"; diff --git a/gnu/usr.bin/perl/lib/File/Find/t/find.t b/gnu/usr.bin/perl/lib/File/Find/t/find.t index 1d0a0870b13..96a10005114 100644 --- a/gnu/usr.bin/perl/lib/File/Find/t/find.t +++ b/gnu/usr.bin/perl/lib/File/Find/t/find.t @@ -18,7 +18,7 @@ BEGIN { $SIG{'__WARN__'} = sub { $warn_msg = $_[0]; warn "# $_[0]"; } } -my $test_count = 85; +my $test_count = 98; $test_count += 119 if $symlink_exists; $test_count += 26 if $^O eq 'MSWin32'; $test_count += 2 if $^O eq 'MSWin32' and $symlink_exists; @@ -108,6 +108,21 @@ sub cleanup { rmdir dir_path('fb', 'fbc'); rmdir dir_path('fb'); } + if (-d dir_path('fc')) { + unlink ( + file_path('fc', 'fca', 'match_alpha'), + file_path('fc', 'fca', 'match_beta'), + file_path('fc', 'fcb', 'match_gamma'), + file_path('fc', 'fcb', 'delta'), + file_path('fc', 'fcc', 'match_epsilon'), + file_path('fc', 'fcc', 'match_zeta'), + file_path('fc', 
'fcc', 'eta'), + ); + rmdir dir_path('fc', 'fca'); + rmdir dir_path('fc', 'fcb'); + rmdir dir_path('fc', 'fcc'); + rmdir dir_path('fc'); + } if ($need_updir) { my $updir = $^O eq 'VMS' ? File::Spec::VMS->updir() : File::Spec->updir; chdir($updir); @@ -197,7 +212,7 @@ sub my_preprocess { print "# --preprocess--\n"; print "# \$File::Find::dir => '$File::Find::dir' \n"; foreach $file (@files) { - $file =~ s/\.(dir)?$// if $^O eq 'VMS'; + $file =~ s/\.(dir)?$//i if $^O eq 'VMS'; print "# $file \n"; delete $Expect_Dir{ $File::Find::dir }->{$file}; } @@ -870,6 +885,41 @@ if ($symlink_exists) { # Issue 68260 Check (!$dangling_symlink); } +print "# RT 59750\n"; +MkDir( dir_path('fc'), 0770 ); +MkDir( dir_path('fc', 'fca'), 0770 ); +MkDir( dir_path('fc', 'fcb'), 0770 ); +MkDir( dir_path('fc', 'fcc'), 0770 ); +touch( file_path('fc', 'fca', 'match_alpha') ); +touch( file_path('fc', 'fca', 'match_beta') ); +touch( file_path('fc', 'fcb', 'match_gamma') ); +touch( file_path('fc', 'fcb', 'delta') ); +touch( file_path('fc', 'fcc', 'match_epsilon') ); +touch( file_path('fc', 'fcc', 'match_zeta') ); +touch( file_path('fc', 'fcc', 'eta') ); + +my @files_from_mixed = (); +sub wantmatch { + if ( $File::Find::name =~ m/match/ ) { + push @files_from_mixed, $_; + print "# \$_ => '$_'\n"; + } +} +find( \&wantmatch, ( + dir_path('fc', 'fca'), + dir_path('fc', 'fcb'), + dir_path('fc', 'fcc'), +) ); +Check( scalar(@files_from_mixed) == 5 ); + +@files_from_mixed = (); +find( \&wantmatch, ( + dir_path('fc', 'fca'), + dir_path('fc', 'fcb'), + file_path('fc', 'fcc', 'match_epsilon'), + file_path('fc', 'fcc', 'eta'), +) ); +Check( scalar(@files_from_mixed) == 4 ); if ($^O eq 'MSWin32') { # Check F:F:f correctly handles a root directory path. 
diff --git a/gnu/usr.bin/perl/lib/File/Find/t/taint.t b/gnu/usr.bin/perl/lib/File/Find/t/taint.t index d47b21a7c31..954c6780d94 100644 --- a/gnu/usr.bin/perl/lib/File/Find/t/taint.t +++ b/gnu/usr.bin/perl/lib/File/Find/t/taint.t @@ -1,12 +1,19 @@ #!./perl -T use strict; +use Test::More; +BEGIN { + plan( + ${^TAINT} + ? (tests => 45) + : (skip_all => "A perl without taint support") + ); +} my %Expect_File = (); # what we expect for $_ my %Expect_Name = (); # what we expect for $File::Find::name/fullname my %Expect_Dir = (); # what we expect for $File::Find::dir my ($cwd, $cwd_untainted); - BEGIN { require File::Spec; chdir 't' if -d 't'; @@ -42,8 +49,6 @@ BEGIN { $ENV{'PATH'} = join($sep,@path); } -use Test::More tests => 45; - my $symlink_exists = eval { symlink("",""); 1 }; use File::Find; diff --git a/gnu/usr.bin/perl/lib/File/stat-7896.t b/gnu/usr.bin/perl/lib/File/stat-7896.t new file mode 100644 index 00000000000..57b26858520 --- /dev/null +++ b/gnu/usr.bin/perl/lib/File/stat-7896.t @@ -0,0 +1,28 @@ +#!./perl -w +use strict; + +use Test::More; +use File::stat; + +# This is possibly a bit black-box, but for now it works. +# If (either) File::stat stops lazy loading Symbol, or Test::More starts, it +# should be revisited +is($INC{'Symbol.pm'}, undef, "Symbol isn't loaded yet"); + +# ID 20011110.104 (RT #7896) +$! = 0; +is($!, '', '$! is empty'); +is(File::stat::stat('/notafile'), undef, 'invalid file should fail'); +isnt($!, '', 'should populate $!, given invalid file'); +my $e = $!; + +isnt($INC{'Symbol.pm'}, undef, "Symbol has been loaded"); + +# Repeat twice +is(File::stat::stat('/notafile'), undef, 'invalid file should fail again'); +is($!, $e, '$! should be consistent for an invalid file'); +$e = $!; +is(File::stat::stat('/notafile'), undef, 'invalid file should fail again'); +is($!, $e, '$! 
should be consistent for an invalid file'); + +done_testing(); diff --git a/gnu/usr.bin/perl/lib/File/stat.t b/gnu/usr.bin/perl/lib/File/stat.t index 0646ebdcd6f..b85ff95462c 100644 --- a/gnu/usr.bin/perl/lib/File/stat.t +++ b/gnu/usr.bin/perl/lib/File/stat.t @@ -5,107 +5,136 @@ BEGIN { @INC = '../lib'; } +use strict; +use warnings; use Test::More; use Config qw( %Config ); - -BEGIN { - # Check whether the build is configured with -Dmksymlinks - our $Dmksymlinks = - grep { /^config_arg\d+$/ && $Config{$_} eq '-Dmksymlinks' } - keys %Config; - - # Resolve symlink to ./lib/File/stat.t if this build is configured - # with -Dmksymlinks - # Originally we worked with ./TEST, but other test scripts read from - # that file and modify its access time. - our $file = '../lib/File/stat.t'; - if ( $Dmksymlinks ) { - $file = readlink $file; - die "Can't readlink(../lib/File/stat.t): $!" if ! defined $file; +use File::Temp qw( tempfile tempdir ); + +use File::stat; + +my (undef, $file) = tempfile(); + +{ + my @stat = CORE::stat $file; + my $stat = File::stat::stat($file); + isa_ok($stat, 'File::stat', 'should build a stat object'); + is_deeply($stat, \@stat, '... 
and matches the builtin'); + + my $i = 0; + foreach ([dev => 'device number'], + [ino => 'inode number'], + [mode => 'file mode'], + [nlink => 'number of links'], + [uid => 'owner uid'], + [gid => 'group id'], + [rdev => 'device identifier'], + [size => 'file size'], + [atime => 'last access time'], + [mtime => 'last modify time'], + [ctime => 'change time'], + [blksize => 'IO block size'], + [blocks => 'number of blocks']) { + my ($meth, $desc) = @$_; + # On OS/2 (fake) ino is not constant, it is incremented each time + SKIP: { + skip('inode number is not constant on OS/2', 1) + if $i == 1 && $^O eq 'os2'; + is($stat->$meth, $stat[$i], "$desc in position $i"); + } + ++$i; } - our $hasst; - eval { my @n = stat $file }; - $hasst = 1 unless $@ && $@ =~ /unimplemented/; - unless ($hasst) { plan skip_all => "no stat"; exit 0 } - use Config; - $hasst = 0 unless $Config{'i_sysstat'} eq 'define'; - unless ($hasst) { plan skip_all => "no sys/stat.h"; exit 0 } + my $stat2 = stat $file; + isa_ok($stat2, 'File::stat', + 'File::stat exports stat, overriding the builtin'); + is_deeply($stat2, $stat, '... and matches the direct call'); } -# Originally this was done in the BEGIN block, but perl is still -# compiling (and hence reading) the script at that point, which can -# change the file's access time, causing a different in the comparison -# tests if the clock ticked over the second between the stat() and the -# final read. -# At this point all of the reading is done. -our @stat = stat $file; # This is the function stat. -unless (@stat) { plan skip_all => "1..0 # Skip: no file $file"; exit 0 } - -plan tests => 19 + 24*2 + 4 + 3; - -use_ok( 'File::stat' ); - -my $stat = File::stat::stat( $file ); # This is the OO stat. 
-ok( ref($stat), 'should build a stat object' ); - -is( $stat->dev, $stat[0], "device number in position 0" ); - -# On OS/2 (fake) ino is not constant, it is incremented each time -SKIP: { - skip('inode number is not constant on OS/2', 1) if $^O eq 'os2'; - is( $stat->ino, $stat[1], "inode number in position 1" ); +sub test_X_ops { + my ($file, $desc_tail, $skip) = @_; + my @stat = CORE::stat $file; + my $stat = File::stat::stat($file); + my $lstat = File::stat::lstat($file); + isa_ok($stat, 'File::stat', 'should build a stat object'); + + for my $op (split //, "rwxoRWXOezsfdlpSbcugkMCA") { + if ($skip && $op =~ $skip) { + note("Not testing -A $desc_tail"); + next; + } + my $stat = $op eq 'l' ? $lstat : $stat; + for my $access ('', 'use filetest "access";') { + my ($warnings, $awarn, $vwarn, $rv); + my $desc = $access + ? "for -$op under use filetest 'access' $desc_tail" + : "for -$op $desc_tail"; + { + local $SIG{__WARN__} = sub { + my $w = shift; + if ($w =~ /^File::stat ignores VMS ACLs/) { + ++$vwarn; + } elsif ($w =~ /^File::stat ignores use filetest 'access'/) { + ++$awarn; + } else { + $warnings .= $w; + } + }; + $rv = eval "$access; -$op \$stat"; + } + is($@, '', "Overload succeeds $desc"); + + if ($^O eq "VMS" && $op =~ /[rwxRWX]/) { + is($vwarn, 1, "warning about VMS ACLs $desc"); + } else { + is($rv, eval "-$op \$file", "correct overload $desc") + unless $access; + is($vwarn, undef, "no warnings about VMS ACLs $desc"); + } + + # 111640 - File::stat bogus index check in overload + if ($access && $op =~ /[rwxRXW]/) { + # these should all warn with filetest access + is($awarn, 1, + "produced the right warning $desc"); + } else { + # -d and others shouldn't warn + is($awarn, undef, "should be no warning $desc") + } + + is($warnings, undef, "no other warnings seen $desc"); + } + } } -is( $stat->mode, $stat[2], "file mode in position 2" ); - -is( $stat->nlink, $stat[3], "number of links in position 3" ); - -is( $stat->uid, $stat[4], "owner uid in position 4" 
); - -is( $stat->gid, $stat[5], "group id in position 5" ); - -is( $stat->rdev, $stat[6], "device identifier in position 6" ); - -is( $stat->size, $stat[7], "file size in position 7" ); - -is( $stat->atime, $stat[8], "last access time in position 8" ); - -is( $stat->mtime, $stat[9], "last modify time in position 9" ); - -is( $stat->ctime, $stat[10], "change time in position 10" ); - -is( $stat->blksize, $stat[11], "IO block size in position 11" ); - -is( $stat->blocks, $stat[12], "number of blocks in position 12" ); - -for (split //, "rwxoRWXOezsfdlpSbcugkMCA") { - SKIP: { - $^O eq "VMS" and index("rwxRWX", $_) >= 0 - and skip "File::stat ignores VMS ACLs", 2; - - my $rv = eval "-$_ \$stat"; - ok( !$@, "-$_ overload succeeds" ) - or diag( $@ ); - is( $rv, eval "-$_ \$file", "correct -$_ overload" ); +foreach ([file => $file], + [dir => tempdir(CLEANUP => 1)]) { + my ($what, $pathname) = @$_; + test_X_ops($pathname, "for $what $pathname"); + + my $mode = 01000; + while ($mode) { + $mode >>= 1; + my $mode_oct = sprintf "0%03o", $mode; + chmod $mode, $pathname or die "Can't chmod $mode_oct $pathname: $!"; + test_X_ops($pathname, "for $what with mode=$mode_oct"); } + chmod 0600, $pathname + or die "Can't restore permissions on $pathname to 0600"; } SKIP: { - my $file = '../perl'; - -e $file && -x $file or skip "$file is not present and executable", 4; + -e $^X && -x $^X or skip "$^X is not present and executable", 4; $^O eq "VMS" and skip "File::stat ignores VMS ACLs", 4; - my $stat = File::stat::stat( $file ); # This is the OO stat. 
- foreach (qw/x X/) { - my $rv = eval "-$_ \$stat"; - ok( !$@, "-$_ overload succeeds" ) - or diag( $@ ); - is( $rv, eval "-$_ \$file", "correct -$_ overload" ); - } + # Other tests running in parallel mean that $^X is read, updating its atime + test_X_ops($^X, "for $^X", qr/A/); } +my $stat = File::stat::stat($file); +isa_ok($stat, 'File::stat', 'should build a stat object'); + for (split //, "tTB") { eval "-$_ \$stat"; like( $@, qr/\Q-$_ is not implemented/, "-$_ overload fails" ); @@ -114,12 +143,14 @@ for (split //, "tTB") { SKIP: { local *STAT; skip("Could not open file: $!", 2) unless open(STAT, $file); - ok( File::stat::stat('STAT'), '... should be able to find filehandle' ); + isa_ok(File::stat::stat('STAT'), 'File::stat', + '... should be able to find filehandle'); package foo; local *STAT = *main::STAT; - main::ok( my $stat2 = File::stat::stat('STAT'), - '... and filehandle in another package' ); + my $stat2 = File::stat::stat('STAT'); + main::isa_ok($stat2, 'File::stat', + '... and filehandle in another package'); close STAT; # VOS open() updates atime; ignore this error (posix-975). @@ -133,12 +164,29 @@ SKIP: { main::skip("OS/2: inode number is not constant on os/2", 1) if $^O eq 'os2'; - main::is( "@$stat", "@$stat3", '... and must match normal stat' ); + main::is_deeply($stat, $stat3, '... and must match normal stat'); } - -local $!; -$stat = stat '/notafile'; -isnt( $!, '', 'should populate $!, given invalid file' ); +SKIP: +{ # RT #111638 + skip "We can't check for FIFOs", 2 unless defined &Fcntl::S_ISFIFO; + skip "No pipes", 2 unless defined $Config{d_pipe}; + pipe my ($rh, $wh) + or skip "Couldn't create a pipe: $!", 2; + skip "Built-in -p doesn't detect a pipe", 2 unless -p $rh; + + my $pstat = File::stat::stat($rh); + ok(!-p($stat), "-p should be false on a file"); + ok(-p($pstat), "check -p detects a pipe"); +} # Testing pretty much anything else is unportable. 
+ +done_testing; + +# Local variables: +# cperl-indent-level: 4 +# indent-tabs-mode: nil +# End: +# +# ex: set ts=8 sts=4 sw=4 et: diff --git a/gnu/usr.bin/perl/lib/FileHandle.t b/gnu/usr.bin/perl/lib/FileHandle.t index ddbd94474ee..aeae754b0e0 100644 --- a/gnu/usr.bin/perl/lib/FileHandle.t +++ b/gnu/usr.bin/perl/lib/FileHandle.t @@ -8,10 +8,6 @@ BEGIN { print "1..0\n"; exit 0; } - if ($^O eq 'mpeix') { - print "1..0 # Skip: broken on MPE/iX\n"; - exit 0; - } } use FileHandle; diff --git a/gnu/usr.bin/perl/lib/Internals.t b/gnu/usr.bin/perl/lib/Internals.t index d3fce9c1c81..8af04af1bd5 100644 --- a/gnu/usr.bin/perl/lib/Internals.t +++ b/gnu/usr.bin/perl/lib/Internals.t @@ -7,7 +7,7 @@ BEGIN { } } -use Test::More tests => 78; +use Test::More tests => 82; my $ro_err = qr/^Modification of a read-only value attempted/; @@ -173,4 +173,18 @@ is( Internals::SvREFCNT($foo, $big_count), $big_count, "set reference count unsigned"); is( Internals::SvREFCNT($foo), $big_count, "reference count unsigned"); -Internals::SvREFCNT($foo, 1 ); +{ + my @arr = Internals::SvREFCNT($foo, 1 ); + is(scalar(@arr), 1, "SvREFCNT always returns only 1 item"); +} + +{ + my $usage = 'Usage: Internals::SvREFCNT(SCALAR[, REFCOUNT])'; + eval { &Internals::SvREFCNT();}; + like($@, qr/\Q$usage\E/); + $foo = \"perl"; + eval { &Internals::SvREFCNT($foo, 0..1);}; + like($@, qr/\Q$usage\E/); + eval { &Internals::SvREFCNT($foo, 0..3);}; + like($@, qr/\Q$usage\E/); +} diff --git a/gnu/usr.bin/perl/lib/Tie/Handle/stdhandle.t b/gnu/usr.bin/perl/lib/Tie/Handle/stdhandle.t index 13a8255155f..b1ae7c455b5 100644 --- a/gnu/usr.bin/perl/lib/Tie/Handle/stdhandle.t +++ b/gnu/usr.bin/perl/lib/Tie/Handle/stdhandle.t @@ -10,7 +10,7 @@ tie *tst,Tie::StdHandle; $f = 'tst'; -print "1..13\n"; +print "1..14\n"; # my $file tests @@ -42,6 +42,10 @@ print "ok 11\n"; $b = <$f>; print "not " unless eof($f); print "ok 12\n"; -print "not " unless close($f); +seek($f,0,0); +read($f,($b='scrinches'),4,4); # with offset +print "'$b' 
not " unless $b eq 'scriSome'; print "ok 13\n"; +print "not " unless close($f); +print "ok 14\n"; unlink("afile"); diff --git a/gnu/usr.bin/perl/lib/Tie/StdHandle.pm b/gnu/usr.bin/perl/lib/Tie/StdHandle.pm index 3a1a3db4788..9192b2e5ee3 100644 --- a/gnu/usr.bin/perl/lib/Tie/StdHandle.pm +++ b/gnu/usr.bin/perl/lib/Tie/StdHandle.pm @@ -5,7 +5,7 @@ use strict; use Tie::Handle; use vars qw(@ISA $VERSION); @ISA = 'Tie::Handle'; -$VERSION = '4.2'; +$VERSION = '4.3'; =head1 NAME @@ -57,7 +57,7 @@ sub OPEN @_ == 2 ? open($_[0], $_[1]) : open($_[0], $_[1], $_[2]); } -sub READ { read($_[0],$_[1],$_[2]) } +sub READ { &CORE::read(shift, \shift, @_) } sub READLINE { my $fh = $_[0]; <$fh> } sub GETC { getc($_[0]) } diff --git a/gnu/usr.bin/perl/lib/Unicode/UCD.pm b/gnu/usr.bin/perl/lib/Unicode/UCD.pm index 724fb62785b..9c3dd7c7105 100644 --- a/gnu/usr.bin/perl/lib/Unicode/UCD.pm +++ b/gnu/usr.bin/perl/lib/Unicode/UCD.pm @@ -4,11 +4,8 @@ use strict; use warnings; no warnings 'surrogate'; # surrogates can be inputs to this use charnames (); -use Unicode::Normalize qw(getCombinClass NFD); -our $VERSION = '0.43'; - -use Storable qw(dclone); +our $VERSION = '0.51'; require Exporter; @@ -20,7 +17,7 @@ our @EXPORT_OK = qw(charinfo charinrange general_categories bidi_types compexcl - casefold casespec + casefold all_casefolds casespec namedseq num prop_aliases @@ -44,6 +41,9 @@ Unicode::UCD - Unicode character database use Unicode::UCD 'casefold'; my $casefold = casefold(0xFB00); + use Unicode::UCD 'all_casefolds'; + my $all_casefolds_ref = all_casefolds(); + use Unicode::UCD 'casespec'; my $casespec = casespec(0xFB00); @@ -104,8 +104,16 @@ a decimal or a hexadecimal scalar designating a Unicode code point, or C<U+> followed by hexadecimals designating a Unicode code point. In other words, if you want a code point to be interpreted as a hexadecimal number, you must prefix it with either C<0x> or C<U+>, because a string like e.g. C<123> will be -interpreted as a decimal code point. 
Note that the largest code point in -Unicode is U+10FFFF. +interpreted as a decimal code point. + +Examples: + + 223 # Decimal 223 + 0223 # Hexadecimal 223 (= 547 decimal) + 0xDF # Hexadecimal DF (= 223 decimal) + U+DF # Hexadecimal DF + +Note that the largest code point in Unicode is U+10FFFF. =cut @@ -114,6 +122,7 @@ my $VERSIONFH; my $CASEFOLDFH; my $CASESPECFH; my $NAMEDSEQFH; +my $v_unicode_version; # v-string. sub openunicode { my ($rfh, @path) = @_; @@ -132,6 +141,35 @@ sub openunicode { return $f; } +sub _dclone ($) { # Use Storable::dclone if available; otherwise emulate it. + + use if defined &DynaLoader::boot_DynaLoader, Storable => qw(dclone); + + return dclone(shift) if defined &dclone; + + my $arg = shift; + my $type = ref $arg; + return $arg unless $type; # No deep cloning needed for scalars + + if ($type eq 'ARRAY') { + my @return; + foreach my $element (@$arg) { + push @return, &_dclone($element); + } + return \@return; + } + elsif ($type eq 'HASH') { + my %return; + foreach my $key (keys %$arg) { + $return{$key} = &_dclone($arg->{$key}); + } + return \%return; + } + else { + croak "_dclone can't handle " . 
$type; + } +} + =head2 B<charinfo()> use Unicode::UCD 'charinfo'; @@ -303,6 +341,7 @@ my %SIMPLE_LOWER; my %SIMPLE_TITLE; my %SIMPLE_UPPER; my %UNICODE_1_NAMES; +my %ISO_COMMENT; sub charinfo { @@ -315,6 +354,9 @@ sub charinfo { use feature 'unicode_strings'; + # Will fail if called under minitest + use if defined &DynaLoader::boot_DynaLoader, "Unicode::Normalize" => qw(getCombinClass NFD); + my $arg = shift; my $code = _getcode($arg); croak __PACKAGE__, "::charinfo: unknown code '$arg'" unless defined $code; @@ -353,7 +395,8 @@ sub charinfo { # Having no decomposition implies an empty field; otherwise, all but # "Canonical" imply a compatible decomposition, and the type is prefixed # to that, as it is in UnicodeData.txt - if ($char =~ /\p{Block=Hangul_Syllables}/) { + UnicodeVersion() unless defined $v_unicode_version; + if ($v_unicode_version ge v2.0.0 && $char =~ /\p{Block=Hangul_Syllables}/) { # The code points of the decomposition are output in standard Unicode # hex format, separated by blanks. $prop{'decomposition'} = join " ", map { sprintf("%04X", $_)} @@ -400,9 +443,16 @@ sub charinfo { %UNICODE_1_NAMES =_read_table("To/Na1.pl", "use_hash") unless %UNICODE_1_NAMES; $prop{'unicode10'} = $UNICODE_1_NAMES{$code} // ""; - # This is true starting in 6.0, but, num() also requires 6.0, so - # don't need to test for version again here. - $prop{'comment'} = ""; + UnicodeVersion() unless defined $v_unicode_version; + if ($v_unicode_version ge v6.0.0) { + $prop{'comment'} = ""; + } + else { + %ISO_COMMENT = _read_table("To/Isc.pl", "use_hash") unless %ISO_COMMENT; + $prop{'comment'} = (defined $ISO_COMMENT{$code}) + ? $ISO_COMMENT{$code} + : ""; + } %SIMPLE_UPPER = _read_table("To/Uc.pl", "use_hash") unless %SIMPLE_UPPER; $prop{'upper'} = (defined $SIMPLE_UPPER{$code}) @@ -536,7 +586,8 @@ With a L</code point argument> charblock() returns the I<block> the code point belongs to, e.g. C<Basic Latin>. 
The old-style block name is returned (see L</Old-style versus new-style block names>). If the code point is unassigned, this returns the block it would belong to if -it were assigned. +it were assigned. (If the Unicode version being used is so early as to not +have blocks, all code points are considered to be in C<No_Block>.) See also L</Blocks versus Scripts>. @@ -562,7 +613,13 @@ sub _charblocks { # Can't read from the mktables table because it loses the hyphens in the # original. unless (@BLOCKS) { - if (openunicode(\$BLOCKSFH, "Blocks.txt")) { + UnicodeVersion() unless defined $v_unicode_version; + if ($v_unicode_version lt v2.0.0) { + my $subrange = [ 0, 0x10FFFF, 'No_Block' ]; + push @BLOCKS, $subrange; + push @{$BLOCKS{'No_Block'}}, $subrange; + } + elsif (openunicode(\$BLOCKSFH, "Blocks.txt")) { local $_; local $/ = "\n"; while (<$BLOCKSFH>) { @@ -591,7 +648,7 @@ sub charblock { return 'No_Block'; } elsif (exists $BLOCKS{$arg}) { - return dclone $BLOCKS{$arg}; + return _dclone $BLOCKS{$arg}; } } @@ -607,7 +664,8 @@ sub charblock { With a L</code point argument> charscript() returns the I<script> the code point belongs to, e.g. C<Latin>, C<Greek>, C<Han>. -If the code point is unassigned, it returns C<"Unknown">. +If the code point is unassigned or the Unicode version being used is so early +that it doesn't have scripts, this function returns C<"Unknown">. If supplied with an argument that can't be a code point, charscript() tries to do the opposite and interpret the argument as a script name. 
The @@ -624,7 +682,15 @@ my @SCRIPTS; my %SCRIPTS; sub _charscripts { - @SCRIPTS =_read_table("To/Sc.pl") unless @SCRIPTS; + unless (@SCRIPTS) { + UnicodeVersion() unless defined $v_unicode_version; + if ($v_unicode_version lt v3.1.0) { + push @SCRIPTS, [ 0, 0x10FFFF, 'Unknown' ]; + } + else { + @SCRIPTS =_read_table("To/Sc.pl"); + } + } foreach my $entry (@SCRIPTS) { $entry->[2] =~ s/(_\w)/\L$1/g; # Preserve old-style casing push @{$SCRIPTS{$entry->[2]}}, $entry; @@ -643,7 +709,7 @@ sub charscript { return $result if defined $result; return $utf8::SwashInfo{'ToSc'}{'missing'}; } elsif (exists $SCRIPTS{$arg}) { - return dclone $SCRIPTS{$arg}; + return _dclone $SCRIPTS{$arg}; } return; @@ -670,7 +736,7 @@ See also L</Blocks versus Scripts>. sub charblocks { _charblocks() unless %BLOCKS; - return dclone \%BLOCKS; + return _dclone \%BLOCKS; } =head2 B<charscripts()> @@ -692,7 +758,7 @@ See also L</Blocks versus Scripts>. sub charscripts { _charscripts() unless %SCRIPTS; - return dclone \%SCRIPTS; + return _dclone \%SCRIPTS; } =head2 B<charinrange()> @@ -752,7 +818,7 @@ my %GENERAL_CATEGORIES = ); sub general_categories { - return dclone \%GENERAL_CATEGORIES; + return _dclone \%GENERAL_CATEGORIES; } =head2 B<general_categories()> @@ -820,7 +886,7 @@ the bidi type name. =cut sub bidi_types { - return dclone \%BIDI_TYPES; + return _dclone \%BIDI_TYPES; } =head2 B<compexcl()> @@ -829,7 +895,9 @@ sub bidi_types { my $compexcl = compexcl(0x09dc); -This routine is included for backwards compatibility, but as of Perl 5.12, for +This routine returns C<undef> if the Unicode version being used is so early +that it doesn't have this property. 
It is included for backwards +compatibility, but as of Perl 5.12 and more modern Unicode versions, for most purposes it is probably more convenient to use one of the following instead: @@ -864,6 +932,9 @@ sub compexcl { croak __PACKAGE__, "::compexcl: unknown code '$arg'" unless defined $code; + UnicodeVersion() unless defined $v_unicode_version; + return if $v_unicode_version lt v3.0.0; + no warnings "non_unicode"; # So works on non-Unicode code points return chr($code) =~ /\p{Composition_Exclusion}/; } @@ -943,12 +1014,12 @@ dotless lowercase i: =over -=item B<*> If you use this C<I> mapping +=item Z<>B<*> If you use this C<I> mapping the result is case-insensitive, but dotless and dotted I's are not distinguished -=item B<*> If you exclude this C<I> mapping +=item Z<>B<*> If you exclude this C<I> mapping the result is not fully case-insensitive, but dotless and dotted I's are distinguished @@ -997,54 +1068,88 @@ L<http://www.unicode.org/unicode/reports/tr21> my %CASEFOLD; sub _casefold { - unless (%CASEFOLD) { - if (openunicode(\$CASEFOLDFH, "CaseFolding.txt")) { - local $_; - local $/ = "\n"; - while (<$CASEFOLDFH>) { - if (/^([0-9A-F]+); ([CFIST]); ([0-9A-F]+(?: [0-9A-F]+)*);/) { - my $code = hex($1); - $CASEFOLD{$code}{'code'} = $1; - $CASEFOLD{$code}{'turkic'} = "" unless - defined $CASEFOLD{$code}{'turkic'}; - if ($2 eq 'C' || $2 eq 'I') { # 'I' is only on 3.1 and - # earlier Unicodes - # Both entries there (I - # only checked 3.1) are - # the same as C, and - # there are no other - # entries for those - # codepoints, so treat - # as if C, but override - # the turkic one for - # 'I'. 
- $CASEFOLD{$code}{'status'} = $2; - $CASEFOLD{$code}{'full'} = $CASEFOLD{$code}{'simple'} = - $CASEFOLD{$code}{'mapping'} = $3; - $CASEFOLD{$code}{'turkic'} = $3 if $2 eq 'I'; - } elsif ($2 eq 'F') { - $CASEFOLD{$code}{'full'} = $3; - unless (defined $CASEFOLD{$code}{'simple'}) { - $CASEFOLD{$code}{'simple'} = ""; - $CASEFOLD{$code}{'mapping'} = $3; - $CASEFOLD{$code}{'status'} = $2; - } - } elsif ($2 eq 'S') { + unless (%CASEFOLD) { # Populate the hash + my ($full_invlist_ref, $full_invmap_ref, undef, $default) + = prop_invmap('Case_Folding'); + + # Use the recipe given in the prop_invmap() pod to convert the + # inversion map into the hash. + for my $i (0 .. @$full_invlist_ref - 1 - 1) { + next if $full_invmap_ref->[$i] == $default; + my $adjust = -1; + for my $j ($full_invlist_ref->[$i] .. $full_invlist_ref->[$i+1] -1) { + $adjust++; + if (! ref $full_invmap_ref->[$i]) { + + # This is a single character mapping + $CASEFOLD{$j}{'status'} = 'C'; + $CASEFOLD{$j}{'simple'} + = $CASEFOLD{$j}{'full'} + = $CASEFOLD{$j}{'mapping'} + = sprintf("%04X", $full_invmap_ref->[$i] + $adjust); + $CASEFOLD{$j}{'code'} = sprintf("%04X", $j); + $CASEFOLD{$j}{'turkic'} = ""; + } + else { # prop_invmap ensures that $adjust is 0 for a ref + $CASEFOLD{$j}{'status'} = 'F'; + $CASEFOLD{$j}{'full'} + = $CASEFOLD{$j}{'mapping'} + = join " ", map { sprintf "%04X", $_ } + @{$full_invmap_ref->[$i]}; + $CASEFOLD{$j}{'simple'} = ""; + $CASEFOLD{$j}{'code'} = sprintf("%04X", $j); + $CASEFOLD{$j}{'turkic'} = ""; + } + } + } + # We have filled in the full mappings above, assuming there were no + # simple ones for the ones with multi-character maps. Now, we find + # and fix the cases where that assumption was false. + (my ($simple_invlist_ref, $simple_invmap_ref, undef), $default) + = prop_invmap('Simple_Case_Folding'); + for my $i (0 .. @$simple_invlist_ref - 1 - 1) { + next if $simple_invmap_ref->[$i] == $default; + my $adjust = -1; + for my $j ($simple_invlist_ref->[$i] + .. 
$simple_invlist_ref->[$i+1] -1) + { + $adjust++; + next if $CASEFOLD{$j}{'status'} eq 'C'; + $CASEFOLD{$j}{'status'} = 'S'; + $CASEFOLD{$j}{'simple'} + = $CASEFOLD{$j}{'mapping'} + = sprintf("%04X", $simple_invmap_ref->[$i] + $adjust); + $CASEFOLD{$j}{'code'} = sprintf("%04X", $j); + $CASEFOLD{$j}{'turkic'} = ""; + } + } - # There can't be a simple without a full, and simple - # overrides all but full + # We hard-code in the turkish rules + UnicodeVersion() unless defined $v_unicode_version; + if ($v_unicode_version ge v3.2.0) { - $CASEFOLD{$code}{'simple'} = $3; - $CASEFOLD{$code}{'mapping'} = $3; - $CASEFOLD{$code}{'status'} = $2; - } elsif ($2 eq 'T') { - $CASEFOLD{$code}{'turkic'} = $3; - } # else can't happen because only [CIFST] are possible - } - } - close($CASEFOLDFH); - } + # These two code points should already have regular entries, so + # just fill in the turkish fields + $CASEFOLD{ord('I')}{'turkic'} = '0131'; + $CASEFOLD{0x130}{'turkic'} = sprintf "%04X", ord('i'); + } + elsif ($v_unicode_version ge v3.1.0) { + + # These two code points don't have entries otherwise. 
+ $CASEFOLD{0x130}{'code'} = '0130'; + $CASEFOLD{0x131}{'code'} = '0131'; + $CASEFOLD{0x130}{'status'} = $CASEFOLD{0x131}{'status'} = 'I'; + $CASEFOLD{0x130}{'turkic'} + = $CASEFOLD{0x130}{'mapping'} + = $CASEFOLD{0x130}{'full'} + = $CASEFOLD{0x130}{'simple'} + = $CASEFOLD{0x131}{'turkic'} + = $CASEFOLD{0x131}{'mapping'} + = $CASEFOLD{0x131}{'full'} + = $CASEFOLD{0x131}{'simple'} + = sprintf "%04X", ord('i'); + } } } @@ -1059,6 +1164,55 @@ sub casefold { return $CASEFOLD{$code}; } +=head2 B<all_casefolds()> + + + use Unicode::UCD 'all_casefolds'; + + my $all_folds_ref = all_casefolds(); + foreach my $char_with_casefold (sort { $a <=> $b } + keys %$all_folds_ref) + { + printf "%04X:", $char_with_casefold; + my $casefold = $all_folds_ref->{$char_with_casefold}; + + # Get folds for $char_with_casefold + + my @full_fold_hex = split / /, $casefold->{'full'}; + my $full_fold_string = + join "", map {chr(hex($_))} @full_fold_hex; + print " full=", join " ", @full_fold_hex; + my @turkic_fold_hex = + split / /, ($casefold->{'turkic'} ne "") + ? $casefold->{'turkic'} + : $casefold->{'full'}; + my $turkic_fold_string = + join "", map {chr(hex($_))} @turkic_fold_hex; + print "; turkic=", join " ", @turkic_fold_hex; + if (defined $casefold && $casefold->{'simple'} ne "") { + my $simple_fold_hex = $casefold->{'simple'}; + my $simple_fold_string = chr(hex($simple_fold_hex)); + print "; simple=$simple_fold_hex"; + } + print "\n"; + } + +This returns all the case foldings in the current version of Unicode in the +form of a reference to a hash. Each key to the hash is the decimal +representation of a Unicode character that has a casefold to other than +itself. The casefold of a semi-colon is itself, so it isn't in the hash; +likewise for a lowercase "a", but there is an entry for a capital "A". The +hash value for each key is another hash, identical to what is returned by +L</casefold()> if called with that code point as its argument. 
So the value +C<< all_casefolds()->{ord("A")} >> is equivalent to C<casefold(ord("A"))>. + +=cut + +sub all_casefolds () { + _casefold() unless %CASEFOLD; + return _dclone \%CASEFOLD; +} + =head2 B<casespec()> use Unicode::UCD 'casespec'; @@ -1161,15 +1315,25 @@ my %CASESPEC; sub _casespec { unless (%CASESPEC) { - if (openunicode(\$CASESPECFH, "SpecialCasing.txt")) { + UnicodeVersion() unless defined $v_unicode_version; + if ($v_unicode_version lt v2.1.8) { + %CASESPEC = {}; + } + elsif (openunicode(\$CASESPECFH, "SpecialCasing.txt")) { local $_; local $/ = "\n"; while (<$CASESPECFH>) { if (/^([0-9A-F]+); ([0-9A-F]+(?: [0-9A-F]+)*)?; ([0-9A-F]+(?: [0-9A-F]+)*)?; ([0-9A-F]+(?: [0-9A-F]+)*)?; (\w+(?: \w+)*)?/) { + my ($hexcode, $lower, $title, $upper, $condition) = ($1, $2, $3, $4, $5); my $code = hex($hexcode); - if (exists $CASESPEC{$code}) { + + # In 2.1.8, there were duplicate entries; ignore all but + # the first one -- there were no conditions in the file + # anyway. + if (exists $CASESPEC{$code} && $v_unicode_version ne v2.1.8) + { if (exists $CASESPEC{$code}->{code}) { my ($oldlower, $oldtitle, @@ -1222,7 +1386,7 @@ sub casespec { _casespec() unless %CASESPEC; - return ref $CASESPEC{$code} ? dclone $CASESPEC{$code} : $CASESPEC{$code}; + return ref $CASESPEC{$code} ? _dclone $CASESPEC{$code} : $CASESPEC{$code}; } =head2 B<namedseq()> @@ -1307,16 +1471,6 @@ sub namedseq { my %NUMERIC; sub _numeric { - - # Unicode 6.0 instituted the rule that only digits in a consecutive - # block of 10 would be considered decimal digits. Before that, the only - # problematic code point that I'm (khw) aware of is U+019DA, NEW TAI LUE - # THAM DIGIT ONE, which is an alternate form of U+019D1, NEW TAI LUE DIGIT - # ONE. The code could be modified to handle that, but not bothering, as - # in TUS 6.0, U+19DA was changed to Nt=Di. 
- if ((pack "C*", split /\./, UnicodeVersion()) lt 6.0.0) { - croak __PACKAGE__, "::num requires Unicode 6.0 or greater" - } my @numbers = _read_table("To/Nv.pl"); foreach my $entry (@numbers) { my ($start, $end, $value) = @$entry; @@ -1427,14 +1581,43 @@ sub num { return if $string =~ /\D/; my $first_ord = ord(substr($string, 0, 1)); my $value = $NUMERIC{$first_ord}; + + # To be a valid decimal number, it should be in a block of 10 consecutive + # characters, whose values are 0, 1, 2, ... 9. Therefore this digit's + # value is its offset in that block from the character that means zero. my $zero_ord = $first_ord - $value; + # Unicode 6.0 instituted the rule that only digits in a consecutive + # block of 10 would be considered decimal digits. If this is an earlier + # release, we verify that this first character is a member of such a + # block. That is, that the block of characters surrounding this one + # consists of all \d characters whose numeric values are the expected + # ones. + UnicodeVersion() unless defined $v_unicode_version; + if ($v_unicode_version lt v6.0.0) { + for my $i (0 .. 9) { + my $ord = $zero_ord + $i; + return unless chr($ord) =~ /\d/; + my $numeric = $NUMERIC{$ord}; + return unless defined $numeric; + return unless $numeric == $i; + } + } + for my $i (1 .. $length -1) { + + # Here we know either by verifying, or by fact of the first character + # being a \d in Unicode 6.0 or later, that any character between the + # character that means 0, and 9 positions above it must be \d, and + # must have its value correspond to its offset from the zero. Any + # characters outside these 10 do not form a legal number for this + # function. my $ord = ord(substr($string, $i, 1)); my $digit = $ord - $zero_ord; return unless $digit >= 0 && $digit <= 9; $value = $value * 10 + $digit; } + return $value; } @@ -1676,7 +1859,7 @@ sub prop_aliases ($) { # The full name is in element 1. 
return $list_ref->[1] unless wantarray; - return @{dclone $list_ref}; + return @{_dclone $list_ref}; } =pod @@ -1815,7 +1998,7 @@ sub prop_value_aliases ($$) { # The full name is in element 1. return $list_ref->[1] unless wantarray; - return @{dclone $list_ref}; + return @{_dclone $list_ref}; } return $list_ref->[0] unless wantarray; @@ -1842,7 +2025,8 @@ by the input parameter string: prints: 0, 1114112 -An empty list is returned if the input is unknown; the number of elements in +If the input is unknown C<undef> is returned in scalar context; an empty-list +in list context. If the input is known, the number of elements in the list is returned if called in scalar context. L<perluniprops|perluniprops/Properties accessible through \p{} and \P{}> gives @@ -1952,8 +2136,12 @@ properties, and will return C<undef> if called with one of those. our %loose_defaults; our $MAX_UNICODE_CODEPOINT; -sub prop_invlist ($) { +sub prop_invlist ($;$) { my $prop = $_[0]; + + # Undocumented way to get at Perl internal properties + my $internal_ok = defined $_[1] && $_[1] eq '_perl_core_internal_ok'; + return if ! defined $prop; require "utf8_heavy.pl"; @@ -1970,7 +2158,7 @@ sub prop_invlist ($) { || ref $swash eq "" || $swash->{'BITS'} != 1 || $swash->{'USER_DEFINED'} - || $prop =~ /^\s*_/; + || (! $internal_ok && $prop =~ /^\s*_/); if ($swash->{'EXTRAS'}) { carp __PACKAGE__, "::prop_invlist: swash returned for $prop unexpectedly has EXTRAS magic"; @@ -2075,7 +2263,8 @@ sub prop_invlist ($) { sub _search_invlist { # Find the range in the inversion list which contains a code point; that - # is, find i such that l[i] <= code_point < l[i+1] + # is, find i such that l[i] <= code_point < l[i+1]. Returns undef if no + # such i. # If this is ever made public, could use to speed up .t specials. Would # need to use code point argument, as in other functions in this pm @@ -2085,7 +2274,10 @@ sub _search_invlist { # Verify non-neg numeric XXX my $max_element = @$list_ref - 1; - return if ! 
$max_element < 0; # Undef if list is empty. + + # Return undef if list is empty or requested item is before the first element. + return if $max_element < 0; + return if $code_point < $list_ref->[0]; # Short cut something at the far-end of the table. This also allows us to # refer to element [$i+1] without fear of being out-of-bounds in the loop @@ -2431,7 +2623,7 @@ means that all the elements of the map array are either rational numbers or the string C<"NaN">, meaning "Not a Number". A rational number is either an integer, or two integers separated by a solidus (C<"/">). The second integer represents the denominator of the division implied by the solidus, and is -actually always positive, so it is guaranteed not to be 0 and to not to be +actually always positive, so it is guaranteed not to be 0 and to not be signed. When the element is a plain integer (without the solidus), it may need to be adjusted to get the correct value by adding the offset, just as other C<"a"> properties. No adjustment is needed for @@ -2443,7 +2635,7 @@ can use something like this: my ($invlist_ref, $invmap_ref, $format) = prop_invmap($property); if ($format && $format eq "ar") { - map { $_ = eval $_ } @$invmap_ref; + map { $_ = eval $_ if $_ ne 'NaN' } @$invmap_ref; } Here's some entries from the output of the property "Nv", which has format @@ -2665,7 +2857,7 @@ RETRY: $prop = "age"; goto RETRY; } - elsif ($second_try =~ / ^ s ( cf | [ltu] c ) $ /x) { + elsif ($second_try =~ / ^ s ( cf | fc | [ltu] c ) $ /x) { # These properties use just the LIST part of the full mapping, # which includes the simple maps that are otherwise overridden by @@ -2674,7 +2866,11 @@ RETRY: $overrides = -1; # The full name is the simple name stripped of its initial 's' - $prop = $second_try =~ s/^s//r; + $prop = $1; + + # .. 
except for this case + $prop = 'cf' if $prop eq 'fc'; + goto RETRY; } elsif ($second_try eq "blk") { @@ -2733,7 +2929,7 @@ RETRY: my ($hex_code_point, $name) = split "\t", $line; # Weeds out all comments, blank lines, and named sequences - next if $hex_code_point =~ /\P{ASCII_HEX_DIGIT}/; + next if $hex_code_point =~ /[^[:xdigit:]]/a; my $code_point = hex $hex_code_point; @@ -2791,8 +2987,11 @@ RETRY: $decomps{'LIST'} = ""; # This property has one special range not in the file: for the - # hangul syllables - my $done_hangul = 0; # Have we done the hangul range. + # hangul syllables. But not in Unicode version 1. + UnicodeVersion() unless defined $v_unicode_version; + my $done_hangul = ($v_unicode_version lt v2.0.0) + ? 1 + : 0; # Have we done the hangul range ? foreach my $line (split "\n", $original) { my ($hex_lower, $hex_upper, $type_and_map) = split "\t", $line; my $code_point = hex $hex_lower; @@ -2822,6 +3021,12 @@ RETRY: : "<hangul syllable>"; } + if ($value =~ / / && $hex_upper ne "" && $hex_upper ne $hex_lower) { + $line = sprintf("%04X\t%s\t%s", hex($hex_lower) + 1, $hex_upper, $value); + $hex_upper = ""; + $redo = 1; + } + # And append this to our constructed LIST. $decomps{'LIST'} .= "$hex_lower\t$hex_upper\t$value\n"; @@ -2863,8 +3068,8 @@ RETRY: } else { - # These should all single-element ranges. - croak __PACKAGE__, "::prop_invmap: Not expecting a mapping with multiple code points in a multi-element range, $ranges[$i]" if $hex_end ne ""; + # These should all be single-element ranges. + croak __PACKAGE__, "::prop_invmap: Not expecting a mapping with multiple code points in a multi-element range, $ranges[$i]" if $hex_end ne "" && $hex_end ne $hex_begin; # Convert them to decimal, as that's what's expected. 
$list .= "$hex_begin\t\t" @@ -3334,6 +3539,7 @@ sub UnicodeVersion { croak __PACKAGE__, "::VERSION: strange version '$UNICODEVERSION'" unless $UNICODEVERSION =~ /^\d+(?:\.\d+)+$/; } + $v_unicode_version = pack "C*", split /\./, $UNICODEVERSION; return $UNICODEVERSION; } @@ -3342,7 +3548,8 @@ sub UnicodeVersion { The difference between a block and a script is that scripts are closer to the linguistic notion of a set of code points required to present languages, while block is more of an artifact of the Unicode code point -numbering and separation into blocks of (mostly) 256 code points. +numbering and separation into blocks of consecutive code points (so far the +size of a block is some multiple of 16, like 128 or 256). For example the Latin B<script> is spread over several B<blocks>, such as C<Basic Latin>, C<Latin 1 Supplement>, C<Latin Extended-A>, and diff --git a/gnu/usr.bin/perl/lib/Unicode/UCD.t b/gnu/usr.bin/perl/lib/Unicode/UCD.t index 2e5a741f0f9..e070defbeae 100644 --- a/gnu/usr.bin/perl/lib/Unicode/UCD.t +++ b/gnu/usr.bin/perl/lib/Unicode/UCD.t @@ -19,7 +19,8 @@ use Test::More; use Unicode::UCD 'charinfo'; -$/ = 7; +my $input_record_separator = 7; # Make sure Unicode::UCD isn't affected by +$/ = $input_record_separator; # setting this. my $charinfo; @@ -342,7 +343,7 @@ is($bt->{AL}, 'Right-to-Left Arabic', 'AL is Right-to-Left Arabic'); # If this fails, then maybe one should look at the Unicode changes to see # what else might need to be updated. -is(Unicode::UCD::UnicodeVersion, '6.1.0', 'UnicodeVersion'); +is(Unicode::UCD::UnicodeVersion, '6.2.0', 'UnicodeVersion'); use Unicode::UCD qw(compexcl); @@ -373,9 +374,9 @@ is($casefold->{full}, '0073 0073', 'casefold 0xDF full'); is($casefold->{simple}, "", 'casefold 0xDF simple'); is($casefold->{turkic}, "", 'casefold 0xDF turkic'); -# Do different tests depending on if version <= 3.1, or not. 
-(my $version = Unicode::UCD::UnicodeVersion) =~ /^(\d+)\.(\d+)/; -if (defined $1 && ($1 <= 2 || $1 == 3 && defined $2 && $2 <= 1)) { +# Do different tests depending on if version < 3.2, or not. +my $v_unicode_version = pack "C*", split /\./, Unicode::UCD::UnicodeVersion(); +if ($v_unicode_version lt v3.2.0) { $casefold = casefold(0x130); is($casefold->{code}, '0130', 'casefold 0x130 code'); @@ -469,11 +470,13 @@ is(Unicode::UCD::_getcode('U+123x'), undef, "_getcode(x123)"); { my $r1 = charscript('Latin'); - my $n1 = @$r1; - is($n1, 30, "number of ranges in Latin script (Unicode 6.1.0)"); - shift @$r1 while @$r1; - my $r2 = charscript('Latin'); - is(@$r2, $n1, "modifying results should not mess up internal caches"); + if (ok(defined $r1, "Found Latin script")) { + my $n1 = @$r1; + is($n1, 30, "number of ranges in Latin script (Unicode 6.1.0)"); + shift @$r1 while @$r1; + my $r2 = charscript('Latin'); + is(@$r2, $n1, "modifying results should not mess up internal caches"); + } } { @@ -550,14 +553,18 @@ is_deeply(\@list, ], "prop_aliases('perldecimaldigit') returns Perl_Decimal_Digit as both short and full names"); # Get the official Unicode property name synonyms and test them. + +SKIP: { +skip "PropertyAliases.txt is not in this Unicode version", 1 if $v_unicode_version lt v3.2.0; open my $props, "<", "../lib/unicore/PropertyAliases.txt" or die "Can't open Unicode PropertyAliases.txt"; -$/ = "\n"; +local $/ = "\n"; while (<$props>) { s/\s*#.*//; # Remove comments next if /^\s* $/x; # Ignore empty and comment lines chomp; + local $/ = $input_record_separator; my $count = 0; # 0th field in line is short name; 1th is long name my $short_name; my $full_name; @@ -615,6 +622,7 @@ while (<$props>) { $count++; } } +} # End of SKIP block # Now test anything we can find that wasn't covered by the tests of the # official properties. 
We have no way of knowing if mktables omitted a Perl @@ -701,15 +709,20 @@ is(prop_value_aliases("gc", "isC"), undef, "prop_value_aliases('gc', 'isC') retu # correct. my %pva_tested; # List of things already tested. + +SKIP: { +skip "PropValueAliases.txt is not in this Unicode version", 1 if $v_unicode_version lt v3.2.0; open my $propvalues, "<", "../lib/unicore/PropValueAliases.txt" or die "Can't open Unicode PropValueAliases.txt"; +local $/ = "\n"; while (<$propvalues>) { s/\s*#.*//; # Remove comments next if /^\s* $/x; # Ignore empty and comment lines chomp; + local $/ = $input_record_separator; # Fix typo in official input file - s/CCC133/CCC132/g if $version eq "6.1.0"; + s/CCC133/CCC132/g if $v_unicode_version eq v6.1.0; my @fields = split /\s*;\s*/; # Fields are separated by semi-colons my $prop = shift @fields; # 0th field is the property, @@ -801,6 +814,7 @@ while (<$propvalues>) { $count++; } } +} # End of SKIP block # And test as best we can, the non-official pva's that mktables generates. foreach my $hash (\%utf8::loose_to_file_of, \%utf8::stricter_to_file_of) { @@ -953,6 +967,7 @@ sub fail_with_diff ($$$$) { require File::Temp; my $off = File::Temp->new(); + local $/ = "\n"; chomp $official; print $off $official, "\n"; close $off || die "Can't close official"; @@ -1037,7 +1052,9 @@ foreach my $set_of_tables (\%utf8::stricter_to_file_of, \%utf8::loose_to_file_of # Get rid of any trailing space and comments in the file. $official =~ s/\s*(#.*)?$//mg; + local $/ = "\n"; chomp $official; + $/ = $input_record_separator; # If we are to test against an inverted file, it is easier to invert # our array than the file. 
@@ -1091,7 +1108,9 @@ foreach my $set_of_tables (\%utf8::stricter_to_file_of, \%utf8::loose_to_file_of if ($i == @tested - 1 && $tested[$i] <= 0x10FFFF) { $tested .= sprintf("%04X\t10FFFF\n", $tested[$i]); } + local $/ = "\n"; chomp $tested; + $/ = $input_record_separator; if ($tested ne $official) { fail_with_diff($mod_table, $official, $tested, "prop_invlist"); next; @@ -1407,11 +1426,15 @@ foreach my $prop (keys %props) { } } } + local $/ = "\n"; chomp $official; + $/ = $input_record_separator; - # If there are any special elements, get a reference to them. + # Get the format for the file, and if there are any special elements, + # get a reference to them. my $swash_name = $utf8::file_to_swash_name{$base_file}; my $specials_ref; + my $file_format; if ($swash_name) { $specials_ref = $utf8::SwashInfo{$swash_name}{'specials_name'}; if ($specials_ref) { @@ -1420,6 +1443,8 @@ foreach my $prop (keys %props) { no strict 'refs'; $specials_ref = \%{$specials_ref}; } + + $file_format = $utf8::SwashInfo{$swash_name}{'format'}; } # Certain of the proxy properties have to be adjusted to match the @@ -1512,15 +1537,14 @@ foreach my $prop (keys %props) { # specials are superfluous. undef $specials_ref; } - elsif ($name eq 'bmg') { + elsif ($format !~ /^a/ && defined $file_format && $file_format eq 'x') { - # For this property, the file is output using hex notation for the - # map, with all ranges equal to length 1. Convert from hex to - # decimal. + # For these properties the file is output using hex notation for the + # map. Convert from hex to decimal. my @lines = split "\n", $official; foreach my $line (@lines) { - my ($code_point, $map) = split "\t\t", $line; - $line = $code_point . "\t\t" . hex $map; + my ($lower, $upper, $map) = split "\t", $line; + $line = "$lower\t$upper\t" . 
hex $map; } $official = join "\n", @lines; } @@ -1731,7 +1755,9 @@ foreach my $prop (keys %props) { # Here are done with generating what the file should look like + local $/ = "\n"; chomp $tested_map; + $/ = $input_record_separator; # And compare. if ($tested_map ne $official) { @@ -1801,7 +1827,9 @@ foreach my $prop (keys %props) { $official =~ s/$hex_code_point \t $alias \n //x; } } + local $/ = "\n"; chomp $official; + $/ = $input_record_separator; # Here have adjusted the file. We also have to adjust the returned # inversion map by checking and deleting all the lines in it that @@ -1889,7 +1917,9 @@ foreach my $prop (keys %props) { # Finished creating the string from the inversion map. Can compare # with what the file is. + local $/ = "\n"; chomp $tested_map; + $/ = $input_record_separator; if ($tested_map ne $official) { fail_with_diff($mod_prop, $official, $tested_map, "prop_invmap"); next PROPERTY; @@ -1996,4 +2026,5 @@ foreach my $prop (keys %props) { pass("prop_invmap('$mod_prop')"); } +ok($/ eq $input_record_separator, "The record separator didn't get overridden"); done_testing(); diff --git a/gnu/usr.bin/perl/lib/_charnames.pm b/gnu/usr.bin/perl/lib/_charnames.pm index 62ee39560de..4ab9411f8ea 100644 --- a/gnu/usr.bin/perl/lib/_charnames.pm +++ b/gnu/usr.bin/perl/lib/_charnames.pm @@ -7,7 +7,7 @@ package _charnames; use strict; use warnings; use File::Spec; -our $VERSION = '1.31'; +our $VERSION = '1.36'; use unicore::Name; # mktables-generated algorithmically-defined names use bytes (); # for $bytes::hint_bits @@ -77,11 +77,8 @@ my %system_aliases = ( #my %loose_system_aliases = ( #); -my %deprecated_aliases = ( - # Use of these gives deprecated message. - # Unicode 6.0 co-opted this for U+1F514, so deprecate it for now. 
- 'BELL' => pack("U", 0x07), -); +#my %deprecated_aliases; +#$deprecated_aliases{'BELL'} = pack("U", 0x07) if $^V lt v5.17.0; #my %loose_deprecated_aliases = ( #); @@ -146,8 +143,12 @@ sub carp sub alias (@) # Set up a single alias { + my @errors; + my $alias = ref $_[0] ? $_[0] : { @_ }; - foreach my $name (keys %$alias) { + foreach my $name (sort keys %$alias) { # Sort only because it helps having + # deterministic output for + # t/lib/charnames/alias my $value = $alias->{$name}; next unless defined $value; # Omit if screwed up. @@ -159,18 +160,53 @@ sub alias (@) # Set up a single alias $value = CORE::hex $1; } if ($value =~ $decimal_qr) { - no warnings qw(non_unicode surrogate nonchar); # Allow any non-malformed + no warnings qw(non_unicode surrogate nonchar); # Allow any of these $^H{charnames_ord_aliases}{$name} = pack("U", $value); # Use a canonical form. $^H{charnames_inverse_ords}{sprintf("%05X", $value)} = $name; } else { - # XXX validate syntax when deprecation cycle complete. ie. start - # with an alpha only, etc. - $^H{charnames_name_aliases}{$name} = $value; + # This regex needs to be sync'd with the code in toke.c that checks + # for the same thing + if ($name !~ / ^ + \p{_Perl_Charname_Begin} + \p{_Perl_Charname_Continue}* + $ /x) { + + push @errors, $name; + } + else { + $^H{charnames_name_aliases}{$name} = $value; + + if (warnings::enabled('deprecated')) { + if ($name =~ / ( .* \s ) ( \s* ) $ /x) { + carp "Trailing white-space in a charnames alias definition is deprecated; marked by <-- HERE in '$1 <-- HERE " . $2 . "'"; + } + + # Use '+' instead of '*' in this regex, because any trailing + # blanks have already been warned about. + if ($name =~ / ( .*? \s{2} ) ( .+ ) /x) { + carp "A sequence of multiple spaces in a charnames alias definition is deprecated; marked by <-- HERE in '$1 <-- HERE " . $2 . 
"'"; + } + } + } } } + + # We find and output all errors from this :alias definition, rather than + # failing on the first one, so fewer runs are needed to get it to compile + if (@errors) { + foreach my $name (@errors) { + my $ok = ""; + $ok = $1 if $name =~ / ^ ( \p{Alpha} [-\p{XPosixWord} ():\xa0]* ) /x; + my $first_bad = substr($name, length($ok), 1); + $name = "Invalid character in charnames alias definition; marked by <-- HERE in '$ok$first_bad<-- HERE " . substr($name, length($ok) + 1) . "'"; + } + croak join "\n", @errors; + } + + return; } # alias sub not_legal_use_bytes_msg { @@ -191,11 +227,11 @@ sub alias_file ($) # Reads a file containing alias definitions if (-f $arg && File::Spec->file_name_is_absolute ($arg)) { $file = $arg; } - elsif ($arg =~ m/^\w+$/) { + elsif ($arg =~ m/ ^ \p{_Perl_IDStart} \p{_Perl_IDCont}* $/x) { $file = "unicore/${arg}_alias.pl"; } else { - croak "Charnames alias files can only have identifier characters"; + croak "Charnames alias file names can only have identifier characters"; } if (my @alias = do $file) { @alias == 1 && !defined $alias[0] and @@ -320,14 +356,14 @@ sub lookup_name ($$$) { # elsif ($loose && exists $loose_system_aliases{$lookup_name}) { # $utf8 = $loose_system_aliases{$lookup_name}; # } - if (exists $deprecated_aliases{$lookup_name}) { - require warnings; - warnings::warnif('deprecated', - "Unicode character name \"$name\" is deprecated, use \"" - . viacode(ord $deprecated_aliases{$lookup_name}) - . "\" instead"); - $utf8 = $deprecated_aliases{$lookup_name}; - } +# if (exists $deprecated_aliases{$lookup_name}) { +# require warnings; +# warnings::warnif('deprecated', +# "Unicode character name \"$name\" is deprecated, use \"" +# . viacode(ord $deprecated_aliases{$lookup_name}) +# . "\" instead"); +# $utf8 = $deprecated_aliases{$lookup_name}; +# } # There are currently no entries in this hash, so don't waste time looking # for them. 
But the code is retained for the unlikely possibility that # some will be added in the future. @@ -457,18 +493,8 @@ sub lookup_name ($$$) { } my $case = $name_has_uppercase ? "CAPITAL" : "SMALL"; - if (! $scripts_trie - || $txt !~ - /\t (?: $scripts_trie ) \ (?:$case\ )? LETTER \ \U$lookup_name $/xm) - { - # Here we still don't have it, give up. - return if $runtime; - - # May have zapped input name, get it again. - $name = (defined $save_input) ? $save_input : $_[0]; - carp "Unknown charname '$name'"; - return ($wants_ord) ? 0xFFFD : pack("U", 0xFFFD); - } + return if (! $scripts_trie || $txt !~ + /\t (?: $scripts_trie ) \ (?:$case\ )? LETTER \ \U$lookup_name $/xm); # Here have found the input name in the table. @off = ($-[0] + 1, $+[0]); # The 1 is for the tab @@ -599,6 +625,7 @@ sub import ref $alias eq "HASH" or croak "Only HASH reference supported as argument to :alias"; alias ($alias); + $promote = 1; next; } if ($alias =~ m{:(\w+)$}) { @@ -607,7 +634,7 @@ sub import alias_file ($1) and $promote = 1; next; } - alias_file ($alias); + alias_file ($alias) and $promote = 1; next; } if (substr($arg, 0, 1) eq ':' @@ -752,7 +779,9 @@ sub viacode { # Here there is no user-defined alias, return any official one. 
return $return if defined $return; - if (CORE::hex($hex) > 0x10FFFF) { + if (CORE::hex($hex) > 0x10FFFF + && warnings::enabled('non_unicode')) + { carp "Unicode characters only allocated up to U+10FFFF (you asked for U+$hex)"; } return; diff --git a/gnu/usr.bin/perl/lib/charnames.pm b/gnu/usr.bin/perl/lib/charnames.pm index 495c30342fd..07ffe80a130 100644 --- a/gnu/usr.bin/perl/lib/charnames.pm +++ b/gnu/usr.bin/perl/lib/charnames.pm @@ -1,7 +1,7 @@ package charnames; use strict; use warnings; -our $VERSION = '1.30'; +our $VERSION = '1.36'; use unicore::Name; # mktables-generated algorithmically-defined names use _charnames (); # The submodule for this where most of the work gets done @@ -88,6 +88,8 @@ sub string_vianame { 1; __END__ +=encoding utf8 + =head1 NAME charnames - access to Unicode character names and named character sequences; also define character names @@ -110,12 +112,16 @@ charnames - access to Unicode character names and named character sequences; als use charnames qw(cyrillic greek); print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n"; + use utf8; use charnames ":full", ":alias" => { e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE", mychar => 0xE8000, # Private use area + "自転車に乗る人" => "BICYCLIST" }; print "\N{e_ACUTE} is a small letter e with an acute.\n"; print "\N{mychar} allows me to name private use characters.\n"; + print "And I can create synonyms in other languages,", + " such as \N{自転車に乗る人} for "BICYCLIST (U+1F6B4)\n"; use charnames (); print charnames::viacode(0x1234); # prints "ETHIOPIC SYLLABLE SEE" @@ -211,11 +217,11 @@ use variables inside the C<\N{...}>. If you want similar run-time functionality, use L<charnames::string_vianame()|/charnames::string_vianame(I<name>)>. -Since Unicode 6.0, it is deprecated to use C<BELL>. Instead use C<ALERT> (but -C<BEL> will continue to work). +Note, starting in Perl 5.18, the name C<BELL> refers to the Unicode character +U+1F514, instead of the traditional U+0007.
For the latter, use C<ALERT> +or C<BEL>. -If the input name is unknown, C<\N{NAME}> raises a warning and -substitutes the Unicode REPLACEMENT CHARACTER (U+FFFD). +It is a syntax error to use C<\N{NAME}> where C<NAME> is unknown. For C<\N{NAME}>, it is a fatal error if C<use bytes> is in effect and the input name is that of a character that won't fit into a byte (i.e., whose @@ -248,8 +254,8 @@ C<:loose> slows down look-ups by a factor of 2 to 3 versus C<:full>, but the trade-off may be worth it to you. Each individual look-up takes very little time, and the results are cached, so the speed difference would become a factor only in programs that do look-ups of many different -spellings, and probably only when those look-ups are through vianame() and -string_vianame(), since C<\N{...}> look-ups are done at compile time. +spellings, and probably only when those look-ups are through C<vianame()> and +C<string_vianame()>, since C<\N{...}> look-ups are done at compile time. =head1 ALIASES @@ -270,13 +276,19 @@ conventions. The aliases override any standard definitions, so, if you're twisted enough, you can change C<"\N{LATIN CAPITAL LETTER A}"> to mean C<"B">, etc. -Note that an alias should not be something that is a legal curly -brace-enclosed quantifier (see L<perlreref/QUANTIFIERS>). For example -C<\N{123}> means to match 123 non-newline characters, and is not treated as a -charnames alias. Aliases are discouraged from beginning with anything -other than an alphabetic character and from containing anything other -than alphanumerics, spaces, dashes, parentheses, and underscores. -Currently they must be ASCII. +Aliases must begin with a character that is alphabetic. After that, each may +contain any combination of word (C<\w>) characters, SPACE (U+0020), +HYPHEN-MINUS (U+002D), LEFT PARENTHESIS (U+0028), RIGHT PARENTHESIS (U+0029), +and NO-BREAK SPACE (U+00A0). 
These last three should never have been allowed +in names, and are retained for backwards compatibility only; they may be +deprecated and removed in future releases of Perl, so don't use them for new +names. (More precisely, the first character of a name you specify must be +something that matches all of C<\p{ID_Start}>, C<\p{Alphabetic}>, and +C<\p{Gc=Letter}>. This makes sure it is what any reasonable person would view +as an alphabetic character. And, the continuation characters that match C<\w> +must also match C<\p{ID_Continue}>.) Starting with Perl v5.18, any Unicode +characters meeting the above criteria may be used; prior to that only +Latin1-range characters were acceptable. An alias can map to either an official Unicode character name (not a loose matched name) or to a @@ -337,14 +349,15 @@ controlling C<"use charnames"> in the same scope apply, like C<:loose> or any L<script list, C<:short> option|/DESCRIPTION>, or L<custom aliases|/CUSTOM ALIASES> you may have defined. -The only difference is that if the input name is unknown, C<string_vianame> -returns C<undef> instead of the REPLACEMENT CHARACTER and does not raise a -warning message. +The only differences are due to the fact that C<string_vianame> is run-time +and C<\N{}> is compile time. You can't interpolate inside a C<\N{}>, (so +C<\N{$variable}> doesn't work); and if the input name is unknown, +C<string_vianame> returns C<undef> instead of it being a syntax error. =head1 charnames::vianame(I<name>) This is similar to C<string_vianame>. The main difference is that under most -circumstances, vianame returns an ordinal code +circumstances, C<vianame> returns an ordinal code point, whereas C<string_vianame> returns a string. For example, printf "U+%04X", charnames::vianame("FOUR TEARDROP-SPOKED ASTERISK"); @@ -379,7 +392,7 @@ If you define more than one name for the code point, it is indeterminate which one will be returned. 
As mentioned, the function returns C<undef> if no name is known for the code -point. In Unicode the proper name of these is the empty string, which +point. In Unicode the proper name for these is the empty string, which C<undef> stringifies to. (If you ask for a code point past the legal Unicode maximum of U+10FFFF that you haven't assigned an alias to, you get C<undef> plus a warning.) @@ -474,10 +487,6 @@ the form C<U+...>, it returns a chr instead. In this case, if C<use bytes> is in effect and the character won't fit into a byte, it returns C<undef> and raises a warning. -Names must be ASCII characters only, which means that you are out of luck if -you want to create aliases in a language where some or all the characters of -the desired aliases are non-ASCII. - Since evaluation of the translation function (see L</CUSTOM TRANSLATORS>) happens in the middle of compilation (of a string literal), the translation function should not do any C<eval>s or diff --git a/gnu/usr.bin/perl/lib/charnames.t b/gnu/usr.bin/perl/lib/charnames.t index 8aba9be9e0c..c58ccbe073e 100644 --- a/gnu/usr.bin/perl/lib/charnames.t +++ b/gnu/usr.bin/perl/lib/charnames.t @@ -62,12 +62,13 @@ EOE { - use charnames ':full', ":alias" => { mychar1 => "0xE8000", - mychar2 => 983040, # U+F0000 - mychar3 => "U+100000", - myctrl => 0x80, - mylarge => "U+111000", - }; + use charnames ":alias" => { mychar1 => "0xE8000", + mychar2 => 983040, # U+F0000 + mychar3 => "U+100000", + myctrl => 0x80, + mylarge => "U+111000", + }; + is ("\N{PILE OF POO}", chr(0x1F4A9), "Verify :alias alone implies :full"); is ("\N{mychar1}", chr(0xE8000), "Verify that can define hex alias"); is (charnames::viacode(0xE8000), "mychar1", "And that can get the alias back"); is ("\N{mychar2}", chr(0xF0000), "Verify that can define decimal alias"); @@ -295,11 +296,11 @@ is("\N{BOM}", chr(0xFEFF), 'Verify "\N{BOM}" is correct'); my $ok = ! grep { /"HORIZONTAL TABULATION" is deprecated.*"CHARACTER TABULATION"/ } @WARN; ok($ok, '... 
and doesnt give deprecated warning'); - # XXX These tests should be changed for 5.16, when we convert BELL to the - # Unicode version. - is("\N{BELL}", "\a", 'Verify "\N{BELL}" eq "\a"'); - my $ok = grep { /"BELL" is deprecated.*"ALERT"/ } @WARN; - ok($ok, '... and that gives correct deprecated warning'); + if ($^V lt v5.17.0) { + is("\N{BELL}", "\a", 'Verify "\N{BELL}" eq "\a"'); + my $ok = grep { /"BELL" is deprecated.*"ALERT"/ } @WARN; + ok($ok, '... and that gives correct deprecated warning'); + } no warnings 'deprecated'; @@ -365,7 +366,7 @@ is(charnames::viacode("U+00000000000FEED"), "ARABIC LETTER WAW ISOLATED FORM", ' is("\N{EOT}", "\N{END OF TRANSMISSION}", 'Verify "\N{EOT}" eq "\N{END OF TRANSMISSION}"'); is("\N{ENQ}", "\N{ENQUIRY}", 'Verify "\N{ENQ}" eq "\N{ENQUIRY}"'); is("\N{ACK}", "\N{ACKNOWLEDGE}", 'Verify "\N{ACK}" eq "\N{ACKNOWLEDGE}"'); - is("\N{BEL}", "\N{BELL}", 'Verify "\N{BEL}" eq "\N{BELL}"'); + is("\N{BEL}", "\N{BELL}", 'Verify "\N{BEL}" eq "\N{BELL}"') if $^V lt v5.17.0; is("\N{BS}", "\N{BACKSPACE}", 'Verify "\N{BS}" eq "\N{BACKSPACE}"'); is("\N{HT}", "\N{HORIZONTAL TABULATION}", 'Verify "\N{HT}" eq "\N{HORIZONTAL TABULATION}"'); is("\N{LF}", "\N{LINE FEED (LF)}", 'Verify "\N{LF}" eq "\N{LINE FEED (LF)}"'); @@ -776,7 +777,7 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V $res .= '-' . 
($^H{73174} // ""); $res .= '-2' if ":" =~ /\N{COLON}/; $res .= '-3' if ":" =~ /\N{COLON}/i; - is($res, "foo-foo-1--2-3", "Verify %^H doesn't get reset by \N{...}"); + is($res, "foo-foo-1--2-3", "Verify %^H doesn't get reset by \\N{...}"); } { use charnames qw(.*); @@ -836,7 +837,8 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V is("\N{mychar1}", "f", "Inner block: verify that \\N{mychar1} is redefined"); is(charnames::vianame("mychar1"), ord("f"), "Inner block: verify that vianame(mychar1) is redefined"); is(charnames::string_vianame("mychar1"), "f", "Inner block: verify that string_vianame(mychar1) is redefined"); - is("\N{mychar2}", "\x{FFFD}", "Inner block: verify that \\N{mychar2} outer definition didn't leak"); + eval '"\N{mychar2}"'; + like($@, qr/Unknown charname 'mychar2'/, "Inner block: verify that \\N{mychar2} outer definition didn't leak"); ok( ! defined charnames::vianame("mychar2"), "Inner block: verify that vianame(mychar2) outer definition didn't leak"); ok( ! defined charnames::string_vianame("mychar2"), "Inner block: verify that string_vianame(mychar2) outer definition didn't leak"); is("\N{myprivate1}", "\x{E8001}", "Inner block: verify that \\N{myprivate1} is redefined "); @@ -844,38 +846,46 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V is(charnames::string_vianame("myprivate1"), chr(0xE8001), "Inner block: verify that string_vianame(myprivate1) is redefined"); is(charnames::viacode(0xE8001), "myprivate1", "Inner block: verify that myprivate1 viacode is redefined"); ok(! defined charnames::viacode(0xE8000), "Inner block: verify that outer myprivate1 viacode didn't leak"); - is("\N{myprivate2}", "\x{FFFD}", "Inner block: verify that \\N{myprivate2} outer definition didn't leak"); + eval '"\N{myprivate2}"'; + like($@, qr/Unknown charname 'myprivate2'/, "Inner block: verify that \\N{myprivate2} outer definition didn't leak"); ok(! 
defined charnames::vianame("myprivate2"), "Inner block: verify that vianame(myprivate2) outer definition didn't leak"); ok(! defined charnames::string_vianame("myprivate2"), "Inner block: verify that string_vianame(myprivate2) outer definition didn't leak"); ok(! defined charnames::viacode(0x100000), "Inner block: verify that myprivate2 viacode outer definition didn't leak"); is("\N{BE}", $hiragana_be, "Inner block: verify that \\N uses the correct script"); cmp_ok(charnames::vianame("BE"), "==", ord($hiragana_be), "Inner block: verify that vianame uses the correct script"); cmp_ok(charnames::string_vianame("BE"), "==", $hiragana_be, "Inner block: verify that string_vianame uses the correct script"); - is("\N{Hiragana: BE}", "\x{FFFD}", "Inner block without :short: \\N with short doesn't work"); + eval '"\N{Hiragana: BE}"'; + like($@, qr/Unknown charname 'Hiragana: BE'/, "Inner block without :short: \\N with short doesn't work"); ok(! defined charnames::vianame("Hiragana: BE"), "Inner block without :short: verify that vianame with short doesn't work"); ok(! defined charnames::string_vianame("Hiragana: BE"), "Inner block without :short: verify that string_vianame with short doesn't work"); { # An inner block where only :short definitions are valid. use charnames ":short"; - is("\N{mychar1}", "\x{FFFD}", "Inner inner block: verify that mychar1 outer definition didn't leak with \\N"); + eval '"\N{mychar1}"'; + like($@, qr/Unknown charname 'mychar1'/, "Inner inner block: verify that mychar1 outer definition didn't leak with \\N"); ok( ! defined charnames::vianame("mychar1"), "Inner inner block: verify that mychar1 outer definition didn't leak with vianame"); ok( ! 
defined charnames::string_vianame("mychar1"), "Inner inner block: verify that mychar1 outer definition didn't leak with string_vianame"); - is("\N{mychar2}", "\x{FFFD}", "Inner inner block: verify that mychar2 outer definition didn't leak with \\N"); + eval '"\N{mychar2}"'; + like($@, qr/Unknown charname 'mychar2'/, "Inner inner block: verify that mychar2 outer definition didn't leak with \\N"); ok( ! defined charnames::vianame("mychar2"), "Inner inner block: verify that mychar2 outer definition didn't leak with vianame"); ok( ! defined charnames::string_vianame("mychar2"), "Inner inner block: verify that mychar2 outer definition didn't leak with string_vianame"); - is("\N{myprivate1}", "\x{FFFD}", "Inner inner block: verify that myprivate1 outer definition didn't leak with \\N"); + eval '"\N{myprivate1}"'; + like($@, qr/Unknown charname 'myprivate1'/, "Inner inner block: verify that myprivate1 outer definition didn't leak with \\N"); ok(! defined charnames::vianame("myprivate1"), "Inner inner block: verify that myprivate1 outer definition didn't leak with vianame"); ok(! defined charnames::string_vianame("myprivate1"), "Inner inner block: verify that myprivate1 outer definition didn't leak with string_vianame"); - is("\N{myprivate2}", "\x{FFFD}", "Inner inner block: verify that myprivate2 outer definition didn't leak with \\N"); + eval '"\N{myprivate2}"'; + like($@, qr/Unknown charname 'myprivate2'/, "Inner inner block: verify that myprivate2 outer definition didn't leak with \\N"); ok(! defined charnames::vianame("myprivate2"), "Inner inner block: verify that myprivate2 outer definition didn't leak with vianame"); ok(! defined charnames::string_vianame("myprivate2"), "Inner inner block: verify that myprivate2 outer definition didn't leak with string_vianame"); ok(! defined charnames::viacode(0xE8000), "Inner inner block: verify that mychar1 outer outer definition didn't leak with viacode"); ok(! 
defined charnames::viacode(0xE8001), "Inner inner block: verify that mychar1 outer definition didn't leak with viacode"); ok(! defined charnames::viacode(0x100000), "Inner inner block: verify that mychar2 outer definition didn't leak with viacode"); - is("\N{BE}", "\x{FFFD}", "Inner inner block without script: verify that outer :script didn't leak with \\N"); + eval '"\N{BE}"'; + like($@, qr/Unknown charname 'BE'/, "Inner inner block without script: verify that outer :script didn't leak with \\N"); ok(! defined charnames::vianame("BE"), "Inner inner block without script: verify that outer :script didn't leak with vianames"); ok(! defined charnames::string_vianame("BE"), "Inner inner block without script: verify that outer :script didn't leak with string_vianames"); - is("\N{HIRAGANA LETTER BE}", "\x{FFFD}", "Inner inner block without :full: verify that outer :full didn't leak with \\N"); + eval '"\N{HIRAGANA LETTER BE}"'; + like($@, qr/Unknown charname 'HIRAGANA LETTER BE'/, "Inner inner block without :full: verify that outer :full didn't leak with \\N"); is("\N{Hiragana: BE}", $hiragana_be, "Inner inner block with :short: verify that \\N works with :short"); cmp_ok(charnames::vianame("Hiragana: BE"), "==", ord($hiragana_be), "Inner inner block with :short: verify that vianame works with :short"); cmp_ok(charnames::string_vianame("Hiragana: BE"), "==", $hiragana_be, "Inner inner block with :short: verify that string_vianame works with :short"); @@ -885,7 +895,8 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V is("\N{mychar1}", "f", "Inner block: verify that \\N{mychar1} is redefined"); is(charnames::vianame("mychar1"), ord("f"), "Inner block: verify that vianame(mychar1) is redefined"); is(charnames::string_vianame("mychar1"), "f", "Inner block: verify that string_vianame(mychar1) is redefined"); - is("\N{mychar2}", "\x{FFFD}", "Inner block: verify that \\N{mychar2} outer definition didn't leak"); + eval '"\N{mychar2}"'; + 
like($@, qr/Unknown charname 'mychar2'/, "Inner block: verify that \\N{mychar2} outer definition didn't leak"); ok( ! defined charnames::vianame("mychar2"), "Inner block: verify that vianame(mychar2) outer definition didn't leak"); ok( ! defined charnames::string_vianame("mychar2"), "Inner block: verify that string_vianame(mychar2) outer definition didn't leak"); is("\N{myprivate1}", "\x{E8001}", "Inner block: verify that \\N{myprivate1} is redefined "); @@ -893,14 +904,16 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V is(charnames::string_vianame("myprivate1"), chr(0xE8001), "Inner block: verify that string_vianame(myprivate1) is redefined"); is(charnames::viacode(0xE8001), "myprivate1", "Inner block: verify that myprivate1 viacode is redefined"); ok(! defined charnames::viacode(0xE8000), "Inner block: verify that outer myprivate1 viacode didn't leak"); - is("\N{myprivate2}", "\x{FFFD}", "Inner block: verify that \\N{myprivate2} outer definition didn't leak"); + eval '"\N{myprivate2}"'; + like($@, qr/Unknown charname 'myprivate2'/, "Inner block: verify that \\N{myprivate2} outer definition didn't leak"); ok(! defined charnames::vianame("myprivate2"), "Inner block: verify that vianame(myprivate2) outer definition didn't leak"); ok(! defined charnames::string_vianame("myprivate2"), "Inner block: verify that string_vianame(myprivate2) outer definition didn't leak"); ok(! 
defined charnames::viacode(0x100000), "Inner block: verify that myprivate2 viacode outer definition didn't leak"); is("\N{BE}", $hiragana_be, "Inner block: verify that \\N uses the correct script"); cmp_ok(charnames::vianame("BE"), "==", ord($hiragana_be), "Inner block: verify that vianame uses the correct script"); cmp_ok(charnames::string_vianame("BE"), "==", $hiragana_be, "Inner block: verify that string_vianame uses the correct script"); - is("\N{Hiragana: BE}", "\x{FFFD}", "Inner block without :short: \\N with short doesn't work"); + eval '"\N{Hiragana: BE}"'; + like($@, qr/Unknown charname 'Hiragana: BE'/, "Inner block without :short: \\N with short doesn't work"); ok(! defined charnames::vianame("Hiragana: BE"), "Inner block without :short: verify that vianame with short doesn't work"); ok(! defined charnames::string_vianame("Hiragana: BE"), "Inner block without :short: verify that string_vianame with short doesn't work"); } @@ -933,7 +946,8 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V is(charnames::string_vianame("O-i"), chr(0x10426), "Verify that loose script list matching works with string_vianame"); is(charnames::vianame("o i"), 0x1044E, "Verify that loose script list matching works with vianame"); } - is ("\N{latincapitallettera}", "\x{FFFD}", "Verify that loose matching caching doesn't leak outside of scope"); + eval '"\N{latincapitallettera}"'; + like($@, qr/Unknown charname 'latincapitallettera'/, "Verify that loose matching caching doesn't leak outside of scope"); { use charnames qw(:loose :short); cmp_ok("\N{co pt-ic:she-i}", "==", chr(0x3E3), "Verify that loose :short matching works"); @@ -1013,14 +1027,14 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V my $decimal = hex $code; # The Unicode version 1 name is used instead of any that are - # marked <control> + # marked <control>. 
$name = $u1name if $name eq "<control>"; - $name = 'ALERT' if $decimal == 7; + # In earlier Perls, we reject this code point's name (BELL) + $name = "" if $^V lt v5.17.0 && $decimal == 0x1F514; - # XXX This test should be changed for 5.16 when we convert to use - # Unicode's BELL - $name = "" if $decimal == 0x1F514; + # ALERT overrides BELL + $name = 'ALERT' if $decimal == 7; # Some don't have names, leave those array elements undefined next unless $name; @@ -1053,23 +1067,67 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V } close $fh; - # The Hangul syllable names aren't in the file above; their names - # are algorithmically determinable, but to avoid perpetuating any - # programming errors, this file contains the complete list, gathered - # from the web. - while (<DATA>) { - chomp; - next unless $_; # Guard against empty lines getting inserted. - my ($code, $name) = split ";"; - my $decimal = hex $code; - $names[$decimal] = $name; - my $block = $decimal >> $block_size_bits; - $algorithmic_names_count[$block] = 1; + use Unicode::UCD; + if (pack("C*", split /\./, Unicode::UCD::UnicodeVersion()) gt v1.1.5) { + # The Hangul syllable names aren't in the file above; their names + # are algorithmically determinable, but to avoid perpetuating any + # programming errors, this file contains the complete list, gathered + # from the web. + while (<DATA>) { + chomp; + next unless $_; # Guard against empty lines getting inserted. 
+ my ($code, $name) = split ";"; + my $decimal = hex $code; + $names[$decimal] = $name; + my $block = $decimal >> $block_size_bits; + $algorithmic_names_count[$block] = 1; + } } - open $fh, "<", "../../lib/unicore/NameAliases.txt" or - die "Can't open ../../lib/unicore/NameAliases.txt: $!"; - while (<$fh>) { + my @name_aliases; + use Unicode::UCD; + if (ord('A') != 65 + || pack( "C*", split /\./, Unicode::UCD::UnicodeVersion()) ge v6.1.0) + { + open my $fh, "<", "../../lib/unicore/NameAliases.txt" + or die "Can't open ../../lib/unicore/NameAliases.txt: $!"; + @name_aliases = <$fh> + } + else { + + # If this Unicode version doesn't have the full .txt file, or are on + # an EBCDIC platform where they need to be translated, get the data + # from prop_invmap() (which should do the translation) and convert it + # to the file's format + use Unicode::UCD 'prop_invmap'; + my ($invlist_ref, $invmap_ref, undef, $default) + = prop_invmap('Name_Alias'); + for my $i (0 .. @$invlist_ref - 1) { + + # Convert the aliases for code points that have just one alias to + # single element arrays for uniform handling below. + if (! ref $invmap_ref->[$i]) { + + # But we test only the real aliases, not the ones which are + # just really placeholders. + next if $invmap_ref->[$i] eq $default; + + $invmap_ref->[$i] = [ $invmap_ref->[$i] ]; + } + + + # Change each alias for the code point to the form that the file + # has + foreach my $j ($invlist_ref->[$i] .. 
$invlist_ref->[$i+1] - 1) { + foreach my $value (@{$invmap_ref->[$i]}) { + $value =~ s/: /;/; + push @name_aliases, sprintf("%04X;%s\n", $j, $value); + } + } + } + } + + for (@name_aliases) { chomp; s/^\s*#.*//; next unless $_; @@ -1185,21 +1243,26 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V $block = $end_block + 1; } - open $fh, "<", "../../lib/unicore/NamedSequences.txt" or - die "Can't open ../../lib/unicore/NamedSequences.txt: $!"; - while (<$fh>) { - chomp; - s/^\s*#.*//; - next unless $_; - my ($name, $codes) = split ";"; - my $utf8 = pack("U*", map { hex } split " ", $codes); - is(charnames::string_vianame($name), $utf8, "Verify string_vianame(\"$name\") is the proper utf8"); - my $loose_name = get_loose_name($name); - use charnames ":loose"; - is(charnames::string_vianame($loose_name), $utf8, "Verify string_vianame(\"$loose_name\") is the proper utf8"); - #diag("$name, $utf8"); + if (open my $fh, "<", "../../lib/unicore/NamedSequences.txt") { + while (<$fh>) { + chomp; + s/^\s*#.*//; + next unless $_; + my ($name, $codes) = split ";"; + my $utf8 = pack("U*", map { hex } split " ", $codes); + is(charnames::string_vianame($name), $utf8, "Verify string_vianame(\"$name\") is the proper utf8"); + my $loose_name = get_loose_name($name); + use charnames ":loose"; + is(charnames::string_vianame($loose_name), $utf8, "Verify string_vianame(\"$loose_name\") is the proper utf8"); + #diag("$name, $utf8"); + } + close $fh; + } + else { + use Unicode::UCD; + die "Can't open ../../lib/unicore/NamedSequences.txt: $!" 
+ if pack("C*", split /\./, Unicode::UCD::UnicodeVersion()) ge v4.1.0; } - close $fh; unless ($all_pass) { diff --git a/gnu/usr.bin/perl/lib/dbm_filter_util.pl b/gnu/usr.bin/perl/lib/dbm_filter_util.pl index 2f8af209e22..0105a5c30ac 100644 --- a/gnu/usr.bin/perl/lib/dbm_filter_util.pl +++ b/gnu/usr.bin/perl/lib/dbm_filter_util.pl @@ -1,5 +1,8 @@ use strict; use warnings; +use Data::Dumper; + +*qquote= *Data::Dumper::qquote; sub StoreData { @@ -36,11 +39,11 @@ sub VerifyData while (my ($k, $v) = each %$hashref) { no warnings 'uninitialized'; if ($expected{$k} eq $v) { - #diag "Match [$k][$v]"; + #diag "Match " . qquote($k) . " => " . qquote($v); delete $expected{$k} ; } else { - #diag "No Match [$k][$v]"; + #diag "No Match " . qquote($k) . " => " . qquote($v) . " want " . qquote($expected{$k}); $bad{$k} = $v; } } @@ -50,17 +53,17 @@ sub VerifyData if (keys %expected ) { $bad .=" No Match from Expected:\n" ; while (my ($k, $v) = each %expected) { - $bad .= "\t'$k' =>\t'$v'\n"; + $bad .= "\t" . qquote($k) . " => " . qquote($v) . "\n"; } } if (keys %bad ) { $bad .= "\n No Match from Actual:\n" ; while (my ($k, $v) = each %bad) { no warnings 'uninitialized'; - $bad .= "\t'$k' =>\t'$v'\n"; + $bad .= "\t" . qquote($k) . " => " . qquote($v) . "\n"; } } - diag "${bad}\n" ; + diag( "${bad}\n" ); } } diff --git a/gnu/usr.bin/perl/lib/diagnostics.t b/gnu/usr.bin/perl/lib/diagnostics.t index 60e01e1b81a..6eecdda0ad2 100644 --- a/gnu/usr.bin/perl/lib/diagnostics.t +++ b/gnu/usr.bin/perl/lib/diagnostics.t @@ -4,7 +4,7 @@ BEGIN { chdir '..' 
if -d '../pod' && -d '../t'; @INC = 'lib'; require './t/test.pl'; - plan(18); + plan(24); } BEGIN { @@ -20,7 +20,8 @@ eval { 'base'->import(qw(I::do::not::exist)); }; -like( $@, qr/^Base class package "I::do::not::exist" is empty/); +like( $@, qr/^Base class package "I::do::not::exist" is empty/, + 'diagnostics not tripped up by "use base qw(Dont::Exist)"'); open *whatever, ">", \my $warning or die "Couldn't redirect STDERR to var: $!"; @@ -48,7 +49,7 @@ like $warning, qr/using lex_stuff_pvn or similar/, 'L<foo|bar/baz>'; seek STDERR, 0,0; $warning = ''; warn 'Code point 0xBEE5 is not Unicode, may not be portable'; -like $warning, qr/W utf8/, +like $warning, qr/S utf8/, 'Message sharing its description with the following message'; # Periods at end of entries in perldiag.pod get matched correctly @@ -69,6 +70,12 @@ $warning = ''; warn "Unicode surrogate U+C0FFEE is illegal in UTF-8"; like $warning, qr/You had a UTF-16 surrogate/, '%X'; +# Test for %p +seek STDERR, 0,0; +$warning = ''; +warn "Slab leaked from cv fadedc0ffee"; +like $warning, qr/bookkeeping of op trees/, '%p'; + # Strip S<> seek STDERR, 0,0; $warning = ''; @@ -89,6 +96,39 @@ like $warning, qr/You tried to load a file.*Perl could not compile/s, 'multiline errors'; +# Multiline entry in perldiag.pod +seek STDERR, 0,0; +$warning = ''; +warn "Using just the first character returned by \\N{} in character class in regex; marked by <-- HERE in m/%s/"; +like $warning, + qr/A charnames handler may return a sequence/s, + 'multi-line entries in perldiag.pod match'; + +# ; at end of entry in perldiag.pod +seek STDERR, 0,0; +$warning = ''; +warn "Perl folding rules are not up-to-date for 0xA; please use the perlbug utility to report; in regex; marked by <-- HERE in m/\ <-- HERE q/"; +like $warning, + qr/regular expression folding rules/s, + '; works at the end of entries in perldiag.pod'; + +# Differences in spaces in warnings (Why not be nice and accept them?) 
+seek STDERR, 0,0; +$warning = ''; +warn "Assignment to both a list and a scalar\n"; +like $warning, + qr/2nd and 3rd/s, + 'spaces in warnings are matched lightly'; + +# Differences in spaces in warnings with a period at the end +seek STDERR, 0,0; +$warning = ''; +warn "perl: warning: Setting locale failed.\n"; +like $warning, + qr/The whole warning/s, + 'spaces in warnings with periods at the end are matched lightly'; + + *STDERR = $old_stderr; # These tests use a panic under the hope that the description is not likely @@ -145,3 +185,13 @@ like runperl( main::bar\(\) called at -e line \d+ main::foo\(\) called at -e line \d+ /, 'backtrace from multiline error'; +is runperl(@runperl_args, prog => 'BEGIN { die q _panic: gremlins_ }'), + << 'EOX', 'BEGIN{die} does not suppress diagnostics'; +panic: gremlins at -e line 1. +BEGIN failed--compilation aborted at -e line 1 (#1) + (P) An internal error. + +Uncaught exception from user code: + panic: gremlins at -e line 1. + BEGIN failed--compilation aborted at -e line 1. 
+EOX diff --git a/gnu/usr.bin/perl/lib/dumpvar.t b/gnu/usr.bin/perl/lib/dumpvar.t index f4f55d9744f..3e48b1728fb 100644 --- a/gnu/usr.bin/perl/lib/dumpvar.t +++ b/gnu/usr.bin/perl/lib/dumpvar.t @@ -54,6 +54,11 @@ package Tyre; sub TIESCALAR{bless[]} # other methods intentionally omitted +package Kerb; + +sub TIEHASH{bless{}} +# other methods intentionally omitted + package main; my $foo = Foo->new(1..5); @@ -331,3 +336,7 @@ EXPECT local *_; tie $_, 'Tyre'; unctrl('abc'); EXPECT abc +######## +tie my %h, 'Kerb'; my $v = { a => 1, b => \%h, c => 2 }; dumpvalue($v); +EXPECT +/'a' => 1\n.+Can't locate object method.+'c' => 2/s diff --git a/gnu/usr.bin/perl/lib/feature.pm b/gnu/usr.bin/perl/lib/feature.pm index 87b42aa916b..d45174c907d 100644 --- a/gnu/usr.bin/perl/lib/feature.pm +++ b/gnu/usr.bin/perl/lib/feature.pm @@ -5,7 +5,7 @@ package feature; -our $VERSION = '1.27'; +our $VERSION = '1.32'; our %feature = ( fc => 'feature_fc', @@ -15,6 +15,7 @@ our %feature = ( evalbytes => 'feature_evalbytes', array_base => 'feature_arybase', current_sub => 'feature___SUB__', + lexical_subs => 'feature_lexsubs', unicode_eval => 'feature_unieval', unicode_strings => 'feature_unicode', ); @@ -23,7 +24,7 @@ our %feature_bundle = ( "5.10" => [qw(array_base say state switch)], "5.11" => [qw(array_base say state switch unicode_strings)], "5.15" => [qw(current_sub evalbytes fc say state switch unicode_eval unicode_strings)], - "all" => [qw(array_base current_sub evalbytes fc say state switch unicode_eval unicode_strings)], + "all" => [qw(array_base current_sub evalbytes fc lexical_subs say state switch unicode_eval unicode_strings)], "default" => [qw(array_base)], ); @@ -31,6 +32,8 @@ $feature_bundle{"5.12"} = $feature_bundle{"5.11"}; $feature_bundle{"5.13"} = $feature_bundle{"5.11"}; $feature_bundle{"5.14"} = $feature_bundle{"5.11"}; $feature_bundle{"5.16"} = $feature_bundle{"5.15"}; +$feature_bundle{"5.17"} = $feature_bundle{"5.15"}; +$feature_bundle{"5.18"} = $feature_bundle{"5.15"}; 
$feature_bundle{"5.9.5"} = $feature_bundle{"5.10"}; our $hint_shift = 26; @@ -77,7 +80,7 @@ pragma.) =head2 Lexical effect Like other pragmas (C<use strict>, for example), features have a lexical -effect. C<use feature qw(foo)> will only make the feature "foo" available +effect. C<use feature qw(foo)> will only make the feature "foo" available from that point to the end of the enclosing block. { @@ -137,7 +140,8 @@ C<use feature 'unicode_strings'> tells the compiler to use Unicode semantics in all string operations executed within its scope (unless they are also within the scope of either C<use locale> or C<use bytes>). The same applies to all regular expressions compiled within the scope, even if executed outside -it. +it. It does not change the internal representation of strings, but only how +they are interpreted. C<no feature 'unicode_strings'> tells the compiler to use the traditional Perl semantics wherein the native character set semantics is used unless it is @@ -223,6 +227,20 @@ See L<perlfunc/fc> for details. This feature is available from Perl 5.16 onwards. +=head2 The 'lexical_subs' feature + +B<WARNING>: This feature is still experimental and the implementation may +change in future versions of Perl. For this reason, Perl will +warn when you use the feature, unless you have explicitly disabled the +warning: + + no warnings "experimental::lexical_subs"; + +This enables declaration of subroutines via C<my sub foo>, C<state sub foo> +and C<our sub foo> syntax. See L<perlsub/Lexical Subroutines> for details. + +This feature is available from Perl 5.18 onwards. 
+ =head1 FEATURE BUNDLES It's possible to load multiple features together, using @@ -246,6 +264,9 @@ The following feature bundles are available: :5.16 say state switch unicode_strings unicode_eval evalbytes current_sub fc + :5.18 say state switch unicode_strings + unicode_eval evalbytes current_sub fc + The C<:default> bundle represents the feature set that is enabled before any C<use feature> or C<no feature> declaration. diff --git a/gnu/usr.bin/perl/lib/feature/unicode_strings.t b/gnu/usr.bin/perl/lib/feature/unicode_strings.t index 7e557b2bc78..8bd536f258d 100755 --- a/gnu/usr.bin/perl/lib/feature/unicode_strings.t +++ b/gnu/usr.bin/perl/lib/feature/unicode_strings.t @@ -172,6 +172,7 @@ for my $i ( 0x30 .. 0x39, # 0-9 my @s = (0) x 256; $s[ord_latin1_to_native 0x09] = 1; # Tab $s[ord_latin1_to_native 0x0A] = 1; # LF +$s[ord_latin1_to_native 0x0B] = 1; # VT $s[ord_latin1_to_native 0x0C] = 1; # FF $s[ord_latin1_to_native 0x0D] = 1; # CR $s[ord_latin1_to_native 0x20] = 1; # SPACE diff --git a/gnu/usr.bin/perl/lib/filetest.pm b/gnu/usr.bin/perl/lib/filetest.pm index b08f1dc9107..a4049832e67 100644 --- a/gnu/usr.bin/perl/lib/filetest.pm +++ b/gnu/usr.bin/perl/lib/filetest.pm @@ -1,6 +1,6 @@ package filetest; -our $VERSION = '1.02'; +our $VERSION = '1.03'; =head1 NAME @@ -71,7 +71,7 @@ of the operators is a filename, not when it is a filehandle. Because access() does not invoke stat() (at least not in a way visible to Perl), B<the stat result cache "_" is not set>. This means that the outcome of the following two tests is different. The first has the stat -bits of C</etc/passwd> in C<_>, and in the second case this still +bits of F</etc/passwd> in C<_>, and in the second case this still contains the bits of C</etc>. 
{ -d '/etc'; diff --git a/gnu/usr.bin/perl/lib/filetest.t b/gnu/usr.bin/perl/lib/filetest.t index 98a19bdf5fd..50886117313 100644 --- a/gnu/usr.bin/perl/lib/filetest.t +++ b/gnu/usr.bin/perl/lib/filetest.t @@ -57,6 +57,7 @@ SKIP: { my $chflags = "/usr/bin/chflags"; my $tstfile = "filetest.tst"; skip("No $chflags available", 4) if !-x $chflags; + skip("Dragonfly filetests seem non-chflags aware", 4) if $^O eq 'dragonfly'; my $skip_eff_user_tests = (!$Config{d_setreuid} && !$Config{d_setresuid}) || diff --git a/gnu/usr.bin/perl/lib/h2xs.t b/gnu/usr.bin/perl/lib/h2xs.t index 6ce37ee3570..d10ce75c1ca 100644 --- a/gnu/usr.bin/perl/lib/h2xs.t +++ b/gnu/usr.bin/perl/lib/h2xs.t @@ -159,8 +159,10 @@ for (my $i = $#tests; $i > 0; $i-=3) { # 1 test for running it, 1 test for the expected result, and 1 for each file # plus 1 to open and 1 to check for the use in lib/$name.pm and Makefile.PL # And 1 more for our check for "bonus" files, 2 more for ExtUtil::Manifest. + # And 1 more to examine const-c.inc contents in tests that use $header. # use the () to force list context and hence count the number of matches. $total_tests += 9 + (() = $tests[$i] =~ /(Writing)/sg); + $total_tests++ if $tests[$i-2] =~ / \Q$header\E$/; } plan tests => $total_tests; @@ -169,6 +171,8 @@ ok (open (HEADER, ">$header"), "open '$header'"); print HEADER <<HEADER or die $!; #define Camel 2 #define Dromedary 1 +#define Bactrian /* empty */ +#define Bactrian2 HEADER ok (close (HEADER), "close '$header'"); @@ -215,6 +219,23 @@ while (my ($args, $version, $expectation) = splice @tests, 0, 3) { pop @INC; chdir ($up) or die "chdir $up failed: $!"; + if ($args =~ / \Q$header\E$/) { + my $const_c = File::Spec->catfile($name, 'fallback', 'const-c.inc'); + my ($found, $diag); + if (!open FILE, '<', $const_c) { + $diag = "can't open $const_c: $!"; + } + else { + while (<FILE>) { + next unless /\b Bactrian 2? 
\b/x; + $found = 1; + last; + } + } + ok (!$found, "generated $const_c has no Bactrian(2)"); + diag ($diag) if defined $diag; + } + foreach my $leaf (File::Spec->catfile('lib', "$name.pm"), 'Makefile.PL') { my $file = File::Spec->catfile($name, $leaf); if (ok (open (FILE, $file), "open $file")) { diff --git a/gnu/usr.bin/perl/lib/overload.t b/gnu/usr.bin/perl/lib/overload.t index c0478eef7f9..74adae340e5 100644 --- a/gnu/usr.bin/perl/lib/overload.t +++ b/gnu/usr.bin/perl/lib/overload.t @@ -48,7 +48,7 @@ package main; $| = 1; BEGIN { require './test.pl' } -plan tests => 5041; +plan tests => 5191; use Scalar::Util qw(tainted); @@ -131,7 +131,7 @@ $b++; is(ref $b, "Oscalar"); is($a, "087"); -is($b, "88"); +is($b, "89"); is(ref $a, "Oscalar"); package Oscalar; @@ -142,7 +142,7 @@ $b++; is(ref $b, "Oscalar"); is($a, "087"); -is($b, "90"); +is($b, "91"); is(ref $a, "Oscalar"); $b=$a; @@ -267,11 +267,12 @@ is("$aI", "xx"); is($aI, "xx"); is("b${aI}c", "_._.b.__.xx._.__.c._"); -# Here we test blessing to a package updates hash +# Here we test that both "no overload" and +# blessing to a package update hash eval "package Oscalar; no overload '.'"; -is("b${a}", "_.b.__.xx._"); +is("b${a}", "bxx"); $x="1"; bless \$x, Oscalar; is("b${a}c", "bxxc"); @@ -291,8 +292,8 @@ like($@, qr/no method found/); eval "package Oscalar; sub comple; use overload '~' => 'comple'"; -$na = eval { ~$a }; # Hash was not updated -like($@, qr/no method found/); +$na = eval { ~$a }; +is($@, ''); bless \$x, Oscalar; @@ -303,8 +304,8 @@ is($na, '_!_xx_!_'); $na = 0; -$na = eval { ~$aI }; # Hash was not updated -like($@, qr/no method found/); +$na = eval { ~$aI }; +like($@, ''); bless \$x, OscalarI; @@ -316,8 +317,8 @@ is($na, '_!_xx_!_'); eval "package Oscalar; sub rshft; use overload '>>' => 'rshft'"; -$na = eval { $aI >> 1 }; # Hash was not updated -like($@, qr/no method found/); +$na = eval { $aI >> 1 }; +is($@, ''); bless \$x, OscalarI; @@ -961,11 +962,16 @@ unless ($aaa) { my $a = "" ; local 
$SIG{__WARN__} = sub {$a = $_[0]} ; $x = eval ' use overload "~|_|~" => sub{} ' ; + eval ' no overload "~|_|~" ' ; is($a, ""); use warnings 'overload' ; $x = eval ' use overload "~|_|~" => sub{} ' ; like($a, qr/^overload arg '~\|_\|~' is invalid at \(eval \d+\) line /, 'invalid arg warning'); + undef $a; + eval ' no overload "~|_|~" ' ; + like($a, qr/^overload arg '~\|_\|~' is invalid at \(eval \d+\) line /, + 'invalid arg warning'); } { @@ -1113,18 +1119,6 @@ like ($@, qr/zap/); } { - package Numify; - use overload (qw(0+ numify fallback 1)); - - sub new { - my $val = $_[1]; - bless \$val, $_[0]; - } - - sub numify { ${$_[0]} } -} - -{ package perl31793; use overload cmp => sub { 0 }; package perl31793_fb; @@ -1145,8 +1139,20 @@ like ($@, qr/zap/); like(overload::StrVal($no), qr/^no_overload=ARRAY\(0x[0-9a-f]+\)$/); } -# These are all check that overloaded values rather than reference addresses -# are what is getting tested. +{ + package Numify; + use overload (qw(0+ numify fallback 1)); + + sub new { + my $val = $_[1]; + bless \$val, $_[0]; + } + + sub numify { ${$_[0]} } +} + +# These all check that overloaded values, rather than reference addresses, +# are what are getting tested. my ($two, $one, $un, $deux) = map {new Numify $_} 2, 1, 1, 2; my ($ein, $zwei) = (1, 2); @@ -1200,17 +1206,23 @@ foreach my $op (qw(<=> == != < <= > >=)) { # doesn't look like a regex ok("x" =~ $x, "qr-only matches"); ok("y" !~ $x, "qr-only doesn't match what it shouldn't"); + ok("x" =~ /^(??{$x})$/, "qr-only with ?? matches"); + ok("y" !~ /^(??{$x})$/, "qr-only with ?? doesn't match what it shouldn't"); ok("xx" =~ /x$x/, "qr-only matches with concat"); like("$x", qr/^QRonly=ARRAY/, "qr-only doesn't have string overload"); my $qr = bless qr/y/, "QRonly"; ok("x" =~ $qr, "qr with qr-overload uses overload"); ok("y" !~ $qr, "qr with qr-overload uses overload"); + ok("x" =~ /^(??{$qr})$/, "qr with qr-overload with ?? uses overload"); + ok("y" !~ /^(??{$qr})$/, "qr with qr-overload with ?? 
uses overload"); is("$qr", "".qr/y/, "qr with qr-overload stringify"); my $rx = $$qr; ok("y" =~ $rx, "bare rx with qr-overload doesn't overload match"); ok("x" !~ $rx, "bare rx with qr-overload doesn't overload match"); + ok("y" =~ /^(??{$rx})$/, "bare rx with qr-overload with ?? doesn't overload match"); + ok("x" !~ /^(??{$rx})$/, "bare rx with qr-overload with ?? doesn't overload match"); is("$rx", "".qr/y/, "bare rx with qr-overload stringify"); } { @@ -1851,6 +1863,9 @@ foreach my $op (qw(<=> == != < <= > >=)) { or die "open of \$iter_text gave ($!)\n"; $subs{'<>'} = '<$iter_fh>'; push @tests, [ $iter_fh, '<%s>', '(<>)', undef, [ 1, 1, 0 ], 1 ]; + push @tests, [ $iter_fh, + 'local *CORE::GLOBAL::glob = sub {}; eval q|<%s>|', + '(<>)', undef, [ 1, 1, 0 ], 1 ]; # eval should do tie, overload on its arg before checking taint */ push @tests, [ '1;', 'eval q(eval %s); $@ =~ /Insecure/', @@ -1858,6 +1873,7 @@ foreach my $op (qw(<=> == != < <= > >=)) { for my $sub (keys %subs) { + no warnings 'experimental::smartmatch'; my $term = $subs{$sub}; my $t = sprintf $term, '$_[0][0]'; my $e ="sub { \$funcs .= '($sub)'; my \$r; if (\$use_int) {" @@ -1899,6 +1915,7 @@ foreach my $op (qw(<=> == != < <= > >=)) { ? "-\$_[0][0]" : "$_[3](\$_[0][0])"; my $r; + no warnings 'experimental::smartmatch'; if ($use_int) { use integer; $r = eval $e; } @@ -1945,7 +1962,7 @@ foreach my $op (qw(<=> == != < <= > >=)) { $use_int = ($int ne ''); my $plain = $tainted_val; my $plain_term = $int . sprintf $sub_term, '$plain'; - my $exp = eval $plain_term; + my $exp = do {no warnings 'experimental::smartmatch'; eval $plain_term }; diag("eval of plain_term <$plain_term> gave <$@>") if $@; is(tainted($exp), $exp_taint, "<$plain_term> taint of expected return"); @@ -1973,7 +1990,7 @@ foreach my $op (qw(<=> == != < <= > >=)) { my $res_term = $int . 
sprintf $sub_term, $var; my $desc = "<$res_term> $ov_pkg" ; - my $res = eval $res_term; + my $res = do { no warnings 'experimental::smartmatch'; eval $res_term }; diag("eval of res_term $desc gave <$@>") if $@; # uniquely, the inc/dec ops return the original # ref rather than a copy, so stringify it to @@ -2185,7 +2202,7 @@ fresh_perl_is { package Justus; use overload '+' => 'justice'; - eval {bless[]}; + eval {"".bless[]}; ::like $@, qr/^Can't resolve method "justice" overloading "\+" in p(?x: )ackage "Justus" at /, 'Error message when explicitly named overload method does not exist'; @@ -2194,19 +2211,49 @@ fresh_perl_is our @ISA = 'JustYou'; package JustYou { use overload '+' => 'injustice'; } "JustUs"->${\"(+"}; - eval {bless []}; + eval {"".bless []}; ::like $@, qr/^Stub found while resolving method "\?{3}" overloadin(?x: )g "\+" in package "JustUs" at /, 'Error message when sub stub is encountered'; } -{ # undefining the overload stash -- KEEP THIS TEST LAST - package ant; - use overload '+' => 'onion'; - $_ = \&overload::nil; - undef %overload::; - bless[]; - ::ok(1, 'no crash when undefining %overload::'); +{ + # check that the right number of stringifications + # and the correct un-utf8-ifying happen on regex compile + package utf8_match; + my $c; + use overload '""' => sub { $c++; $_[0][0] ? 
"^\x{100}\$" : "^A\$"; }; + my $o = bless [0], 'utf8_match'; + + $o->[0] = 0; + $c = 0; + ::ok("A" =~ "^A\$", "regex stringify utf8=0 ol=0 bytes=0"); + ::ok("A" =~ $o, "regex stringify utf8=0 ol=1 bytes=0"); + ::is($c, 1, "regex stringify utf8=0 ol=1 bytes=0 count"); + + $o->[0] = 1; + $c = 0; + ::ok("\x{100}" =~ "^\x{100}\$", + "regex stringify utf8=1 ol=0 bytes=0"); + ::ok("\x{100}" =~ $o, "regex stringify utf8=1 ol=1 bytes=0"); + ::is($c, 1, "regex stringify utf8=1 ol=1 bytes=0 count"); + + use bytes; + + $o->[0] = 0; + $c = 0; + ::ok("A" =~ "^A\$", "regex stringify utf8=0 ol=0 bytes=1"); + ::ok("A" =~ $o, "regex stringify utf8=0 ol=1 bytes=1"); + ::is($c, 1, "regex stringify utf8=0 ol=1 bytes=1 count"); + + $o->[0] = 1; + $c = 0; + ::ok("\xc4\x80" =~ "^\x{100}\$", + "regex stringify utf8=1 ol=0 bytes=1"); + ::ok("\xc4\x80" =~ $o, "regex stringify utf8=1 ol=1 bytes=1"); + ::is($c, 1, "regex stringify utf8=1 ol=1 bytes=1 count"); + + } # [perl #40333] @@ -2243,5 +2290,413 @@ ok !overload::Overloaded(new proxy new o), ok(overload::Overloaded($obj)); } +sub eleventative::cos { 'eleven' } +sub twelvetative::abs { 'twelve' } +sub thirteentative::abs { 'thirteen' } +sub fourteentative::abs { 'fourteen' } +@eleventative::ISA = twelvetative::; +{ + my $o = bless [], 'eleventative'; + eval 'package eleventative; use overload map +($_)x2, cos=>abs=>'; + is cos $o, 'eleven', 'overloading applies to object blessed before'; + bless [], 'eleventative'; + is cos $o, 'eleven', + 'ovrld applies to previously-blessed obj after other obj is blessed'; + $o = bless [], 'eleventative'; + *eleventative::cos = sub { 'ten' }; + is cos $o, 'ten', 'method changes affect overloading'; + @eleventative::ISA = thirteentative::; + is abs $o, 'thirteen', 'isa changes affect overloading'; + bless $o, 'fourteentative'; + @fourteentative::ISA = 'eleventative'; + is abs $o, 'fourteen', 'isa changes can turn overloading on'; +} + +# no overload "fallback"; +{ package phake; + use overload fallback 
=> 1, '""' => sub { 'arakas' }; + no overload 'fallback'; +} +$a = bless [], 'phake'; +is "$a", "arakas", + 'no overload "fallback" does not stop overload from working'; +ok !eval { () = $a eq 'mpizeli'; 1 }, + 'no overload "fallback" resets fallback to undef on overloaded class'; +{ package ent; use overload fallback => 0, abs => sub{}; + our@ISA = 'huorn'; + package huorn; + use overload fallback => 1; + package ent; + no overload "fallback"; # disable previous declaration +} +$a = bless [], ent::; +is eval {"$a"}, overload::StrVal($a), + 'no overload undoes fallback declaration completetly' + or diag $@; + +# inherited fallback +{ + package pervyy; + our @ISA = 'vtoryy'; + use overload "abs" =>=> sub {}; + package vtoryy; + use overload fallback => 1, 'sin' =>=> sub{} +} +$a = bless [], pervyy::; +is eval {"$a"}, overload::StrVal($a), + 'fallback is inherited by classes that have their own overloading' + or diag $@; + +# package separators in method names +{ + package mane; + use overload q\""\ => "bear::strength"; + use overload bool => "bear'bouillon"; +} +@bear::ISA = 'food'; +sub food::strength { 'twine' } +sub food::bouillon { 0 } +$a = bless[], mane::; +is eval { "$a" }, 'twine', ':: in method name' or diag $@; +is eval { !$a }, 1, "' in method name" or diag $@; + +# [perl #113050] Half of CPAN assumes fallback is under "()" +{ + package dodo; + use overload '+' => sub {}; + no strict; + *{"dodo::()"} = sub{}; + ${"dodo::()"} = 1; +} +$a = bless [],'dodo'; +is eval {"$a"}, overload::StrVal($a), 'fallback is stored under "()"'; + +# [perl #47119] +{ + my $context; + + { + package Splitter; + use overload '<>' => \&chars; + + sub new { + my $class = shift; + my ($string) = @_; + bless \$string, $class; + } + + sub chars { + my $self = shift; + my @chars = split //, $$self; + $context = wantarray; + return @chars; + } + } + + my $obj = Splitter->new('bar'); + + $context = 42; # not 1, '', or undef + + my @foo = <$obj>; + is($context, 1, "list context 
(readline list)"); + is(scalar(@foo), 3, "correct result (readline list)"); + is($foo[0], 'b', "correct result (readline list)"); + is($foo[1], 'a', "correct result (readline list)"); + is($foo[2], 'r', "correct result (readline list)"); + + $context = 42; + + my $foo = <$obj>; + ok(defined($context), "scalar context (readline scalar)"); + is($context, '', "scalar context (readline scalar)"); + is($foo, 3, "correct result (readline scalar)"); + + $context = 42; + + <$obj>; + ok(!defined($context), "void context (readline void)"); + + $context = 42; + + my @bar = <${obj}>; + is($context, 1, "list context (glob list)"); + is(scalar(@bar), 3, "correct result (glob list)"); + is($bar[0], 'b', "correct result (glob list)"); + is($bar[1], 'a', "correct result (glob list)"); + is($bar[2], 'r', "correct result (glob list)"); + + $context = 42; + + my $bar = <${obj}>; + ok(defined($context), "scalar context (glob scalar)"); + is($context, '', "scalar context (glob scalar)"); + is($bar, 3, "correct result (glob scalar)"); + + $context = 42; + + <${obj}>; + ok(!defined($context), "void context (glob void)"); +} +{ + my $context; + + { + package StringWithContext; + use overload '""' => \&stringify; + + sub new { + my $class = shift; + my ($string) = @_; + bless \$string, $class; + } + + sub stringify { + my $self = shift; + $context = wantarray; + return $$self; + } + } + + my $obj = StringWithContext->new('bar'); + + $context = 42; + + my @foo = "".$obj; + ok(defined($context), "scalar context (stringify list)"); + is($context, '', "scalar context (stringify list)"); + is(scalar(@foo), 1, "correct result (stringify list)"); + is($foo[0], 'bar', "correct result (stringify list)"); + + $context = 42; + + my $foo = "".$obj; + ok(defined($context), "scalar context (stringify scalar)"); + is($context, '', "scalar context (stringify scalar)"); + is($foo, 'bar', "correct result (stringify scalar)"); + + $context = 42; + + "".$obj; + + is($context, '', "scalar context (stringify 
void)"); +} +{ + my ($context, $swap); + + { + package AddWithContext; + use overload '+' => \&add; + + sub new { + my $class = shift; + my ($num) = @_; + bless \$num, $class; + } + + sub add { + my $self = shift; + my ($other, $swapped) = @_; + $context = wantarray; + $swap = $swapped; + return ref($self)->new($$self + $other); + } + + sub val { ${ $_[0] } } + } + + my $obj = AddWithContext->new(6); + + $context = $swap = 42; + + my @foo = $obj + 7; + ok(defined($context), "scalar context (add list)"); + is($context, '', "scalar context (add list)"); + ok(defined($swap), "not swapped (add list)"); + is($swap, '', "not swapped (add list)"); + is(scalar(@foo), 1, "correct result (add list)"); + is($foo[0]->val, 13, "correct result (add list)"); + + $context = $swap = 42; + + @foo = 7 + $obj; + ok(defined($context), "scalar context (add list swap)"); + is($context, '', "scalar context (add list swap)"); + ok(defined($swap), "swapped (add list swap)"); + is($swap, 1, "swapped (add list swap)"); + is(scalar(@foo), 1, "correct result (add list swap)"); + is($foo[0]->val, 13, "correct result (add list swap)"); + + $context = $swap = 42; + + my $foo = $obj + 7; + ok(defined($context), "scalar context (add scalar)"); + is($context, '', "scalar context (add scalar)"); + ok(defined($swap), "not swapped (add scalar)"); + is($swap, '', "not swapped (add scalar)"); + is($foo->val, 13, "correct result (add scalar)"); + + $context = $swap = 42; + + my $foo = 7 + $obj; + ok(defined($context), "scalar context (add scalar swap)"); + is($context, '', "scalar context (add scalar swap)"); + ok(defined($swap), "swapped (add scalar swap)"); + is($swap, 1, "swapped (add scalar swap)"); + is($foo->val, 13, "correct result (add scalar swap)"); + + $context = $swap = 42; + + $obj + 7; + + ok(!defined($context), "void context (add void)"); + ok(defined($swap), "not swapped (add void)"); + is($swap, '', "not swapped (add void)"); + + $context = $swap = 42; + + 7 + $obj; + + 
ok(!defined($context), "void context (add void swap)"); + ok(defined($swap), "swapped (add void swap)"); + is($swap, 1, "swapped (add void swap)"); + + $obj = AddWithContext->new(6); + + $context = $swap = 42; + + my @foo = $obj += 7; + ok(defined($context), "scalar context (add assign list)"); + is($context, '', "scalar context (add assign list)"); + ok(!defined($swap), "not swapped and autogenerated (add assign list)"); + is(scalar(@foo), 1, "correct result (add assign list)"); + is($foo[0]->val, 13, "correct result (add assign list)"); + is($obj->val, 13, "correct result (add assign list)"); + + $obj = AddWithContext->new(6); + + $context = $swap = 42; + + my $foo = $obj += 7; + ok(defined($context), "scalar context (add assign scalar)"); + is($context, '', "scalar context (add assign scalar)"); + ok(!defined($swap), "not swapped and autogenerated (add assign scalar)"); + is($foo->val, 13, "correct result (add assign scalar)"); + is($obj->val, 13, "correct result (add assign scalar)"); + + $obj = AddWithContext->new(6); + + $context = $swap = 42; + + $obj += 7; + + ok(defined($context), "scalar context (add assign void)"); + is($context, '', "scalar context (add assign void)"); + ok(!defined($swap), "not swapped and autogenerated (add assign void)"); + is($obj->val, 13, "correct result (add assign void)"); + + $obj = AddWithContext->new(6); + + $context = $swap = 42; + + my @foo = ++$obj; + ok(defined($context), "scalar context (add incr list)"); + is($context, '', "scalar context (add incr list)"); + ok(!defined($swap), "not swapped and autogenerated (add incr list)"); + is(scalar(@foo), 1, "correct result (add incr list)"); + is($foo[0]->val, 7, "correct result (add incr list)"); + is($obj->val, 7, "correct result (add incr list)"); + + $obj = AddWithContext->new(6); + + $context = $swap = 42; + + my $foo = ++$obj; + ok(defined($context), "scalar context (add incr scalar)"); + is($context, '', "scalar context (add incr scalar)"); + ok(!defined($swap), "not 
swapped and autogenerated (add incr scalar)"); + is($foo->val, 7, "correct result (add incr scalar)"); + is($obj->val, 7, "correct result (add incr scalar)"); + + $obj = AddWithContext->new(6); + + $context = $swap = 42; + + ++$obj; + + ok(defined($context), "scalar context (add incr void)"); + is($context, '', "scalar context (add incr void)"); + ok(!defined($swap), "not swapped and autogenerated (add incr void)"); + is($obj->val, 7, "correct result (add incr void)"); +} + +# [perl #113010] +{ + { + package OnlyFallback; + use overload fallback => 0; + } + { + my $obj = bless {}, 'OnlyFallback'; + my $died = !eval { "".$obj; 1 }; + my $err = $@; + ok($died, "fallback of 0 causes error"); + like($err, qr/"\.": no method found/, "correct error"); + } + + { + package OnlyFallbackUndef; + use overload fallback => undef; + } + { + my $obj = bless {}, 'OnlyFallbackUndef'; + my $died = !eval { "".$obj; 1 }; + my $err = $@; + ok($died, "fallback of undef causes error"); + # this one tries falling back to stringify before dying + like($err, qr/"""": no method found/, "correct error"); + } + + { + package OnlyFallbackTrue; + use overload fallback => 1; + } + { + my $obj = bless {}, 'OnlyFallbackTrue'; + my $val; + my $died = !eval { $val = "".$obj; 1 }; + my $err = $@; + ok(!$died, "fallback of 1 doesn't cause error") + || diag("got error of $err"); + like($val, qr/^OnlyFallbackTrue=HASH\(/, "stringified correctly"); + } +} + +{ + # Making Regexp class overloaded: avoid infinite recursion. + # Do this in a separate process since it, well, overloads Regexp! 
+ fresh_perl_is( + <<'EOF', +package Regexp; +use overload q{""} => sub {$_[0] }; +package main; +my $r1 = qr/1/; +my $r2 = qr/ABC$r1/; +print $r2,"\n"; +EOF + '(?^:ABC(?^:1))', + { stderr => 1 }, + 'overloaded REGEXP' + ); +} + +{ # undefining the overload stash -- KEEP THIS TEST LAST + package ant; + use overload '+' => 'onion'; + $_ = \&overload::nil; + undef %overload::; + ()=0+bless[]; + ::ok(1, 'no crash when undefining %overload::'); +} + # EOF diff --git a/gnu/usr.bin/perl/lib/overload/numbers.pm b/gnu/usr.bin/perl/lib/overload/numbers.pm index f56fa630cc8..a90c175db98 100644 --- a/gnu/usr.bin/perl/lib/overload/numbers.pm +++ b/gnu/usr.bin/perl/lib/overload/numbers.pm @@ -82,7 +82,6 @@ our @names = qw# (~~ (-X (qr - DESTROY #; our @enums = qw# @@ -154,7 +153,6 @@ our @enums = qw# smart ftest regexp - DESTROY #; { my $i = 0; our %names = map { $_ => $i++ } @names } diff --git a/gnu/usr.bin/perl/lib/perl5db.t b/gnu/usr.bin/perl/lib/perl5db.t index c7633fc2207..37a0119471b 100644 --- a/gnu/usr.bin/perl/lib/perl5db.t +++ b/gnu/usr.bin/perl/lib/perl5db.t @@ -28,7 +28,7 @@ BEGIN { } } -plan(30); +plan(116); my $rc_filename = '.perldb'; @@ -65,121 +65,6 @@ sub _out_contents return _slurp($out_fn); } -{ - my $target = '../lib/perl5db/t/eval-line-bug'; - - rc( - <<"EOF", - &parse_options("NonStop=0 TTY=db.out LineInfo=db.out"); - - sub afterinit { - push(\@DB::typeahead, - 'b 23', - 'n', - 'n', - 'n', - 'c', # line 23 - 'n', - "p \\\@{'main::_<$target'}", - 'q', - ); - } -EOF - ); - - { - local $ENV{PERLDB_OPTS} = "ReadLine=0"; - runperl(switches => [ '-d' ], progfile => $target); - } -} - -like(_out_contents(), qr/sub factorial/, - 'The ${main::_<filename} variable in the debugger was not destroyed' -); - -{ - my $target = '../lib/perl5db/t/eval-line-bug'; - - rc( - <<"EOF", - &parse_options("NonStop=0 TTY=db.out LineInfo=db.out"); - - sub afterinit { - push(\@DB::typeahead, - 'b 23', - 'c', - '\$new_var = "Foo"', - 'x "new_var = <\$new_var>\\n";', - 'q', - ); - } 
-EOF - ); - - { - local $ENV{PERLDB_OPTS} = "ReadLine=0"; - runperl(switches => [ '-d' ], progfile => $target); - } -} - -like(_out_contents(), qr/new_var = <Foo>/, - "no strict 'vars' in evaluated lines.", -); - -{ - local $ENV{PERLDB_OPTS} = "ReadLine=0"; - my $output = runperl(switches => [ '-d' ], progfile => '../lib/perl5db/t/lvalue-bug'); - like($output, qr/foo is defined/, 'lvalue subs work in the debugger'); -} - -{ - local $ENV{PERLDB_OPTS} = "ReadLine=0 NonStop=1"; - my $output = runperl(switches => [ '-d' ], progfile => '../lib/perl5db/t/symbol-table-bug'); - like($output, qr/Undefined symbols 0/, 'there are no undefined values in the symbol table'); -} - -SKIP: { - if ( $Config{usethreads} ) { - skip('This perl has threads, skipping non-threaded debugger tests'); - } else { - my $error = 'This Perl not built to support threads'; - my $output = runperl( switches => [ '-dt' ], stderr => 1 ); - like($output, qr/$error/, 'Perl debugger correctly complains that it was not built with threads'); - } - -} -SKIP: { - if ( $Config{usethreads} ) { - local $ENV{PERLDB_OPTS} = "ReadLine=0 NonStop=1"; - my $output = runperl(switches => [ '-dt' ], progfile => '../lib/perl5db/t/symbol-table-bug'); - like($output, qr/Undefined symbols 0/, 'there are no undefined values in the symbol table when running with thread support'); - } else { - skip("This perl is not threaded, skipping threaded debugger tests"); - } -} - - -# Test [perl #61222] -{ - local $ENV{PERLDB_OPTS}; - rc( - <<'EOF', - &parse_options("NonStop=0 TTY=db.out LineInfo=db.out"); - - sub afterinit { - push(@DB::typeahead, - 'm Pie', - 'q', - ); - } -EOF - ); - - my $output = runperl(switches => [ '-d' ], stderr => 1, progfile => '../lib/perl5db/t/rt-61222'); - unlike(_out_contents(), qr/INCORRECT/, "[perl #61222]"); -} - - # Test for Proxy constants { @@ -208,8 +93,21 @@ EOF my $output = runperl(switches => [ '-d' ], stderr => 1, progfile => '../lib/perl5db/t/rt-66110'); like($output, "All tests successful.", 
"[perl #66110]"); } +# [ perl #116769] Frame=2 +{ + local $ENV{PERLDB_OPTS} = "frame=2 nonstop"; + my $output = runperl( switches => [ '-d' ], prog => 'print q{success}' ); + is( $?, 0, '[perl #116769] frame=2 does not crash debugger, exit == 0' ); + like( $output, 'success' , '[perl #116769] code is run' ); +} +# [ perl #116771] autotrace +{ + local $ENV{PERLDB_OPTS} = "autotrace nonstop"; + my $output = runperl( switches => [ '-d' ], prog => 'print q{success}' ); + is( $?, 0, '[perl #116771] autotrace does not crash debugger, exit == 0' ); + like( $output, 'success' , '[perl #116771] code is run' ); +} -# [perl 104168] level option for tracing { rc(<<'EOF'); &parse_options("NonStop=0 TTY=db.out LineInfo=db.out"); @@ -223,21 +121,6 @@ sub afterinit { } EOF - - my $output = runperl(switches => [ '-d' ], stderr => 1, progfile => '../lib/perl5db/t/rt-104168'); - my $contents = _out_contents(); - like($contents, qr/level 2/, "[perl #104168]"); - unlike($contents, qr/baz/, "[perl #104168]"); -} - -# taint tests - -{ - local $ENV{PERLDB_OPTS} = "ReadLine=0 NonStop=1"; - my $output = runperl(switches => [ '-d', '-T' ], stderr => 1, - progfile => '../lib/perl5db/t/taint'); - chomp $output if $^O eq 'VMS'; # newline guaranteed at EOF - is($output, '[$^X][done]', "taint"); } package DebugWrap; @@ -294,6 +177,42 @@ sub _include_t return $self->{_include_t}; } +sub _stderr_val +{ + my $self = shift; + + if (@_) + { + $self->{_stderr_val} = shift; + } + + return $self->{_stderr_val}; +} + +sub field +{ + my $self = shift; + + if (@_) + { + $self->{field} = shift; + } + + return $self->{field}; +} + +sub _switches +{ + my $self = shift; + + if (@_) + { + $self->{_switches} = shift; + } + + return $self->{_switches}; +} + sub _contents { my $self = shift; @@ -328,6 +247,13 @@ sub _init $self->_include_t($args->{include_t} ? 1 : 0); + $self->_stderr_val(exists($args->{stderr}) ? 
$args->{stderr} : 1); + + if (exists($args->{switches})) + { + $self->_switches($args->{switches}); + } + $self->_run(); return; @@ -367,10 +293,13 @@ sub _run { ::runperl( switches => [ - '-d', + ($self->_switches ? (@{$self->_switches()}) : ('-d')), ($self->_include_t ? ('-I', '../lib/perl5db/t') : ()) ], - stderr => 1, + (defined($self->_stderr_val()) + ? (stderr => $self->_stderr_val()) + : () + ), progfile => $self->_prog() ); @@ -381,6 +310,11 @@ sub _run { return; } +sub get_output +{ + return shift->_output(); +} + sub output_like { my ($self, $re, $msg) = @_; @@ -388,6 +322,13 @@ sub output_like { ::like($self->_output(), $re, $msg); } +sub output_unlike { + my ($self, $re, $msg) = @_; + + local $::Level = $::Level + 1; + ::unlike($self->_output(), $re, $msg); +} + sub contents_like { my ($self, $re, $msg) = @_; @@ -395,8 +336,211 @@ sub contents_like { ::like($self->_contents(), $re, $msg); } +sub contents_unlike { + my ($self, $re, $msg) = @_; + + local $::Level = $::Level + 1; + ::unlike($self->_contents(), $re, $msg); +} + package main; +{ + local $ENV{PERLDB_OPTS} = "ReadLine=0"; + my $target = '../lib/perl5db/t/eval-line-bug'; + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'b 23', + 'n', + 'n', + 'n', + 'c', # line 23 + 'n', + "p \@{'main::_<$target'}", + 'q', + ], + prog => $target, + } + ); + $wrapper->contents_like( + qr/sub factorial/, + 'The ${main::_<filename} variable in the debugger was not destroyed', + ); +} + +sub _calc_generic_wrapper +{ + my $args = shift; + + my $extra_opts = delete($args->{extra_opts}); + $extra_opts ||= ''; + local $ENV{PERLDB_OPTS} = "ReadLine=0" . 
$extra_opts; + return DebugWrap->new( + { + cmds => delete($args->{cmds}), + prog => delete($args->{prog}), + %$args, + } + ); +} + +sub _calc_new_var_wrapper +{ + my ($args) = @_; + return _calc_generic_wrapper( + { + cmds => + [ + 'b 23', + 'c', + '$new_var = "Foo"', + 'x "new_var = <$new_var>\\n"', + 'q', + ], + %$args, + } + ); +} + +sub _calc_threads_wrapper +{ + my $args = shift; + + return _calc_new_var_wrapper( + { + switches => [ '-dt', ], + stderr => 1, + %$args + } + ); +} + +{ + _calc_new_var_wrapper({ prog => '../lib/perl5db/t/eval-line-bug'}) + ->contents_like( + qr/new_var = <Foo>/, + "no strict 'vars' in evaluated lines.", + ); +} + +{ + _calc_new_var_wrapper( + { + prog => '../lib/perl5db/t/lvalue-bug', + stderr => undef(), + }, + )->output_like( + qr/foo is defined/, + 'lvalue subs work in the debugger', + ); +} + +{ + _calc_new_var_wrapper( + { + prog => '../lib/perl5db/t/symbol-table-bug', + extra_opts => "NonStop=1", + stderr => undef(), + } + )->output_like( + qr/Undefined symbols 0/, + 'there are no undefined values in the symbol table', + ); +} + +SKIP: +{ + if ( $Config{usethreads} ) { + skip('This perl has threads, skipping non-threaded debugger tests'); + } + else { + my $error = 'This Perl not built to support threads'; + _calc_threads_wrapper( + { + prog => '../lib/perl5db/t/eval-line-bug', + } + )->output_like( + qr/\Q$error\E/, + 'Perl debugger correctly complains that it was not built with threads', + ); + } +} + +SKIP: +{ + if ( $Config{usethreads} ) { + _calc_threads_wrapper( + { + prog => '../lib/perl5db/t/symbol-table-bug', + } + )->output_like( + qr/Undefined symbols 0/, + 'there are no undefined values in the symbol table when running with thread support', + ); + } + else { + skip("This perl is not threaded, skipping threaded debugger tests"); + } +} + +# Test [perl #61222] +{ + local $ENV{PERLDB_OPTS}; + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'm Pie', + 'q', + ], + prog => '../lib/perl5db/t/rt-61222', + } + ); + + 
$wrapper->contents_unlike(qr/INCORRECT/, "[perl #61222]"); +} + +sub _calc_trace_wrapper +{ + my ($args) = @_; + + return _calc_generic_wrapper( + { + cmds => + [ + 't 2', + 'c', + 'q', + ], + %$args, + } + ); +} + +# [perl 104168] level option for tracing +{ + my $wrapper = _calc_trace_wrapper({ prog => '../lib/perl5db/t/rt-104168' }); + $wrapper->contents_like(qr/level 2/, "[perl #104168] - level 2 appears"); + $wrapper->contents_unlike(qr/baz/, "[perl #104168] - no 'baz'"); +} + +# taint tests +{ + my $wrapper = _calc_trace_wrapper( + { + prog => '../lib/perl5db/t/taint', + extra_opts => ' NonStop=1', + switches => [ '-d', '-T', ], + } + ); + + my $output = $wrapper->get_output(); + chomp $output if $^O eq 'VMS'; # newline guaranteed at EOF + is($output, '[$^X][done]', "taint"); +} + # Testing that we can set a line in the middle of the file. { my $wrapper = DebugWrap->new( @@ -668,6 +812,74 @@ package main; ); } +# Tests for x with @_ +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'b 10', + 'c', + 'x @_', + 'q', + ], + prog => '../lib/perl5db/t/test-passing-at-underscore-to-x-etc', + } + ); + + $wrapper->contents_like( + # qr/^0\s+HASH\([^\)]+\)\n\s+500 => 600\n/, + qr/Arg1.*?Capsula.*GreekHumor.*Socrates/ms, + q/x command test with '@_'./, + ); +} + +# Tests for mutating @_ +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'b 10', + 'c', + 'shift(@_)', + 'print "\n\n\n(((" . join(",", @_) . ")))\n\n\n"', + 'q', + ], + prog => '../lib/perl5db/t/test-passing-at-underscore-to-x-etc', + } + ); + + $wrapper->output_like( + qr/^\(\(\(Capsula,GreekHumor,Socrates\)\)\)$/ms, + q/Mutating '@_'./, + ); +} + +# Tests for x with AutoTrace=1. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'n', + 'o AutoTrace=1', + # So it may fail. 
+ q/x "failure"/, + q/x \$x/, + 'q', + ], + prog => '../lib/perl5db/t/with-subroutine', + } + ); + + $wrapper->contents_like( + # qr/^0\s+HASH\([^\)]+\)\n\s+500 => 600\n/, + qr/^0\s+SCALAR\([^\)]+\)\n\s+-> 'hello world'\n/ms, + "x after AutoTrace=1 command is working." + ); +} + # Tests for "T" (stack trace). { my $prog_fn = '../lib/perl5db/t/rt-104168'; @@ -689,11 +901,11 @@ package main; "'" . quotemeta($prog_fn) . "' line %s\\n", (map { quotemeta($_) } @$_) ) - } + } ( ['.', 'main::baz', 14,], ['.', 'main::bar', 9,], - ['.', 'main::foo', 6] + ['.', 'main::foo', 6], ) ); $wrapper->contents_like( @@ -808,6 +1020,1693 @@ package main; ); } +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'b fact', + 'c', + 'c', + 'c', + 'n', + 'print "<$n>"', + 'q', + ], + prog => '../lib/perl5db/t/fact', + } + ); + + $wrapper->output_like( + qr/<3>/, + 'b subroutine works fine', + ); +} + +# Test for 'M' (module list). +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'M', + 'q', + ], + prog => '../lib/perl5db/t/load-modules' + } + ); + + $wrapper->contents_like( + qr[Scalar/Util\.pm], + 'M (module list) works fine', + ); +} + +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'b 14', + 'c', + '$flag = 1;', + 'r', + 'print "Var=$var\n";', + 'q', + ], + prog => '../lib/perl5db/t/test-r-statement', + } + ); + + $wrapper->output_like( + qr/ + ^Foo$ + .*? + ^Bar$ + .*? + ^Var=Test$ + /msx, + 'r statement is working properly.', + ); +} + +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'l', + 'q', + ], + prog => '../lib/perl5db/t/test-l-statement-1', + } + ); + + $wrapper->contents_like( + qr/ + ^1==>\s+\$x\ =\ 1;\n + 2:\s+print\ "1\\n";\n + 3\s*\n + 4:\s+\$x\ =\ 2;\n + 5:\s+print\ "2\\n";\n + /msx, + 'l statement is working properly (test No. 
1).', + ); +} + +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'l', + q/# After l 1/, + 'l', + q/# After l 2/, + '-', + q/# After -/, + 'q', + ], + prog => '../lib/perl5db/t/test-l-statement-1', + } + ); + + my $first_l_out = qr/ + 1==>\s+\$x\ =\ 1;\n + 2:\s+print\ "1\\n";\n + 3\s*\n + 4:\s+\$x\ =\ 2;\n + 5:\s+print\ "2\\n";\n + 6\s*\n + 7:\s+\$x\ =\ 3;\n + 8:\s+print\ "3\\n";\n + 9\s*\n + 10:\s+\$x\ =\ 4;\n + /msx; + + my $second_l_out = qr/ + 11:\s+print\ "4\\n";\n + 12\s*\n + 13:\s+\$x\ =\ 5;\n + 14:\s+print\ "5\\n";\n + 15\s*\n + 16:\s+\$x\ =\ 6;\n + 17:\s+print\ "6\\n";\n + 18\s*\n + 19:\s+\$x\ =\ 7;\n + 20:\s+print\ "7\\n";\n + /msx; + $wrapper->contents_like( + qr/ + ^$first_l_out + [^\n]*?DB<\d+>\ \#\ After\ l\ 1\n + [\ \t]*\n + [^\n]*?DB<\d+>\ l\s*\n + $second_l_out + [^\n]*?DB<\d+>\ \#\ After\ l\ 2\n + [\ \t]*\n + [^\n]*?DB<\d+>\ -\s*\n + $first_l_out + [^\n]*?DB<\d+>\ \#\ After\ -\n + /msx, + 'l followed by l and then followed by -', + ); +} + +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'l fact', + 'q', + ], + prog => '../lib/perl5db/t/test-l-statement-2', + } + ); + + my $first_l_out = qr/ + 6\s+sub\ fact\ \{\n + 7:\s+my\ \$n\ =\ shift;\n + 8:\s+if\ \(\$n\ >\ 1\)\ \{\n + 9:\s+return\ \$n\ \*\ fact\(\$n\ -\ 1\); + /msx; + + $wrapper->contents_like( + qr/ + DB<1>\s+l\ fact\n + $first_l_out + /msx, + 'l subroutine_name', + ); +} + +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'b fact', + 'c', + # Repeat several times to avoid @typeahead problems. + '.', + '.', + '.', + '.', + 'q', + ], + prog => '../lib/perl5db/t/test-l-statement-2', + } + ); + + my $line_out = qr / + ^main::fact\([^\n]*?:7\):\n + ^7:\s+my\ \$n\ =\ shift;\n + /msx; + + $wrapper->contents_like( + qr/ + $line_out + $line_out + /msx, + 'Test the "." command', + ); +} + +# Testing that the f command works. 
+{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'f ../lib/perl5db/t/MyModule.pm', + 'b 12', + 'c', + q/do { use IO::Handle; STDOUT->autoflush(1); print "Var=$var\n"; }/, + 'c', + 'q', + ], + include_t => 1, + prog => '../lib/perl5db/t/filename-line-breakpoint' + } + ); + + $wrapper->output_like(qr/ + ^Var=Bar$ + .* + ^In\ MyModule\.$ + .* + ^In\ Main\ File\.$ + .* + /msx, + "f command is working.", + ); +} + +# We broke the /pattern/ command because apparently the CORE::eval-s inside +# lib/perl5db.pl cannot handle lexical variables properly. So we now fix this +# bug. +# +# TODO : +# +# 1. Go over the rest of the "eval"s in lib/perl5db.pl and see if they cause +# problems. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + '/for/', + 'q', + ], + prog => '../lib/perl5db/t/eval-line-bug', + } + ); + + $wrapper->contents_like( + qr/12: \s* for\ my\ \$q\ \(1\ \.\.\ 10\)\ \{/msx, + "/pat/ command is working and found a match.", + ); +} + +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'b 22', + 'c', + '?for?', + 'q', + ], + prog => '../lib/perl5db/t/eval-line-bug', + } + ); + + $wrapper->contents_like( + qr/12: \s* for\ my\ \$q\ \(1\ \.\.\ 10\)\ \{/msx, + "?pat? command is working and found a match.", + ); +} + +# Test the L command. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'b 6', + 'b 13 ($q == 5)', + 'L', + 'q', + ], + prog => '../lib/perl5db/t/eval-line-bug', + } + ); + + $wrapper->contents_like( + qr# + ^\S*?eval-line-bug:\n + \s*6:\s*my\ \$i\ =\ 5;\n + \s*break\ if\ \(1\)\n + \s*13:\s*\$i\ \+=\ \$q;\n + \s*break\ if\ \(\(\$q\ ==\ 5\)\)\n + #msx, + "L command is listing breakpoints", + ); +} + +# Test the L command for watch expressions. 
+{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'w (5+6)', + 'L', + 'q', + ], + prog => '../lib/perl5db/t/eval-line-bug', + } + ); + + $wrapper->contents_like( + qr# + ^Watch-expressions:\n + \s*\(5\+6\)\n + #msx, + "L command is listing watch expressions", + ); +} + +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'w (5+6)', + 'w (11*23)', + 'W (5+6)', + 'L', + 'q', + ], + prog => '../lib/perl5db/t/eval-line-bug', + } + ); + + $wrapper->contents_like( + qr# + ^Watch-expressions:\n + \s*\(11\*23\)\n + ^auto\( + #msx, + "L command is not listing deleted watch expressions", + ); +} + +# Test the L command. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'b 6', + 'a 13 print $i', + 'L', + 'q', + ], + prog => '../lib/perl5db/t/eval-line-bug', + } + ); + + $wrapper->contents_like( + qr# + ^\S*?eval-line-bug:\n + \s*6:\s*my\ \$i\ =\ 5;\n + \s*break\ if\ \(1\)\n + \s*13:\s*\$i\ \+=\ \$q;\n + \s*action:\s+print\ \$i\n + #msx, + "L command is listing actions and breakpoints", + ); +} + +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'S', + 'q', + ], + prog => '../lib/perl5db/t/rt-104168', + } + ); + + $wrapper->contents_like( + qr# + ^main::bar\n + main::baz\n + main::foo\n + #msx, + "S command - 1", + ); +} + +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'S ^main::ba', + 'q', + ], + prog => '../lib/perl5db/t/rt-104168', + } + ); + + $wrapper->contents_like( + qr# + ^main::bar\n + main::baz\n + auto\( + #msx, + "S command with regex", + ); +} + +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'S !^main::ba', + 'q', + ], + prog => '../lib/perl5db/t/rt-104168', + } + ); + + $wrapper->contents_unlike( + qr# + ^main::ba + #msx, + "S command with negative regex", + ); + + $wrapper->contents_like( + qr# + ^main::foo\n + #msx, + "S command with negative regex - what it still matches", + ); +} + +# Test the 'a' command. 
+{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'a 13 print "\nVar<Q>=$q\n"', + 'c', + 'q', + ], + prog => '../lib/perl5db/t/eval-line-bug', + } + ); + + $wrapper->output_like(qr# + \nVar<Q>=1\n + \nVar<Q>=2\n + \nVar<Q>=3\n + #msx, + "a command is working", + ); +} + +# Test the 'a' command with no line number. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'n', + q/a print "Hello " . (3 * 4) . "\n";/, + 'c', + 'q', + ], + prog => '../lib/perl5db/t/test-a-statement-1', + } + ); + + $wrapper->output_like(qr# + (?:^Hello\ 12\n.*?){4} + #msx, + "a command with no line number is working", + ); +} + +# Test the 'A' command +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'a 13 print "\nVar<Q>=$q\n"', + 'A 13', + 'c', + 'q', + ], + prog => '../lib/perl5db/t/eval-line-bug', + } + ); + + $wrapper->output_like( + qr#\A\z#msx, # The empty string. + "A command (for removing actions) is working", + ); +} + +# Test the 'A *' command +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'a 6 print "\nFail!\n"', + 'a 13 print "\nVar<Q>=$q\n"', + 'A *', + 'c', + 'q', + ], + prog => '../lib/perl5db/t/eval-line-bug', + } + ); + + $wrapper->output_like( + qr#\A\z#msx, # The empty string. 
+ "'A *' command (for removing all actions) is working", + ); +} + +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'n', + 'w $foo', + 'c', + 'print "\nIDX=<$idx>\n"', + 'q', + ], + prog => '../lib/perl5db/t/test-w-statement-1', + } + ); + + + $wrapper->contents_like(qr# + \$foo\ changed:\n + \s+old\ value:\s+'1'\n + \s+new\ value:\s+'2'\n + #msx, + 'w command - watchpoint changed', + ); + $wrapper->output_like(qr# + \nIDX=<20>\n + #msx, + "w command - correct output from IDX", + ); +} + +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'n', + 'w $foo', + 'W $foo', + 'c', + 'print "\nIDX=<$idx>\n"', + 'q', + ], + prog => '../lib/perl5db/t/test-w-statement-1', + } + ); + + $wrapper->contents_unlike(qr# + \$foo\ changed: + #msx, + 'W command - watchpoint was deleted', + ); + + $wrapper->output_like(qr# + \nIDX=<>\n + #msx, + "W command - stopped at end.", + ); +} + +# Test the W * command. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'n', + 'w $foo', + 'w ($foo*$foo)', + 'W *', + 'c', + 'print "\nIDX=<$idx>\n"', + 'q', + ], + prog => '../lib/perl5db/t/test-w-statement-1', + } + ); + + $wrapper->contents_unlike(qr# + \$foo\ changed: + #msx, + '"W *" command - watchpoint was deleted', + ); + + $wrapper->output_like(qr# + \nIDX=<>\n + #msx, + '"W *" command - stopped at end.', + ); +} + +# Test the 'o' command (without further arguments). 
+{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'o', + 'q', + ], + prog => '../lib/perl5db/t/test-w-statement-1', + } + ); + + $wrapper->contents_like(qr# + ^\s*warnLevel\ =\ '1'\n + #msx, + q#"o" command (without arguments) displays warnLevel#, + ); + + $wrapper->contents_like(qr# + ^\s*signalLevel\ =\ '1'\n + #msx, + q#"o" command (without arguments) displays signalLevel#, + ); + + $wrapper->contents_like(qr# + ^\s*dieLevel\ =\ '1'\n + #msx, + q#"o" command (without arguments) displays dieLevel#, + ); + + $wrapper->contents_like(qr# + ^\s*hashDepth\ =\ 'N/A'\n + #msx, + q#"o" command (without arguments) displays hashDepth#, + ); +} + +# Test the 'o' query command. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'o hashDepth? signalLevel?', + 'q', + ], + prog => '../lib/perl5db/t/test-w-statement-1', + } + ); + + $wrapper->contents_unlike(qr#warnLevel#, + q#"o" query command does not display warnLevel#, + ); + + $wrapper->contents_like(qr# + ^\s*signalLevel\ =\ '1'\n + #msx, + q#"o" query command displays signalLevel#, + ); + + $wrapper->contents_unlike(qr#dieLevel#, + q#"o" query command does not display dieLevel#, + ); + + $wrapper->contents_like(qr# + ^\s*hashDepth\ =\ 'N/A'\n + #msx, + q#"o" query command displays hashDepth#, + ); +} + +# Test the 'o' set command. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'o signalLevel=0', + 'o', + 'q', + ], + prog => '../lib/perl5db/t/test-w-statement-1', + } + ); + + $wrapper->contents_like(qr/ + ^\s*(signalLevel\ =\ '0'\n) + .*? + ^\s*\1 + /msx, + q#o set command works#, + ); + + $wrapper->contents_like(qr# + ^\s*hashDepth\ =\ 'N/A'\n + #msx, + q#o set command - hashDepth#, + ); +} + +# Test the '<' and "< ?" commands. 
+{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + q/< print "\nX=<$x>\n"/, + q/b 7/, + q/< ?/, + 'c', + 'q', + ], + prog => '../lib/perl5db/t/disable-breakpoints-1', + } + ); + + $wrapper->contents_like(qr/ + ^pre-perl\ commands:\n + \s*<\ --\ print\ "\\nX=<\$x>\\n"\n + /msx, + q#Test < and < ? commands - contents.#, + ); + + $wrapper->output_like(qr# + ^X=<FirstVal>\n + #msx, + q#Test < and < ? commands - output.#, + ); +} + +# Test the '< *' command. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + q/< print "\nX=<$x>\n"/, + q/b 7/, + q/< */, + 'c', + 'q', + ], + prog => '../lib/perl5db/t/disable-breakpoints-1', + } + ); + + $wrapper->output_unlike(qr/FirstVal/, + q#Test the '< *' command.#, + ); +} + +# Test the '>' and "> ?" commands. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + q/$::foo = 500;/, + q/> print "\nFOO=<$::foo>\n"/, + q/b 7/, + q/> ?/, + 'c', + 'q', + ], + prog => '../lib/perl5db/t/disable-breakpoints-1', + } + ); + + $wrapper->contents_like(qr/ + ^post-perl\ commands:\n + \s*>\ --\ print\ "\\nFOO=<\$::foo>\\n"\n + /msx, + q#Test > and > ? commands - contents.#, + ); + + $wrapper->output_like(qr# + ^FOO=<500>\n + #msx, + q#Test > and > ? commands - output.#, + ); +} + +# Test the '> *' command. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + q/> print "\nFOO=<$::foo>\n"/, + q/b 7/, + q/> */, + 'c', + 'q', + ], + prog => '../lib/perl5db/t/disable-breakpoints-1', + } + ); + + $wrapper->output_unlike(qr/FOO=/, + q#Test the '> *' command.#, + ); +} + +# Test the < and > commands together +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + q/$::lorem = 0;/, + q/< $::lorem += 10;/, + q/> print "\nLOREM=<$::lorem>\n"/, + q/b 7/, + q/b 5/, + 'c', + 'c', + 'q', + ], + prog => '../lib/perl5db/t/disable-breakpoints-1', + } + ); + + $wrapper->output_like(qr# + ^LOREM=<10>\n + #msx, + q#Test < and > commands. #, + ); +} + +# Test the { ? and { [command] commands. 
+{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + '{ ?', + '{ l', + '{ ?', + q/b 5/, + q/c/, + q/q/, + ], + prog => '../lib/perl5db/t/disable-breakpoints-1', + } + ); + + $wrapper->contents_like(qr# + ^No\ pre-debugger\ actions\.\n + .*? + ^pre-debugger\ commands:\n + \s+\{\ --\ l\n + .*? + ^5==>b\s+\$x\ =\ "FirstVal";\n + 6\s*\n + 7:\s+\$dummy\+\+;\n + 8\s*\n + 9:\s+\$x\ =\ "SecondVal";\n + + #msx, + 'Test the pre-prompt debugger commands', + ); +} + +# Test the { * command. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + '{ q', + '{ *', + q/b 5/, + q/c/, + q/print (("One" x 5), "\n");/, + q/q/, + ], + prog => '../lib/perl5db/t/disable-breakpoints-1', + } + ); + + $wrapper->contents_like(qr# + ^All\ \{\ actions\ cleared\.\n + #msx, + 'Test the { * command', + ); + + $wrapper->output_like(qr/OneOneOneOneOne/, + '{ * test - output is OK.', + ); +} + +# Test the ! command. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'l 3-5', + '!', + 'q', + ], + prog => '../lib/perl5db/t/disable-breakpoints-1', + } + ); + + $wrapper->contents_like(qr# + (^3:\s+my\ \$dummy\ =\ 0;\n + 4\s*\n + 5:\s+\$x\ =\ "FirstVal";)\n + .*? + ^l\ 3-5\n + \1 + #msx, + 'Test the ! command (along with l 3-5)', + ); +} + +# Test the ! -number command. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'l 3-5', + 'l 2', + '! -1', + 'q', + ], + prog => '../lib/perl5db/t/disable-breakpoints-1', + } + ); + + $wrapper->contents_like(qr# + (^3:\s+my\ \$dummy\ =\ 0;\n + 4\s*\n + 5:\s+\$x\ =\ "FirstVal";)\n + .*? + ^2==\>\s+my\ \$x\ =\ "One";\n + .*? + ^l\ 3-5\n + \1 + #msx, + 'Test the ! -n command (along with l)', + ); +} + +# Test the 'source' command. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'source ../lib/perl5db/t/source-cmd-test.perldb', + # If we have a 'q' here, then the typeahead will override the + # input, and so it won't be reached - solution: + # put a q inside the .perldb commands. + # ( This may be a bug or a misfeature. 
) + ], + prog => '../lib/perl5db/t/disable-breakpoints-1', + } + ); + + $wrapper->contents_like(qr# + ^3:\s+my\ \$dummy\ =\ 0;\n + 4\s*\n + 5:\s+\$x\ =\ "FirstVal";\n + 6\s*\n + 7:\s+\$dummy\+\+;\n + 8\s*\n + 9:\s+\$x\ =\ "SecondVal";\n + 10\s*\n + #msx, + 'Test the source command (along with l)', + ); +} + +# Test the 'source' command being traversed from within typeahead. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'source ../lib/perl5db/t/source-cmd-test-no-q.perldb', + 'q', + ], + prog => '../lib/perl5db/t/disable-breakpoints-1', + } + ); + + $wrapper->contents_like(qr# + ^3:\s+my\ \$dummy\ =\ 0;\n + 4\s*\n + 5:\s+\$x\ =\ "FirstVal";\n + 6\s*\n + 7:\s+\$dummy\+\+;\n + 8\s*\n + 9:\s+\$x\ =\ "SecondVal";\n + 10\s*\n + #msx, + 'Test the source command inside a typeahead', + ); +} + +# Test the 'H -number' command. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'l 1-10', + 'l 5-10', + 'x "Hello World"', + 'l 1-5', + 'b 3', + 'x (20+4)', + 'H -7', + 'q', + ], + prog => '../lib/perl5db/t/disable-breakpoints-1', + } + ); + + $wrapper->contents_like(qr# + ^\d+:\s+H\ -7\n + \d+:\s+x\ \(20\+4\)\n + \d+:\s+b\ 3\n + \d+:\s+l\ 1-5\n + \d+:\s+x\ "Hello\ World"\n + \d+:\s+l\ 5-10\n + \d+:\s+l\ 1-10\n + #msx, + 'Test the H -num command', + ); +} + +# Add a test for H (without arguments) +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'l 1-10', + 'l 5-10', + 'x "Hello World"', + 'l 1-5', + 'b 3', + 'x (20+4)', + 'H', + 'q', + ], + prog => '../lib/perl5db/t/disable-breakpoints-1', + } + ); + + $wrapper->contents_like(qr# + ^\d+:\s+x\ \(20\+4\)\n + \d+:\s+b\ 3\n + \d+:\s+l\ 1-5\n + \d+:\s+x\ "Hello\ World"\n + \d+:\s+l\ 5-10\n + \d+:\s+l\ 1-10\n + #msx, + 'Test the H command (without a number.)', + ); +} + +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + '= quit q', + '= foobar l', + 'foobar', + 'quit', + ], + prog => '../lib/perl5db/t/test-l-statement-1', + } + ); + + $wrapper->contents_like( + qr/ + ^1==>\s+\$x\ =\ 1;\n + 2:\s+print\ "1\\n";\n + 
3\s*\n + 4:\s+\$x\ =\ 2;\n + 5:\s+print\ "2\\n";\n + /msx, + 'Test the = (command alias) command.', + ); +} + +# Test the m statement. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'm main', + 'q', + ], + prog => '../lib/perl5db/t/disable-breakpoints-1', + } + ); + + $wrapper->contents_like(qr# + ^via\ UNIVERSAL:\ DOES$ + #msx, + "Test m for main - 1", + ); + + $wrapper->contents_like(qr# + ^via\ UNIVERSAL:\ can$ + #msx, + "Test m for main - 2", + ); +} + +# Test the m statement. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'b 41', + 'c', + 'm $obj', + 'q', + ], + prog => '../lib/perl5db/t/test-m-statement-1', + } + ); + + $wrapper->contents_like(qr#^greet$#ms, + "Test m for obj - 1", + ); + + $wrapper->contents_like(qr#^via UNIVERSAL: can$#ms, + "Test m for obj - 1", + ); +} + +# Test the M command. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'M', + 'q', + ], + prog => '../lib/perl5db/t/test-m-statement-1', + } + ); + + $wrapper->contents_like(qr# + ^'strict\.pm'\ =>\ '\d+\.\d+\ from + #msx, + "Test M", + ); + +} + +# Test the recallCommand option. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'o recallCommand=%', + 'l 3-5', + 'l 2', + '% -1', + 'q', + ], + prog => '../lib/perl5db/t/disable-breakpoints-1', + } + ); + + $wrapper->contents_like(qr# + (^3:\s+my\ \$dummy\ =\ 0;\n + 4\s*\n + 5:\s+\$x\ =\ "FirstVal";)\n + .*? + ^2==\>\s+my\ \$x\ =\ "One";\n + .*? + ^l\ 3-5\n + \1 + #msx, + 'Test the o recallCommand option', + ); +} + +# Test the dieLevel option +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + q/o dieLevel='1'/, + q/c/, + 'q', + ], + prog => '../lib/perl5db/t/test-dieLevel-option-1', + } + ); + + $wrapper->output_like(qr# + ^This\ program\ dies\.\ at\ \S+\ line\ 18\.\n + .*? 
+ ^\s+main::baz\(\)\ called\ at\ \S+\ line\ 13\n + \s+main::bar\(\)\ called\ at\ \S+\ line\ 7\n + \s+main::foo\(\)\ called\ at\ \S+\ line\ 21\n + #msx, + 'Test the o dieLevel option', + ); +} + +# Test the warnLevel option +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + q/o warnLevel='1'/, + q/c/, + 'q', + ], + prog => '../lib/perl5db/t/test-warnLevel-option-1', + } + ); + + $wrapper->contents_like(qr# + ^This\ is\ not\ a\ warning\.\ at\ \S+\ line\ 18\.\n + .*? + ^\s+main::baz\(\)\ called\ at\ \S+\ line\ 13\n + \s+main::bar\(\)\ called\ at\ \S+\ line\ 25\n + \s+main::myfunc\(\)\ called\ at\ \S+\ line\ 28\n + #msx, + 'Test the o warnLevel option', + ); +} + +# Test the t command +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 't', + 'c', + 'q', + ], + prog => '../lib/perl5db/t/disable-breakpoints-1', + } + ); + + $wrapper->contents_like(qr/ + ^main::\([^:]+:15\):\n + 15:\s+\$dummy\+\+;\n + main::\([^:]+:17\):\n + 17:\s+\$x\ =\ "FourthVal";\n + /msx, + 'Test the t command (without a number.)', + ); +} + +# Test the o AutoTrace command +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'o AutoTrace', + 'c', + 'q', + ], + prog => '../lib/perl5db/t/disable-breakpoints-1', + } + ); + + $wrapper->contents_like(qr/ + ^main::\([^:]+:15\):\n + 15:\s+\$dummy\+\+;\n + main::\([^:]+:17\):\n + 17:\s+\$x\ =\ "FourthVal";\n + /msx, + 'Test the o AutoTrace command', + ); +} + +# Test the t command with function calls +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 't', + 'b 18', + 'c', + 'x ["foo"]', + 'x ["bar"]', + 'q', + ], + prog => '../lib/perl5db/t/test-warnLevel-option-1', + } + ); + + $wrapper->contents_like(qr/ + ^main::\([^:]+:28\):\n + 28:\s+myfunc\(\);\n + main::myfunc\([^:]+:25\):\n + 25:\s+bar\(\);\n + /msx, + 'Test the t command with function calls.', + ); +} + +# Test the o AutoTrace command with function calls +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'o AutoTrace', + 'b 18', + 'c', + 'x ["foo"]', + 'x ["bar"]', + 'q', + ], + 
prog => '../lib/perl5db/t/test-warnLevel-option-1', + } + ); + + $wrapper->contents_like(qr/ + ^main::\([^:]+:28\):\n + 28:\s+myfunc\(\);\n + main::myfunc\([^:]+:25\):\n + 25:\s+bar\(\);\n + /msx, + 'Test the t command with function calls.', + ); +} + +# Test the final message. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'c', + 'q', + ], + prog => '../lib/perl5db/t/test-warnLevel-option-1', + } + ); + + $wrapper->contents_like(qr/ + ^Debugged\ program\ terminated\. + /msx, + 'Test the final "Debugged program terminated" message.', + ); +} + +# Test the o inhibit_exit=0 command +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'o inhibit_exit=0', + 'n', + 'n', + 'n', + 'n', + 'q', + ], + prog => '../lib/perl5db/t/test-warnLevel-option-1', + } + ); + + $wrapper->contents_unlike(qr/ + ^Debugged\ program\ terminated\. + /msx, + 'Test the o inhibit_exit=0 command.', + ); +} + +# Test the o PrintRet=1 option +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'o PrintRet=1', + 'b 29', + 'c', + q/$x = 's';/, + 'b 10', + 'c', + 'r', + 'q', + ], + prog => '../lib/perl5db/t/test-PrintRet-option-1', + } + ); + + $wrapper->contents_like( + qr/scalar context return from main::return_scalar: 20024/, + "Test o PrintRet=1", + ); +} + +# Test the o PrintRet=0 option +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'o PrintRet=0', + 'b 29', + 'c', + q/$x = 's';/, + 'b 10', + 'c', + 'r', + 'q', + ], + prog => '../lib/perl5db/t/test-PrintRet-option-1', + } + ); + + $wrapper->contents_unlike( + qr/scalar context/, + "Test o PrintRet=0", + ); +} + +# Test the o PrintRet=1 option in list context +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'o PrintRet=1', + 'b 29', + 'c', + q/$x = 'l';/, + 'b 17', + 'c', + 'r', + 'q', + ], + prog => '../lib/perl5db/t/test-PrintRet-option-1', + } + ); + + $wrapper->contents_like( + qr/list context return from main::return_list:\n0\s*'Foo'\n1\s*'Bar'\n2\s*'Baz'\n/, + "Test o PrintRet=1 in list context", + ); +} + +# Test 
the o PrintRet=0 option in list context +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'o PrintRet=0', + 'b 29', + 'c', + q/$x = 'l';/, + 'b 17', + 'c', + 'r', + 'q', + ], + prog => '../lib/perl5db/t/test-PrintRet-option-1', + } + ); + + $wrapper->contents_unlike( + qr/list context/, + "Test o PrintRet=0 in list context", + ); +} + +# Test the o PrintRet=1 option in void context +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'o PrintRet=1', + 'b 29', + 'c', + q/$x = 'v';/, + 'b 24', + 'c', + 'r', + 'q', + ], + prog => '../lib/perl5db/t/test-PrintRet-option-1', + } + ); + + $wrapper->contents_like( + qr/void context return from main::return_void/, + "Test o PrintRet=1 in void context", + ); +} + +# Test the o PrintRet=0 option in void context +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'o PrintRet=0', + 'b 29', + 'c', + q/$x = 'v';/, + 'b 24', + 'c', + 'r', + 'q', + ], + prog => '../lib/perl5db/t/test-PrintRet-option-1', + } + ); + + $wrapper->contents_unlike( + qr/void context/, + "Test o PrintRet=0 in void context", + ); +} + +# Test the o frame option. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + # This is to avoid getting the "Debugged program terminated" + # junk that interferes with the normal output. + 'o inhibit_exit=0', + 'b 10', + 'c', + 'o frame=255', + 'c', + 'q', + ], + prog => '../lib/perl5db/t/test-frame-option-1', + } + ); + + $wrapper->contents_like( + qr/ + in\s*\.=main::my_other_func\(3,\ 1200\)\ from.*? + out\s*\.=main::my_other_func\(3,\ 1200\)\ from + /msx, + "Test o PrintRet=0 in void context", + ); +} + +{ # test t expr + my $wrapper = DebugWrap->new( + { + cmds => + [ + # This is to avoid getting the "Debugged program terminated" + # junk that interferes with the normal output. 
+ 'o inhibit_exit=0', + 't fact(3)', + 'q', + ], + prog => '../lib/perl5db/t/fact', + } + ); + + $wrapper->contents_like( + qr/ + (?:^main::fact.*return\ \$n\ \*\ fact\(\$n\ -\ 1\);.*) + /msx, + "Test t expr", + ); +} + +# Test the w for lexical variables expression. +{ + my $wrapper = DebugWrap->new( + { + cmds => + [ + # This is to avoid getting the "Debugger program terminated" + # junk that interferes with the normal output. + 'w $exp', + 'n', + 'n', + 'n', + 'n', + 'q', + ], + prog => '../lib/perl5db/t/break-on-dot', + } + ); + + $wrapper->contents_like( + qr/ +\s+old\ value:\s+'1'\n +\s+new\ value:\s+'2'\n + /msx, + "Test w for lexical values.", + ); +} + +# Test the perldoc command +# We don't actually run the program, but we need to provide one to the wrapper. +SKIP: +{ + $^O eq "linux" + or skip "man errors aren't especially portable", 1; + -x '/usr/bin/man' + or skip "man command seems to be missing", 1; + local $ENV{LANG} = "C"; + local $ENV{LC_MESSAGES} = "C"; + local $ENV{LC_ALL} = "C"; + my $wrapper = DebugWrap->new( + { + cmds => + [ + 'perldoc perlrules', + 'q', + ], + prog => '../lib/perl5db/t/fact', + } + ); + + $wrapper->output_like( + qr/No manual entry for perlrules/, + 'perldoc command works fine', + ); +} + END { 1 while unlink ($rc_filename, $out_fn); } diff --git a/gnu/usr.bin/perl/lib/perl5db/t/fact b/gnu/usr.bin/perl/lib/perl5db/t/fact new file mode 100644 index 00000000000..ac25eac1edb --- /dev/null +++ b/gnu/usr.bin/perl/lib/perl5db/t/fact @@ -0,0 +1,14 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +sub fact { + my $n = shift; + if ($n > 1) { + return $n * fact($n - 1); + } else { + return 1; + } +} +fact(5); diff --git a/gnu/usr.bin/perl/lib/perl5db/t/load-modules b/gnu/usr.bin/perl/lib/perl5db/t/load-modules new file mode 100644 index 00000000000..202326357c5 --- /dev/null +++ b/gnu/usr.bin/perl/lib/perl5db/t/load-modules @@ -0,0 +1,6 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +use Scalar::Util; diff --git 
a/gnu/usr.bin/perl/lib/perl5db/t/source-cmd-test-no-q.perldb b/gnu/usr.bin/perl/lib/perl5db/t/source-cmd-test-no-q.perldb new file mode 100644 index 00000000000..6a6fddd3676 --- /dev/null +++ b/gnu/usr.bin/perl/lib/perl5db/t/source-cmd-test-no-q.perldb @@ -0,0 +1 @@ +l 3-10 diff --git a/gnu/usr.bin/perl/lib/perl5db/t/source-cmd-test.perldb b/gnu/usr.bin/perl/lib/perl5db/t/source-cmd-test.perldb new file mode 100644 index 00000000000..41a73657640 --- /dev/null +++ b/gnu/usr.bin/perl/lib/perl5db/t/source-cmd-test.perldb @@ -0,0 +1,2 @@ +l 3-10 +q diff --git a/gnu/usr.bin/perl/lib/perl5db/t/test-PrintRet-option-1 b/gnu/usr.bin/perl/lib/perl5db/t/test-PrintRet-option-1 new file mode 100644 index 00000000000..ccf6607f794 --- /dev/null +++ b/gnu/usr.bin/perl/lib/perl5db/t/test-PrintRet-option-1 @@ -0,0 +1,46 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +my ($x, $y); + +sub return_scalar +{ + $y++; + + return "20024"; +} + +sub return_list +{ + $y++; + + return ("Foo", "Bar", "Baz"); +} + +sub return_void +{ + $y++; + + return; +} + +$y++; + +# Choose one based on $x +# +if ($x eq "s") +{ + my $s = return_scalar(); +} +elsif ($x eq "l") +{ + my @l = return_list(); +} +else +{ + return_void(); + $y++; +} + diff --git a/gnu/usr.bin/perl/lib/perl5db/t/test-a-statement-1 b/gnu/usr.bin/perl/lib/perl5db/t/test-a-statement-1 new file mode 100644 index 00000000000..a1782a0034b --- /dev/null +++ b/gnu/usr.bin/perl/lib/perl5db/t/test-a-statement-1 @@ -0,0 +1,22 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +while (my $l = <DATA>) { + chomp $l; + print "$l\n"; +} + +__DATA__ +123456789012 This is a test +3456789012345This is another test +6789012345678This is yet another test +9012345678901Is this yet another test? +234567890123 Yes, this is another test. +4567890123456I think this is a test. +7890123456789Now is the time. +0123456789012For all good men. +3456789012345To come to the aid party. +678901234678 This is the tenth line. 
+ diff --git a/gnu/usr.bin/perl/lib/perl5db/t/test-dieLevel-option-1 b/gnu/usr.bin/perl/lib/perl5db/t/test-dieLevel-option-1 new file mode 100644 index 00000000000..0849ae2a0d2 --- /dev/null +++ b/gnu/usr.bin/perl/lib/perl5db/t/test-dieLevel-option-1 @@ -0,0 +1,22 @@ +use strict; +use warnings; + +sub foo +{ + print "In foo\n"; + bar(); +} + +sub bar +{ + print "In baz\n"; + baz(); +} + +sub baz +{ + die "This program dies."; +} + +foo(); + diff --git a/gnu/usr.bin/perl/lib/perl5db/t/test-frame-option-1 b/gnu/usr.bin/perl/lib/perl5db/t/test-frame-option-1 new file mode 100644 index 00000000000..a6b4dd8c4fb --- /dev/null +++ b/gnu/usr.bin/perl/lib/perl5db/t/test-frame-option-1 @@ -0,0 +1,26 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +sub my_func +{ + my ($num1, $num2) = @_; + + print $num1+$num2, "\n"; + + my_other_func ($num1*3, $num2*24); + + return $num1*$num2; +} + +sub my_other_func +{ + my ($num1, $num2) = @_; + + print "my_other_func: n1=<$num1> n2=<$num2>\n"; + + return $num1 * $num2; +} + +my_func(1, 50); diff --git a/gnu/usr.bin/perl/lib/perl5db/t/test-l-statement-1 b/gnu/usr.bin/perl/lib/perl5db/t/test-l-statement-1 new file mode 100644 index 00000000000..990a1695034 --- /dev/null +++ b/gnu/usr.bin/perl/lib/perl5db/t/test-l-statement-1 @@ -0,0 +1,20 @@ +$x = 1; +print "1\n"; + +$x = 2; +print "2\n"; + +$x = 3; +print "3\n"; + +$x = 4; +print "4\n"; + +$x = 5; +print "5\n"; + +$x = 6; +print "6\n"; + +$x = 7; +print "7\n"; diff --git a/gnu/usr.bin/perl/lib/perl5db/t/test-l-statement-2 b/gnu/usr.bin/perl/lib/perl5db/t/test-l-statement-2 new file mode 100644 index 00000000000..9e6a2105e98 --- /dev/null +++ b/gnu/usr.bin/perl/lib/perl5db/t/test-l-statement-2 @@ -0,0 +1,24 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +sub fact { + my $n = shift; + if ($n > 1) { + return $n * fact($n - 1); + } else { + return 1; + } +} + +sub bar { + print "One\n"; + print "Two\n"; + print "Three\n"; + + return; +} + +fact(5); +bar(); diff --git 
a/gnu/usr.bin/perl/lib/perl5db/t/test-m-statement-1 b/gnu/usr.bin/perl/lib/perl5db/t/test-m-statement-1 new file mode 100644 index 00000000000..a699ed342e7 --- /dev/null +++ b/gnu/usr.bin/perl/lib/perl5db/t/test-m-statement-1 @@ -0,0 +1,43 @@ +use strict; +use warnings; + +package MyClass; + +sub new +{ + my $class = shift; + + my $self = bless {}, $class; + + $self->_init(@_); + + return $self; +} + +sub _init +{ + my $self = shift; + + $self->{foo} = 'bar'; + + return; +} + +sub greet +{ + my ($self, $msg) = @_; + + print "$msg - $self->{foo}\n"; + + return; +} + +1; + +package main; + +my $obj = MyClass->new; + +$obj->greet("Hello"); + +1; diff --git a/gnu/usr.bin/perl/lib/perl5db/t/test-passing-at-underscore-to-x-etc b/gnu/usr.bin/perl/lib/perl5db/t/test-passing-at-underscore-to-x-etc new file mode 100644 index 00000000000..ff14df65739 --- /dev/null +++ b/gnu/usr.bin/perl/lib/perl5db/t/test-passing-at-underscore-to-x-etc @@ -0,0 +1,15 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +print "One\n"; + +sub my_pass_args_to +{ + print "Two\n"; +} + +my_pass_args_to ("Arg1", "Capsula", "GreekHumor", "Socrates"); + +print "Three\n"; diff --git a/gnu/usr.bin/perl/lib/perl5db/t/test-r-statement b/gnu/usr.bin/perl/lib/perl5db/t/test-r-statement new file mode 100644 index 00000000000..f8c7bf5555c --- /dev/null +++ b/gnu/usr.bin/perl/lib/perl5db/t/test-r-statement @@ -0,0 +1,27 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +my $var = "Test"; + +sub mysub +{ + my $flag = 1; + + $flag = 0; + + print "Foo\n"; + + if ($flag) + { + print "Bar\n"; + } + + return; +} + +mysub(); + +$var .= "More"; + diff --git a/gnu/usr.bin/perl/lib/perl5db/t/test-w-statement-1 b/gnu/usr.bin/perl/lib/perl5db/t/test-w-statement-1 new file mode 100644 index 00000000000..bfd5ccd7d80 --- /dev/null +++ b/gnu/usr.bin/perl/lib/perl5db/t/test-w-statement-1 @@ -0,0 +1,20 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +use vars qw($foo); + +$foo = 1; + +print "Hello\n"; + +for my $idx (map 
{ $_ * 10 } 1 .. 10) +{ + if ($idx > 17) + { + $foo = 2; + print "Baz\n"; + } +} + diff --git a/gnu/usr.bin/perl/lib/perl5db/t/test-warnLevel-option-1 b/gnu/usr.bin/perl/lib/perl5db/t/test-warnLevel-option-1 new file mode 100644 index 00000000000..04b71f99137 --- /dev/null +++ b/gnu/usr.bin/perl/lib/perl5db/t/test-warnLevel-option-1 @@ -0,0 +1,29 @@ +use strict; +use warnings; + +sub foo +{ + print "In foo\n"; + bar(); +} + +sub bar +{ + print "In baz\n"; + baz(); +} + +sub baz +{ + warn "This is not a warning."; + + return; +} + +sub myfunc +{ + bar(); +} + +myfunc(); + diff --git a/gnu/usr.bin/perl/lib/sort.pm b/gnu/usr.bin/perl/lib/sort.pm index 922f82b4691..7c8e50db577 100644 --- a/gnu/usr.bin/perl/lib/sort.pm +++ b/gnu/usr.bin/perl/lib/sort.pm @@ -1,6 +1,6 @@ package sort; -our $VERSION = '2.01'; +our $VERSION = '2.02'; # The hints for pp_sort are now stored in $^H{sort}; older versions # of perl used the global variable $sort::hints. -- rjh 2005-12-19 @@ -180,14 +180,14 @@ So now this code would be written: { use sort qw(defaults _quicksort); # force quicksort no sort "stable"; # stability not wanted my $current; - BEGIN { $current = print sort::current; } + BEGIN { $current = sort::current; } print "$current\n"; @a = sort @b; # Pragmas go out of scope at the end of the block } { use sort qw(defaults stable); # force stability my $current; - BEGIN { $current = print sort::current; } + BEGIN { $current = sort::current; } print "$current\n"; @c = sort @d; } diff --git a/gnu/usr.bin/perl/lib/unicore/ArabicShaping.txt b/gnu/usr.bin/perl/lib/unicore/ArabicShaping.txt index 35e79f6e1f3..fd22f5d6e07 100644 --- a/gnu/usr.bin/perl/lib/unicore/ArabicShaping.txt +++ b/gnu/usr.bin/perl/lib/unicore/ArabicShaping.txt @@ -1,20 +1,20 @@ -# ArabicShaping-6.1.0.txt -# Date: 2011-04-15, 23:16:00 GMT [KW] +# ArabicShaping-6.2.0.txt +# Date: 2012-05-15, 21:05:00 GMT [KW] # # This file is a normative contributory data file in the # Unicode Character Database. 
# -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # This file defines the Joining_Type and Joining_Group # property values for Arabic, Syriac, N'Ko, and Mandaic # positional shaping, repeating in machine readable form the # information exemplified in Tables 8-3, 8-8, 8-9, 8-10, 8-13, 8-14, -# 8-15, 13-5, 14-5, and 14-6 of The Unicode Standard, Version 6.1. +# 8-15, 13-5, 14-5, and 14-6 of The Unicode Standard, Version 6.2. # # See sections 8.2, 8.3, 13.5, and 14.12 of The Unicode Standard, -# Version 6.1 for more information. +# Version 6.2 for more information. # # Each line contains four fields, separated by a semicolon. # diff --git a/gnu/usr.bin/perl/lib/unicore/BidiMirroring.txt b/gnu/usr.bin/perl/lib/unicore/BidiMirroring.txt index 2e719bc1e05..ec41b769375 100644 --- a/gnu/usr.bin/perl/lib/unicore/BidiMirroring.txt +++ b/gnu/usr.bin/perl/lib/unicore/BidiMirroring.txt @@ -1,19 +1,19 @@ -# BidiMirroring-6.1.0.txt -# Date: 2011-12-20, 19:31:00 GMT [KW, LI] +# BidiMirroring-6.2.0.txt +# Date: 2012-05-15, 24:19:00 GMT [KW, LI] # # Bidi_Mirroring_Glyph Property # # This file is an informative contributory data file in the # Unicode Character Database. # -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # This data file lists characters that have the Bidi_Mirrored=Yes property # value, for which there is another Unicode character that typically has a glyph # that is the mirror image of the original character's glyph. # -# The repertoire covered by the file is Unicode 6.1.0. +# The repertoire covered by the file is Unicode 6.2.0. # # The file contains a list of lines with mappings from one code point # to another one for character-based mirroring. @@ -30,16 +30,8 @@ # characters exist with mirrored glyphs, are # listed as comments at the end of the file. 
# -# Note: (2011-12-19) There is an inconsistency between the -# following statement about the default value -# of the Bidi_Mirroring_Glyph property and the -# value of the @missing line for Bidi_Mirroring_Glyph in -# PropertyValueAliases.txt. This inconsistency was discovered too -# late in the release process to be resolved by -# the UTC. The inconsistency will be resolved in a future revision. -# # Formally, the default value of the Bidi_Mirroring_Glyph property -# for each code point is the code point itself, unless a mapping to +# for each code point is <none>, unless a mapping to # some other character is specified in this data file. When a code # point has the default value for the Bidi_Mirroring_Glyph property, # that means that no other character exists whose glyph is suitable @@ -50,12 +42,13 @@ # # This file was originally created by Markus Scherer. # Extended for Unicode 3.2, 4.0, 4.1, 5.0, 5.1, 5.2, and 6.0 by Ken Whistler, -# and for Unicode 6.1 by Ken Whistler and Laurentiu Iancu. +# and for Unicode 6.1 and 6.2 by Ken Whistler and Laurentiu Iancu. # # ############################################################ # # Property: Bidi_Mirroring_Glyph # +# @missing: 0000..10FFFF; <none> 0028; 0029 # LEFT PARENTHESIS 0029; 0028 # RIGHT PARENTHESIS diff --git a/gnu/usr.bin/perl/lib/unicore/Blocks.txt b/gnu/usr.bin/perl/lib/unicore/Blocks.txt index f9a384e3ffb..6a06ab14451 100644 --- a/gnu/usr.bin/perl/lib/unicore/Blocks.txt +++ b/gnu/usr.bin/perl/lib/unicore/Blocks.txt @@ -1,8 +1,8 @@ -# Blocks-6.1.0.txt -# Date: 2011-06-14, 18:26:00 GMT [KW, LI] +# Blocks-6.2.0.txt +# Date: 2012-05-14, 22:42:00 GMT [KW, LI] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -248,4 +248,4 @@ E0100..E01EF; Variation Selectors Supplement F0000..FFFFF; Supplementary Private Use Area-A 100000..10FFFF; Supplementary Private Use Area-B -# EOF
\ No newline at end of file +# EOF diff --git a/gnu/usr.bin/perl/lib/unicore/CJKRadicals.txt b/gnu/usr.bin/perl/lib/unicore/CJKRadicals.txt index a7debb6e1ed..53c6731aacc 100644 --- a/gnu/usr.bin/perl/lib/unicore/CJKRadicals.txt +++ b/gnu/usr.bin/perl/lib/unicore/CJKRadicals.txt @@ -1,8 +1,8 @@ -# CJKRadicals-6.1.0.txt -# Date: 2011-08-30, 23:14:00 GMT [RC, KW] +# CJKRadicals-6.2.0.txt +# Date: 2012-05-15, 21:08:00 GMT [RC, KW] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr38/ # @@ -24,7 +24,7 @@ # # This file was created for Unicode 5.2 by Richard Cook. # Updated for Unicode 6.0 by Richard Cook. -# Updated for Unicode 6.1 by Ken Whistler. +# Updated for Unicode 6.1 and 6.2 by Ken Whistler. # # #################################################### diff --git a/gnu/usr.bin/perl/lib/unicore/CaseFolding.txt b/gnu/usr.bin/perl/lib/unicore/CaseFolding.txt index 0d9a4090cde..df1813d2adc 100644 --- a/gnu/usr.bin/perl/lib/unicore/CaseFolding.txt +++ b/gnu/usr.bin/perl/lib/unicore/CaseFolding.txt @@ -1,8 +1,8 @@ -# CaseFolding-6.1.0.txt -# Date: 2011-07-25, 21:21:56 GMT [MD] +# CaseFolding-6.2.0.txt +# Date: 2012-08-14, 17:54:49 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -1222,3 +1222,5 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z 10425; C; 1044D; # DESERET CAPITAL LETTER ENG 10426; C; 1044E; # DESERET CAPITAL LETTER OI 10427; C; 1044F; # DESERET CAPITAL LETTER EW +# +# EOF diff --git a/gnu/usr.bin/perl/lib/unicore/CompositionExclusions.txt b/gnu/usr.bin/perl/lib/unicore/CompositionExclusions.txt index f12f7d61bfc..cd19f42255d 100644 --- a/gnu/usr.bin/perl/lib/unicore/CompositionExclusions.txt +++ b/gnu/usr.bin/perl/lib/unicore/CompositionExclusions.txt @@ -1,5 +1,5 @@ -# CompositionExclusions-6.1.0.txt -# Date: 2011-07-12, 00:13:00 GMT [KW, LI] +# CompositionExclusions-6.2.0.txt +# Date: 2012-05-15, 22:21:00 GMT [KW, LI] # # This file lists the characters for the Composition Exclusion Table # defined in UAX #15, Unicode Normalization Forms. @@ -7,7 +7,7 @@ # This file is a normative contributory data file in the # Unicode Character Database. # -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # For more information, see @@ -203,3 +203,4 @@ FB4E # HEBREW LETTER PE WITH RAFE # Total code points: 4 +# EOF diff --git a/gnu/usr.bin/perl/lib/unicore/DAge.txt b/gnu/usr.bin/perl/lib/unicore/DAge.txt index 6ff0206b4cf..0629232a217 100644 --- a/gnu/usr.bin/perl/lib/unicore/DAge.txt +++ b/gnu/usr.bin/perl/lib/unicore/DAge.txt @@ -1,5 +1,5 @@ -# DerivedAge-6.1.0.txt -# Date: 2012-01-20, 21:47:00 GMT [MD, KW] +# DerivedAge-6.2.0.txt +# Date: 2012-09-20, 21:30:39 GMT [MD] # # Unicode Character Database # Copyright (c) 1991-2012 Unicode, Inc. 
@@ -1294,4 +1294,14 @@ FA2E..FA2F ; 6.1 # [2] CJK COMPATIBILITY IDEOGRAPH-FA2E..CJK COMPATIBILITY # Total code points: 732 +# ================================================ + +# Age=V6_2 + +# Newly assigned in Unicode 6.2.0 (September, 2012) + +20BA ; 6.2 # TURKISH LIRA SIGN + +# Total code points: 1 + # EOF diff --git a/gnu/usr.bin/perl/lib/unicore/DCoreProperties.txt b/gnu/usr.bin/perl/lib/unicore/DCoreProperties.txt index abdcd2201e2..395004c09b2 100644 --- a/gnu/usr.bin/perl/lib/unicore/DCoreProperties.txt +++ b/gnu/usr.bin/perl/lib/unicore/DCoreProperties.txt @@ -1,8 +1,8 @@ -# DerivedCoreProperties-6.1.0.txt -# Date: 2011-12-11, 18:26:55 GMT [MD] +# DerivedCoreProperties-6.2.0.txt +# Date: 2012-05-20, 00:42:31 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -9228,7 +9228,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 208D ; Grapheme_Base # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; Grapheme_Base # Pe SUBSCRIPT RIGHT PARENTHESIS 2090..209C ; Grapheme_Base # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T -20A0..20B9 ; Grapheme_Base # Sc [26] EURO-CURRENCY SIGN..INDIAN RUPEE SIGN +20A0..20BA ; Grapheme_Base # Sc [27] EURO-CURRENCY SIGN..TURKISH LIRA SIGN 2100..2101 ; Grapheme_Base # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT 2102 ; Grapheme_Base # L& DOUBLE-STRUCK CAPITAL C 2103..2106 ; Grapheme_Base # So [4] DEGREE CELSIUS..CADA UNA @@ -9980,7 +9980,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 2B740..2B81D ; Grapheme_Base # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; Grapheme_Base # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 108660 +# Total code points: 108661 # 
================================================ diff --git a/gnu/usr.bin/perl/lib/unicore/DNormalizationProps.txt b/gnu/usr.bin/perl/lib/unicore/DNormalizationProps.txt index 2d717477677..2ecd8e22fff 100644 --- a/gnu/usr.bin/perl/lib/unicore/DNormalizationProps.txt +++ b/gnu/usr.bin/perl/lib/unicore/DNormalizationProps.txt @@ -1,8 +1,8 @@ -# DerivedNormalizationProps-6.1.0.txt -# Date: 2011-07-26, 04:18:07 GMT [MD] +# DerivedNormalizationProps-6.2.0.txt +# Date: 2012-05-23, 20:34:48 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ diff --git a/gnu/usr.bin/perl/lib/unicore/EastAsianWidth.txt b/gnu/usr.bin/perl/lib/unicore/EastAsianWidth.txt index ea38eef618d..949f7ff3925 100644 --- a/gnu/usr.bin/perl/lib/unicore/EastAsianWidth.txt +++ b/gnu/usr.bin/perl/lib/unicore/EastAsianWidth.txt @@ -1,12 +1,12 @@ -# EastAsianWidth-6.1.0.txt -# Date: 2011-09-19, 18:46:00 GMT [KW] +# EastAsianWidth-6.2.0.txt +# Date: 2012-05-15, 18:30:00 GMT [KW] # # East Asian Width Properties # # This file is an informative contributory data file in the # Unicode Character Database. # -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # The format is two fields separated by a semicolon. 
@@ -3813,11 +3813,11 @@ 11A0;N # HANGUL JUNGSEONG ARAEA-U 11A1;N # HANGUL JUNGSEONG ARAEA-I 11A2;N # HANGUL JUNGSEONG SSANGARAEA -11A3;W # HANGUL JUNGSEONG A-EU -11A4;W # HANGUL JUNGSEONG YA-U -11A5;W # HANGUL JUNGSEONG YEO-YA -11A6;W # HANGUL JUNGSEONG O-YA -11A7;W # HANGUL JUNGSEONG O-YAE +11A3;N # HANGUL JUNGSEONG A-EU +11A4;N # HANGUL JUNGSEONG YA-U +11A5;N # HANGUL JUNGSEONG YEO-YA +11A6;N # HANGUL JUNGSEONG O-YA +11A7;N # HANGUL JUNGSEONG O-YAE 11A8;N # HANGUL JONGSEONG KIYEOK 11A9;N # HANGUL JONGSEONG SSANGKIYEOK 11AA;N # HANGUL JONGSEONG KIYEOK-SIOS @@ -3900,12 +3900,12 @@ 11F7;N # HANGUL JONGSEONG HIEUH-MIEUM 11F8;N # HANGUL JONGSEONG HIEUH-PIEUP 11F9;N # HANGUL JONGSEONG YEORINHIEUH -11FA;W # HANGUL JONGSEONG KIYEOK-NIEUN -11FB;W # HANGUL JONGSEONG KIYEOK-PIEUP -11FC;W # HANGUL JONGSEONG KIYEOK-CHIEUCH -11FD;W # HANGUL JONGSEONG KIYEOK-KHIEUKH -11FE;W # HANGUL JONGSEONG KIYEOK-HIEUH -11FF;W # HANGUL JONGSEONG SSANGNIEUN +11FA;N # HANGUL JONGSEONG KIYEOK-NIEUN +11FB;N # HANGUL JONGSEONG KIYEOK-PIEUP +11FC;N # HANGUL JONGSEONG KIYEOK-CHIEUCH +11FD;N # HANGUL JONGSEONG KIYEOK-KHIEUKH +11FE;N # HANGUL JONGSEONG KIYEOK-HIEUH +11FF;N # HANGUL JONGSEONG SSANGNIEUN 1200;N # ETHIOPIC SYLLABLE HA 1201;N # ETHIOPIC SYLLABLE HU 1202;N # ETHIOPIC SYLLABLE HI @@ -7228,6 +7228,7 @@ 20B7;N # SPESMILO SIGN 20B8;N # TENGE SIGN 20B9;N # INDIAN RUPEE SIGN +20BA;N # TURKISH LIRA SIGN 20D0;N # COMBINING LEFT HARPOON ABOVE 20D1;N # COMBINING RIGHT HARPOON ABOVE 20D2;N # COMBINING LONG VERTICAL LINE OVERLAY @@ -14394,78 +14395,78 @@ ABF7;N # MEETEI MAYEK DIGIT SEVEN ABF8;N # MEETEI MAYEK DIGIT EIGHT ABF9;N # MEETEI MAYEK DIGIT NINE AC00..D7A3;W # <Hangul Syllable, First>..<Hangul Syllable, Last> -D7B0;W # HANGUL JUNGSEONG O-YEO -D7B1;W # HANGUL JUNGSEONG O-O-I -D7B2;W # HANGUL JUNGSEONG YO-A -D7B3;W # HANGUL JUNGSEONG YO-AE -D7B4;W # HANGUL JUNGSEONG YO-EO -D7B5;W # HANGUL JUNGSEONG U-YEO -D7B6;W # HANGUL JUNGSEONG U-I-I -D7B7;W # HANGUL JUNGSEONG YU-AE -D7B8;W # HANGUL 
JUNGSEONG YU-O -D7B9;W # HANGUL JUNGSEONG EU-A -D7BA;W # HANGUL JUNGSEONG EU-EO -D7BB;W # HANGUL JUNGSEONG EU-E -D7BC;W # HANGUL JUNGSEONG EU-O -D7BD;W # HANGUL JUNGSEONG I-YA-O -D7BE;W # HANGUL JUNGSEONG I-YAE -D7BF;W # HANGUL JUNGSEONG I-YEO -D7C0;W # HANGUL JUNGSEONG I-YE -D7C1;W # HANGUL JUNGSEONG I-O-I -D7C2;W # HANGUL JUNGSEONG I-YO -D7C3;W # HANGUL JUNGSEONG I-YU -D7C4;W # HANGUL JUNGSEONG I-I -D7C5;W # HANGUL JUNGSEONG ARAEA-A -D7C6;W # HANGUL JUNGSEONG ARAEA-E -D7CB;W # HANGUL JONGSEONG NIEUN-RIEUL -D7CC;W # HANGUL JONGSEONG NIEUN-CHIEUCH -D7CD;W # HANGUL JONGSEONG SSANGTIKEUT -D7CE;W # HANGUL JONGSEONG SSANGTIKEUT-PIEUP -D7CF;W # HANGUL JONGSEONG TIKEUT-PIEUP -D7D0;W # HANGUL JONGSEONG TIKEUT-SIOS -D7D1;W # HANGUL JONGSEONG TIKEUT-SIOS-KIYEOK -D7D2;W # HANGUL JONGSEONG TIKEUT-CIEUC -D7D3;W # HANGUL JONGSEONG TIKEUT-CHIEUCH -D7D4;W # HANGUL JONGSEONG TIKEUT-THIEUTH -D7D5;W # HANGUL JONGSEONG RIEUL-SSANGKIYEOK -D7D6;W # HANGUL JONGSEONG RIEUL-KIYEOK-HIEUH -D7D7;W # HANGUL JONGSEONG SSANGRIEUL-KHIEUKH -D7D8;W # HANGUL JONGSEONG RIEUL-MIEUM-HIEUH -D7D9;W # HANGUL JONGSEONG RIEUL-PIEUP-TIKEUT -D7DA;W # HANGUL JONGSEONG RIEUL-PIEUP-PHIEUPH -D7DB;W # HANGUL JONGSEONG RIEUL-YESIEUNG -D7DC;W # HANGUL JONGSEONG RIEUL-YEORINHIEUH-HIEUH -D7DD;W # HANGUL JONGSEONG KAPYEOUNRIEUL -D7DE;W # HANGUL JONGSEONG MIEUM-NIEUN -D7DF;W # HANGUL JONGSEONG MIEUM-SSANGNIEUN -D7E0;W # HANGUL JONGSEONG SSANGMIEUM -D7E1;W # HANGUL JONGSEONG MIEUM-PIEUP-SIOS -D7E2;W # HANGUL JONGSEONG MIEUM-CIEUC -D7E3;W # HANGUL JONGSEONG PIEUP-TIKEUT -D7E4;W # HANGUL JONGSEONG PIEUP-RIEUL-PHIEUPH -D7E5;W # HANGUL JONGSEONG PIEUP-MIEUM -D7E6;W # HANGUL JONGSEONG SSANGPIEUP -D7E7;W # HANGUL JONGSEONG PIEUP-SIOS-TIKEUT -D7E8;W # HANGUL JONGSEONG PIEUP-CIEUC -D7E9;W # HANGUL JONGSEONG PIEUP-CHIEUCH -D7EA;W # HANGUL JONGSEONG SIOS-MIEUM -D7EB;W # HANGUL JONGSEONG SIOS-KAPYEOUNPIEUP -D7EC;W # HANGUL JONGSEONG SSANGSIOS-KIYEOK -D7ED;W # HANGUL JONGSEONG SSANGSIOS-TIKEUT -D7EE;W # HANGUL JONGSEONG 
SIOS-PANSIOS -D7EF;W # HANGUL JONGSEONG SIOS-CIEUC -D7F0;W # HANGUL JONGSEONG SIOS-CHIEUCH -D7F1;W # HANGUL JONGSEONG SIOS-THIEUTH -D7F2;W # HANGUL JONGSEONG SIOS-HIEUH -D7F3;W # HANGUL JONGSEONG PANSIOS-PIEUP -D7F4;W # HANGUL JONGSEONG PANSIOS-KAPYEOUNPIEUP -D7F5;W # HANGUL JONGSEONG YESIEUNG-MIEUM -D7F6;W # HANGUL JONGSEONG YESIEUNG-HIEUH -D7F7;W # HANGUL JONGSEONG CIEUC-PIEUP -D7F8;W # HANGUL JONGSEONG CIEUC-SSANGPIEUP -D7F9;W # HANGUL JONGSEONG SSANGCIEUC -D7FA;W # HANGUL JONGSEONG PHIEUPH-SIOS -D7FB;W # HANGUL JONGSEONG PHIEUPH-THIEUTH +D7B0;N # HANGUL JUNGSEONG O-YEO +D7B1;N # HANGUL JUNGSEONG O-O-I +D7B2;N # HANGUL JUNGSEONG YO-A +D7B3;N # HANGUL JUNGSEONG YO-AE +D7B4;N # HANGUL JUNGSEONG YO-EO +D7B5;N # HANGUL JUNGSEONG U-YEO +D7B6;N # HANGUL JUNGSEONG U-I-I +D7B7;N # HANGUL JUNGSEONG YU-AE +D7B8;N # HANGUL JUNGSEONG YU-O +D7B9;N # HANGUL JUNGSEONG EU-A +D7BA;N # HANGUL JUNGSEONG EU-EO +D7BB;N # HANGUL JUNGSEONG EU-E +D7BC;N # HANGUL JUNGSEONG EU-O +D7BD;N # HANGUL JUNGSEONG I-YA-O +D7BE;N # HANGUL JUNGSEONG I-YAE +D7BF;N # HANGUL JUNGSEONG I-YEO +D7C0;N # HANGUL JUNGSEONG I-YE +D7C1;N # HANGUL JUNGSEONG I-O-I +D7C2;N # HANGUL JUNGSEONG I-YO +D7C3;N # HANGUL JUNGSEONG I-YU +D7C4;N # HANGUL JUNGSEONG I-I +D7C5;N # HANGUL JUNGSEONG ARAEA-A +D7C6;N # HANGUL JUNGSEONG ARAEA-E +D7CB;N # HANGUL JONGSEONG NIEUN-RIEUL +D7CC;N # HANGUL JONGSEONG NIEUN-CHIEUCH +D7CD;N # HANGUL JONGSEONG SSANGTIKEUT +D7CE;N # HANGUL JONGSEONG SSANGTIKEUT-PIEUP +D7CF;N # HANGUL JONGSEONG TIKEUT-PIEUP +D7D0;N # HANGUL JONGSEONG TIKEUT-SIOS +D7D1;N # HANGUL JONGSEONG TIKEUT-SIOS-KIYEOK +D7D2;N # HANGUL JONGSEONG TIKEUT-CIEUC +D7D3;N # HANGUL JONGSEONG TIKEUT-CHIEUCH +D7D4;N # HANGUL JONGSEONG TIKEUT-THIEUTH +D7D5;N # HANGUL JONGSEONG RIEUL-SSANGKIYEOK +D7D6;N # HANGUL JONGSEONG RIEUL-KIYEOK-HIEUH +D7D7;N # HANGUL JONGSEONG SSANGRIEUL-KHIEUKH +D7D8;N # HANGUL JONGSEONG RIEUL-MIEUM-HIEUH +D7D9;N # HANGUL JONGSEONG RIEUL-PIEUP-TIKEUT +D7DA;N # HANGUL JONGSEONG RIEUL-PIEUP-PHIEUPH +D7DB;N # 
HANGUL JONGSEONG RIEUL-YESIEUNG +D7DC;N # HANGUL JONGSEONG RIEUL-YEORINHIEUH-HIEUH +D7DD;N # HANGUL JONGSEONG KAPYEOUNRIEUL +D7DE;N # HANGUL JONGSEONG MIEUM-NIEUN +D7DF;N # HANGUL JONGSEONG MIEUM-SSANGNIEUN +D7E0;N # HANGUL JONGSEONG SSANGMIEUM +D7E1;N # HANGUL JONGSEONG MIEUM-PIEUP-SIOS +D7E2;N # HANGUL JONGSEONG MIEUM-CIEUC +D7E3;N # HANGUL JONGSEONG PIEUP-TIKEUT +D7E4;N # HANGUL JONGSEONG PIEUP-RIEUL-PHIEUPH +D7E5;N # HANGUL JONGSEONG PIEUP-MIEUM +D7E6;N # HANGUL JONGSEONG SSANGPIEUP +D7E7;N # HANGUL JONGSEONG PIEUP-SIOS-TIKEUT +D7E8;N # HANGUL JONGSEONG PIEUP-CIEUC +D7E9;N # HANGUL JONGSEONG PIEUP-CHIEUCH +D7EA;N # HANGUL JONGSEONG SIOS-MIEUM +D7EB;N # HANGUL JONGSEONG SIOS-KAPYEOUNPIEUP +D7EC;N # HANGUL JONGSEONG SSANGSIOS-KIYEOK +D7ED;N # HANGUL JONGSEONG SSANGSIOS-TIKEUT +D7EE;N # HANGUL JONGSEONG SIOS-PANSIOS +D7EF;N # HANGUL JONGSEONG SIOS-CIEUC +D7F0;N # HANGUL JONGSEONG SIOS-CHIEUCH +D7F1;N # HANGUL JONGSEONG SIOS-THIEUTH +D7F2;N # HANGUL JONGSEONG SIOS-HIEUH +D7F3;N # HANGUL JONGSEONG PANSIOS-PIEUP +D7F4;N # HANGUL JONGSEONG PANSIOS-KAPYEOUNPIEUP +D7F5;N # HANGUL JONGSEONG YESIEUNG-MIEUM +D7F6;N # HANGUL JONGSEONG YESIEUNG-HIEUH +D7F7;N # HANGUL JONGSEONG CIEUC-PIEUP +D7F8;N # HANGUL JONGSEONG CIEUC-SSANGPIEUP +D7F9;N # HANGUL JONGSEONG SSANGCIEUC +D7FA;N # HANGUL JONGSEONG PHIEUPH-SIOS +D7FB;N # HANGUL JONGSEONG PHIEUPH-THIEUTH D800..DB7F;N # <Non Private Use High Surrogate, First>..<Non Private Use High Surrogate, Last> DB80..DBFF;N # <Private Use High Surrogate, First>..<Private Use High Surrogate, Last> DC00..DFFF;N # <Low Surrogate, First>..<Low Surrogate, Last> diff --git a/gnu/usr.bin/perl/lib/unicore/EmojiSources.txt b/gnu/usr.bin/perl/lib/unicore/EmojiSources.txt index c360c5e2531..cf0f6cbf262 100644 --- a/gnu/usr.bin/perl/lib/unicore/EmojiSources.txt +++ b/gnu/usr.bin/perl/lib/unicore/EmojiSources.txt @@ -1,8 +1,8 @@ -# EmojiSources-6.1.0.txt -# Date: 2011-08-30, 23:30:00 GMT [MS, KW] +# EmojiSources-6.2.0.txt +# Date: 2012-03-08, 21:21:00 GMT 
[MS, KW] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -15,8 +15,9 @@ # Note: It is possible that future versions of this file will include # additional data columns providing mappings for additional vendors. # -# Created for Unicode 6.0 by Marcus Scherer. -# Updated for Unicode 6.1 by Ken Whistler. +# Created for Unicode 6.0 by Markus Scherer. +# Updated for Unicode 6.1 by Ken Whistler (no changes to mappings). +# Updated for Unicode 6.2 by Ken Whistler (no changes to mappings). # # Format: Semicolon-delimited file with a fixed number of fields. # The number of fields may increase in the future. diff --git a/gnu/usr.bin/perl/lib/unicore/HangulSyllableType.txt b/gnu/usr.bin/perl/lib/unicore/HangulSyllableType.txt index 8b457daaba7..a4db00b2f34 100644 --- a/gnu/usr.bin/perl/lib/unicore/HangulSyllableType.txt +++ b/gnu/usr.bin/perl/lib/unicore/HangulSyllableType.txt @@ -1,8 +1,8 @@ -# HangulSyllableType-6.1.0.txt -# Date: 2011-08-25, 00:02:18 GMT [MD] +# HangulSyllableType-6.2.0.txt +# Date: 2012-05-23, 20:34:56 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ diff --git a/gnu/usr.bin/perl/lib/unicore/Index.txt b/gnu/usr.bin/perl/lib/unicore/Index.txt index 50e1c9d5b98..c9885ac30e2 100644 --- a/gnu/usr.bin/perl/lib/unicore/Index.txt +++ b/gnu/usr.bin/perl/lib/unicore/Index.txt @@ -3061,8 +3061,8 @@ Linear B Syllabary 10000 Lines, Horizontal Scan 23BA Lines, Vertical 2223 LIRA SIGN 20A4 +LIRA SIGN, TURKISH 20BA lira, italian 00A3 -lira, turkish 00A3 Lisu A4D0 liter 2113 LIVRE TOURNOIS SIGN 20B6 @@ -5249,8 +5249,7 @@ TUGRIK SIGN 20AE TURBAN, MAN WITH 1F473 turbofan 274B Turkic, Old 10C00 -turkish currency 20A4 -turkish lira 00A3 +TURKISH LIRA SIGN 20BA TURNED A, LATIN SMALL LETTER 0250 TURNED AE, LATIN SMALL LETTER 1D02 TURNED ALPHA, LATIN SMALL LETTER 0252 diff --git a/gnu/usr.bin/perl/lib/unicore/IndicMatraCategory.txt b/gnu/usr.bin/perl/lib/unicore/IndicMatraCategory.txt index 68cbd093505..03a043ed87e 100644 --- a/gnu/usr.bin/perl/lib/unicore/IndicMatraCategory.txt +++ b/gnu/usr.bin/perl/lib/unicore/IndicMatraCategory.txt @@ -1,8 +1,8 @@ -# IndicMatraCategory-6.1.0.txt -# Date: 2011-08-31, 23:50:00 GMT [KW] +# IndicMatraCategory-6.2.0.txt +# Date: 2012-05-15, 21:10:00 GMT [KW] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see UAX #44. 
# diff --git a/gnu/usr.bin/perl/lib/unicore/IndicSyllabicCategory.txt b/gnu/usr.bin/perl/lib/unicore/IndicSyllabicCategory.txt index 9d771bacc04..fd03ea335bb 100644 --- a/gnu/usr.bin/perl/lib/unicore/IndicSyllabicCategory.txt +++ b/gnu/usr.bin/perl/lib/unicore/IndicSyllabicCategory.txt @@ -1,8 +1,8 @@ -# IndicSyllabicCategory-6.1.0.txt -# Date: 2011-08-31, 23:54:00 GMT [KW] +# IndicSyllabicCategory-6.2.0.txt +# Date: 2012-05-15, 21:12:00 GMT [KW] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see UAX #44. # diff --git a/gnu/usr.bin/perl/lib/unicore/Jamo.txt b/gnu/usr.bin/perl/lib/unicore/Jamo.txt index 3f325dee1f9..ee32f6e6c11 100644 --- a/gnu/usr.bin/perl/lib/unicore/Jamo.txt +++ b/gnu/usr.bin/perl/lib/unicore/Jamo.txt @@ -1,14 +1,14 @@ -# Jamo-6.1.0.txt -# Date: 2011-06-22, 23:07:00 GMT [KW, LI] +# Jamo-6.2.0.txt +# Date: 2012-05-15, 22:23:00 GMT [KW, LI] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # # This file defines the Jamo_Short_Name property. # -# See Section 3.12 of The Unicode Standard, Version 6.1 +# See Section 3.12 of The Unicode Standard, Version 6.2 # for more information. # # Each line contains two fields, separated by a semicolon. 
@@ -90,3 +90,4 @@ 11C1; P # HANGUL JONGSEONG PHIEUPH 11C2; H # HANGUL JONGSEONG HIEUH +# EOF diff --git a/gnu/usr.bin/perl/lib/unicore/LineBreak.txt b/gnu/usr.bin/perl/lib/unicore/LineBreak.txt index 98e9671f66d..e309836b0ee 100644 --- a/gnu/usr.bin/perl/lib/unicore/LineBreak.txt +++ b/gnu/usr.bin/perl/lib/unicore/LineBreak.txt @@ -1,5 +1,5 @@ -# LineBreak-6.1.0.txt -# Date: 2011-11-08, 20:25:00 GMT [KW] +# LineBreak-6.2.0.txt +# Date: 2012-08-08, 19:26:00 GMT [KW] # # Line Break Properties # @@ -7,7 +7,7 @@ # Unicode Character Database. # It contains both normative and informative data. # -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # The format is two fields separated by a semicolon. @@ -19,7 +19,7 @@ # Informative: # "XX", "OP", "CL", "CP", "QU", "NS", "EX", "SY", # "IS", "PR", "PO", "NU", "AL", "ID", "IN", "HY", -# "BB", "BA", "SA", "AI", "B2", "HL", "CJ" +# "BB", "BA", "SA", "AI", "B2", "HL", "CJ", "RI" # - All code points, assigned and unassigned, that are not listed # explicitly are given the value "XX". 
# The unassigned code points that default to "ID" include ranges in the @@ -7235,6 +7235,7 @@ 20B7;PR # SPESMILO SIGN 20B8;PR # TENGE SIGN 20B9;PR # INDIAN RUPEE SIGN +20BA;PR # TURKISH LIRA SIGN 20D0;CM # COMBINING LEFT HARPOON ABOVE 20D1;CM # COMBINING RIGHT HARPOON ABOVE 20D2;CM # COMBINING LONG VERTICAL LINE OVERLAY @@ -7800,8 +7801,8 @@ 2317;AL # VIEWDATA SQUARE 2318;AL # PLACE OF INTEREST SIGN 2319;AL # TURNED NOT SIGN -231A;AL # WATCH -231B;AL # HOURGLASS +231A;ID # WATCH +231B;ID # HOURGLASS 231C;AL # TOP LEFT CORNER 231D;AL # TOP RIGHT CORNER 231E;AL # BOTTOM LEFT CORNER @@ -8014,10 +8015,10 @@ 23ED;AL # BLACK RIGHT-POINTING DOUBLE TRIANGLE WITH VERTICAL BAR 23EE;AL # BLACK LEFT-POINTING DOUBLE TRIANGLE WITH VERTICAL BAR 23EF;AL # BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR -23F0;AL # ALARM CLOCK -23F1;AL # STOPWATCH -23F2;AL # TIMER CLOCK -23F3;AL # HOURGLASS WITH FLOWING SAND +23F0;ID # ALARM CLOCK +23F1;ID # STOPWATCH +23F2;ID # TIMER CLOCK +23F3;ID # HOURGLASS WITH FLOWING SAND 2400;AL # SYMBOL FOR NULL 2401;AL # SYMBOL FOR START OF HEADING 2402;AL # SYMBOL FOR START OF TEXT @@ -8484,10 +8485,10 @@ 25FD;AL # WHITE MEDIUM SMALL SQUARE 25FE;AL # BLACK MEDIUM SMALL SQUARE 25FF;AL # LOWER RIGHT TRIANGLE -2600;AL # BLACK SUN WITH RAYS -2601;AL # CLOUD -2602;AL # UMBRELLA -2603;AL # SNOWMAN +2600;ID # BLACK SUN WITH RAYS +2601;ID # CLOUD +2602;ID # UMBRELLA +2603;ID # SNOWMAN 2604;AL # COMET 2605;AI # BLACK STAR 2606;AI # WHITE STAR @@ -8504,18 +8505,18 @@ 2611;AL # BALLOT BOX WITH CHECK 2612;AL # BALLOT BOX WITH X 2613;AL # SALTIRE -2614;AI # UMBRELLA WITH RAIN DROPS -2615;AI # HOT BEVERAGE +2614;ID # UMBRELLA WITH RAIN DROPS +2615;ID # HOT BEVERAGE 2616;AI # WHITE SHOGI PIECE 2617;AI # BLACK SHOGI PIECE -2618;AL # SHAMROCK +2618;ID # SHAMROCK 2619;AL # REVERSED ROTATED FLORAL HEART BULLET -261A;AL # BLACK LEFT POINTING INDEX -261B;AL # BLACK RIGHT POINTING INDEX -261C;AI # WHITE LEFT POINTING INDEX -261D;AL # WHITE UP POINTING INDEX -261E;AI # 
WHITE RIGHT POINTING INDEX -261F;AL # WHITE DOWN POINTING INDEX +261A;ID # BLACK LEFT POINTING INDEX +261B;ID # BLACK RIGHT POINTING INDEX +261C;ID # WHITE LEFT POINTING INDEX +261D;ID # WHITE UP POINTING INDEX +261E;ID # WHITE RIGHT POINTING INDEX +261F;ID # WHITE DOWN POINTING INDEX 2620;AL # SKULL AND CROSSBONES 2621;AL # CAUTION SIGN 2622;AL # RADIOACTIVE SIGN @@ -8541,9 +8542,9 @@ 2636;AL # TRIGRAM FOR MOUNTAIN 2637;AL # TRIGRAM FOR EARTH 2638;AL # WHEEL OF DHARMA -2639;AL # WHITE FROWNING FACE -263A;AL # WHITE SMILING FACE -263B;AL # BLACK SMILING FACE +2639;ID # WHITE FROWNING FACE +263A;ID # WHITE SMILING FACE +263B;ID # BLACK SMILING FACE 263C;AL # WHITE SUN WITH RAYS 263D;AL # FIRST QUARTER MOON 263E;AL # LAST QUARTER MOON @@ -8588,7 +8589,7 @@ 2665;AI # BLACK HEART SUIT 2666;AL # BLACK DIAMOND SUIT 2667;AI # WHITE CLUB SUIT -2668;AI # HOT SPRINGS +2668;ID # HOT SPRINGS 2669;AI # QUARTER NOTE 266A;AI # EIGHTH NOTE 266B;AL # BEAMED EIGHTH NOTES @@ -8611,7 +8612,7 @@ 267C;AL # RECYCLED PAPER SYMBOL 267D;AL # PARTIALLY-RECYCLED PAPER SYMBOL 267E;AL # PERMANENT PAPER SIGN -267F;AL # WHEELCHAIR SYMBOL +267F;ID # WHEELCHAIR SYMBOL 2680;AL # DIE FACE-1 2681;AL # DIE FACE-2 2682;AL # DIE FACE-3 @@ -8673,43 +8674,43 @@ 26BA;AL # SEMISEXTILE 26BB;AL # QUINCUNX 26BC;AL # SESQUIQUADRATE -26BD;AL # SOCCER BALL -26BE;AI # BASEBALL -26BF;AI # SQUARED KEY -26C0;AL # WHITE DRAUGHTS MAN -26C1;AL # WHITE DRAUGHTS KING -26C2;AL # BLACK DRAUGHTS MAN -26C3;AL # BLACK DRAUGHTS KING -26C4;AI # SNOWMAN WITHOUT SNOW -26C5;AI # SUN BEHIND CLOUD -26C6;AI # RAIN -26C7;AI # BLACK SNOWMAN -26C8;AI # THUNDER CLOUD AND RAIN +26BD;ID # SOCCER BALL +26BE;ID # BASEBALL +26BF;ID # SQUARED KEY +26C0;ID # WHITE DRAUGHTS MAN +26C1;ID # WHITE DRAUGHTS KING +26C2;ID # BLACK DRAUGHTS MAN +26C3;ID # BLACK DRAUGHTS KING +26C4;ID # SNOWMAN WITHOUT SNOW +26C5;ID # SUN BEHIND CLOUD +26C6;ID # RAIN +26C7;ID # BLACK SNOWMAN +26C8;ID # THUNDER CLOUD AND RAIN 26C9;AI # TURNED WHITE SHOGI PIECE 26CA;AI # 
TURNED BLACK SHOGI PIECE 26CB;AI # WHITE DIAMOND IN SQUARE 26CC;AI # CROSSING LANES -26CD;AI # DISABLED CAR +26CD;ID # DISABLED CAR 26CE;AL # OPHIUCHUS -26CF;AI # PICK -26D0;AI # CAR SLIDING -26D1;AI # HELMET WITH WHITE CROSS +26CF;ID # PICK +26D0;ID # CAR SLIDING +26D1;ID # HELMET WITH WHITE CROSS 26D2;AI # CIRCLED CROSSING LANES -26D3;AI # CHAINS -26D4;AI # NO ENTRY +26D3;ID # CHAINS +26D4;ID # NO ENTRY 26D5;AI # ALTERNATE ONE-WAY LEFT WAY TRAFFIC 26D6;AI # BLACK TWO-WAY LEFT WAY TRAFFIC 26D7;AI # WHITE TWO-WAY LEFT WAY TRAFFIC -26D8;AI # BLACK LEFT LANE MERGE -26D9;AI # WHITE LEFT LANE MERGE +26D8;ID # BLACK LEFT LANE MERGE +26D9;ID # WHITE LEFT LANE MERGE 26DA;AI # DRIVE SLOW SIGN 26DB;AI # HEAVY WHITE DOWN-POINTING TRIANGLE -26DC;AI # LEFT CLOSED ENTRY +26DC;ID # LEFT CLOSED ENTRY 26DD;AI # SQUARED SALTIRE 26DE;AI # FALLING DIAGONAL IN WHITE CIRCLE IN BLACK SQUARE -26DF;AI # BLACK TRUCK -26E0;AI # RESTRICTED LEFT ENTRY-1 -26E1;AI # RESTRICTED LEFT ENTRY-2 +26DF;ID # BLACK TRUCK +26E0;ID # RESTRICTED LEFT ENTRY-1 +26E1;ID # RESTRICTED LEFT ENTRY-2 26E2;AL # ASTRONOMICAL SYMBOL FOR URANUS 26E3;AI # HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE 26E4;AL # PENTAGRAM @@ -8718,41 +8719,41 @@ 26E7;AL # INVERTED PENTAGRAM 26E8;AI # BLACK CROSS ON SHIELD 26E9;AI # SHINTO SHRINE -26EA;AI # CHURCH +26EA;ID # CHURCH 26EB;AI # CASTLE 26EC;AI # HISTORIC SITE 26ED;AI # GEAR WITHOUT HUB 26EE;AI # GEAR WITH HANDLES 26EF;AI # MAP SYMBOL FOR LIGHTHOUSE 26F0;AI # MOUNTAIN -26F1;AI # UMBRELLA ON GROUND -26F2;AI # FOUNTAIN -26F3;AI # FLAG IN HOLE -26F4;AI # FERRY -26F5;AI # SAILBOAT +26F1;ID # UMBRELLA ON GROUND +26F2;ID # FOUNTAIN +26F3;ID # FLAG IN HOLE +26F4;ID # FERRY +26F5;ID # SAILBOAT 26F6;AI # SQUARE FOUR CORNERS -26F7;AI # SKIER -26F8;AI # ICE SKATE -26F9;AI # PERSON WITH BALL -26FA;AI # TENT +26F7;ID # SKIER +26F8;ID # ICE SKATE +26F9;ID # PERSON WITH BALL +26FA;ID # TENT 26FB;AI # JAPANESE BANK SYMBOL 26FC;AI # HEADSTONE GRAVEYARD SYMBOL -26FD;AI # FUEL PUMP -26FE;AI # CUP 
ON BLACK SQUARE -26FF;AI # WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE -2701;AL # UPPER BLADE SCISSORS -2702;AL # BLACK SCISSORS -2703;AL # LOWER BLADE SCISSORS -2704;AL # WHITE SCISSORS +26FD;ID # FUEL PUMP +26FE;ID # CUP ON BLACK SQUARE +26FF;ID # WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE +2701;ID # UPPER BLADE SCISSORS +2702;ID # BLACK SCISSORS +2703;ID # LOWER BLADE SCISSORS +2704;ID # WHITE SCISSORS 2705;AL # WHITE HEAVY CHECK MARK 2706;AL # TELEPHONE LOCATION SIGN 2707;AL # TAPE DRIVE -2708;AL # AIRPLANE -2709;AL # ENVELOPE -270A;AL # RAISED FIST -270B;AL # RAISED HAND -270C;AL # VICTORY HAND -270D;AL # WRITING HAND +2708;ID # AIRPLANE +2709;ID # ENVELOPE +270A;ID # RAISED FIST +270B;ID # RAISED HAND +270C;ID # VICTORY HAND +270D;ID # WRITING HAND 270E;AL # LOWER RIGHT PENCIL 270F;AL # PENCIL 2710;AL # UPPER RIGHT PENCIL @@ -23150,209 +23151,209 @@ FFFD;AI # REPLACEMENT CHARACTER 1EEBB;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0;AL # ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL 1EEF1;AL # ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -1F000;AL # MAHJONG TILE EAST WIND -1F001;AL # MAHJONG TILE SOUTH WIND -1F002;AL # MAHJONG TILE WEST WIND -1F003;AL # MAHJONG TILE NORTH WIND -1F004;AL # MAHJONG TILE RED DRAGON -1F005;AL # MAHJONG TILE GREEN DRAGON -1F006;AL # MAHJONG TILE WHITE DRAGON -1F007;AL # MAHJONG TILE ONE OF CHARACTERS -1F008;AL # MAHJONG TILE TWO OF CHARACTERS -1F009;AL # MAHJONG TILE THREE OF CHARACTERS -1F00A;AL # MAHJONG TILE FOUR OF CHARACTERS -1F00B;AL # MAHJONG TILE FIVE OF CHARACTERS -1F00C;AL # MAHJONG TILE SIX OF CHARACTERS -1F00D;AL # MAHJONG TILE SEVEN OF CHARACTERS -1F00E;AL # MAHJONG TILE EIGHT OF CHARACTERS -1F00F;AL # MAHJONG TILE NINE OF CHARACTERS -1F010;AL # MAHJONG TILE ONE OF BAMBOOS -1F011;AL # MAHJONG TILE TWO OF BAMBOOS -1F012;AL # MAHJONG TILE THREE OF BAMBOOS -1F013;AL # MAHJONG TILE FOUR OF BAMBOOS -1F014;AL # MAHJONG TILE FIVE OF BAMBOOS -1F015;AL # MAHJONG TILE SIX OF BAMBOOS -1F016;AL # MAHJONG TILE 
SEVEN OF BAMBOOS -1F017;AL # MAHJONG TILE EIGHT OF BAMBOOS -1F018;AL # MAHJONG TILE NINE OF BAMBOOS -1F019;AL # MAHJONG TILE ONE OF CIRCLES -1F01A;AL # MAHJONG TILE TWO OF CIRCLES -1F01B;AL # MAHJONG TILE THREE OF CIRCLES -1F01C;AL # MAHJONG TILE FOUR OF CIRCLES -1F01D;AL # MAHJONG TILE FIVE OF CIRCLES -1F01E;AL # MAHJONG TILE SIX OF CIRCLES -1F01F;AL # MAHJONG TILE SEVEN OF CIRCLES -1F020;AL # MAHJONG TILE EIGHT OF CIRCLES -1F021;AL # MAHJONG TILE NINE OF CIRCLES -1F022;AL # MAHJONG TILE PLUM -1F023;AL # MAHJONG TILE ORCHID -1F024;AL # MAHJONG TILE BAMBOO -1F025;AL # MAHJONG TILE CHRYSANTHEMUM -1F026;AL # MAHJONG TILE SPRING -1F027;AL # MAHJONG TILE SUMMER -1F028;AL # MAHJONG TILE AUTUMN -1F029;AL # MAHJONG TILE WINTER -1F02A;AL # MAHJONG TILE JOKER -1F02B;AL # MAHJONG TILE BACK -1F030;AL # DOMINO TILE HORIZONTAL BACK -1F031;AL # DOMINO TILE HORIZONTAL-00-00 -1F032;AL # DOMINO TILE HORIZONTAL-00-01 -1F033;AL # DOMINO TILE HORIZONTAL-00-02 -1F034;AL # DOMINO TILE HORIZONTAL-00-03 -1F035;AL # DOMINO TILE HORIZONTAL-00-04 -1F036;AL # DOMINO TILE HORIZONTAL-00-05 -1F037;AL # DOMINO TILE HORIZONTAL-00-06 -1F038;AL # DOMINO TILE HORIZONTAL-01-00 -1F039;AL # DOMINO TILE HORIZONTAL-01-01 -1F03A;AL # DOMINO TILE HORIZONTAL-01-02 -1F03B;AL # DOMINO TILE HORIZONTAL-01-03 -1F03C;AL # DOMINO TILE HORIZONTAL-01-04 -1F03D;AL # DOMINO TILE HORIZONTAL-01-05 -1F03E;AL # DOMINO TILE HORIZONTAL-01-06 -1F03F;AL # DOMINO TILE HORIZONTAL-02-00 -1F040;AL # DOMINO TILE HORIZONTAL-02-01 -1F041;AL # DOMINO TILE HORIZONTAL-02-02 -1F042;AL # DOMINO TILE HORIZONTAL-02-03 -1F043;AL # DOMINO TILE HORIZONTAL-02-04 -1F044;AL # DOMINO TILE HORIZONTAL-02-05 -1F045;AL # DOMINO TILE HORIZONTAL-02-06 -1F046;AL # DOMINO TILE HORIZONTAL-03-00 -1F047;AL # DOMINO TILE HORIZONTAL-03-01 -1F048;AL # DOMINO TILE HORIZONTAL-03-02 -1F049;AL # DOMINO TILE HORIZONTAL-03-03 -1F04A;AL # DOMINO TILE HORIZONTAL-03-04 -1F04B;AL # DOMINO TILE HORIZONTAL-03-05 -1F04C;AL # DOMINO TILE HORIZONTAL-03-06 -1F04D;AL # DOMINO 
TILE HORIZONTAL-04-00 -1F04E;AL # DOMINO TILE HORIZONTAL-04-01 -1F04F;AL # DOMINO TILE HORIZONTAL-04-02 -1F050;AL # DOMINO TILE HORIZONTAL-04-03 -1F051;AL # DOMINO TILE HORIZONTAL-04-04 -1F052;AL # DOMINO TILE HORIZONTAL-04-05 -1F053;AL # DOMINO TILE HORIZONTAL-04-06 -1F054;AL # DOMINO TILE HORIZONTAL-05-00 -1F055;AL # DOMINO TILE HORIZONTAL-05-01 -1F056;AL # DOMINO TILE HORIZONTAL-05-02 -1F057;AL # DOMINO TILE HORIZONTAL-05-03 -1F058;AL # DOMINO TILE HORIZONTAL-05-04 -1F059;AL # DOMINO TILE HORIZONTAL-05-05 -1F05A;AL # DOMINO TILE HORIZONTAL-05-06 -1F05B;AL # DOMINO TILE HORIZONTAL-06-00 -1F05C;AL # DOMINO TILE HORIZONTAL-06-01 -1F05D;AL # DOMINO TILE HORIZONTAL-06-02 -1F05E;AL # DOMINO TILE HORIZONTAL-06-03 -1F05F;AL # DOMINO TILE HORIZONTAL-06-04 -1F060;AL # DOMINO TILE HORIZONTAL-06-05 -1F061;AL # DOMINO TILE HORIZONTAL-06-06 -1F062;AL # DOMINO TILE VERTICAL BACK -1F063;AL # DOMINO TILE VERTICAL-00-00 -1F064;AL # DOMINO TILE VERTICAL-00-01 -1F065;AL # DOMINO TILE VERTICAL-00-02 -1F066;AL # DOMINO TILE VERTICAL-00-03 -1F067;AL # DOMINO TILE VERTICAL-00-04 -1F068;AL # DOMINO TILE VERTICAL-00-05 -1F069;AL # DOMINO TILE VERTICAL-00-06 -1F06A;AL # DOMINO TILE VERTICAL-01-00 -1F06B;AL # DOMINO TILE VERTICAL-01-01 -1F06C;AL # DOMINO TILE VERTICAL-01-02 -1F06D;AL # DOMINO TILE VERTICAL-01-03 -1F06E;AL # DOMINO TILE VERTICAL-01-04 -1F06F;AL # DOMINO TILE VERTICAL-01-05 -1F070;AL # DOMINO TILE VERTICAL-01-06 -1F071;AL # DOMINO TILE VERTICAL-02-00 -1F072;AL # DOMINO TILE VERTICAL-02-01 -1F073;AL # DOMINO TILE VERTICAL-02-02 -1F074;AL # DOMINO TILE VERTICAL-02-03 -1F075;AL # DOMINO TILE VERTICAL-02-04 -1F076;AL # DOMINO TILE VERTICAL-02-05 -1F077;AL # DOMINO TILE VERTICAL-02-06 -1F078;AL # DOMINO TILE VERTICAL-03-00 -1F079;AL # DOMINO TILE VERTICAL-03-01 -1F07A;AL # DOMINO TILE VERTICAL-03-02 -1F07B;AL # DOMINO TILE VERTICAL-03-03 -1F07C;AL # DOMINO TILE VERTICAL-03-04 -1F07D;AL # DOMINO TILE VERTICAL-03-05 -1F07E;AL # DOMINO TILE VERTICAL-03-06 -1F07F;AL # DOMINO TILE 
VERTICAL-04-00 -1F080;AL # DOMINO TILE VERTICAL-04-01 -1F081;AL # DOMINO TILE VERTICAL-04-02 -1F082;AL # DOMINO TILE VERTICAL-04-03 -1F083;AL # DOMINO TILE VERTICAL-04-04 -1F084;AL # DOMINO TILE VERTICAL-04-05 -1F085;AL # DOMINO TILE VERTICAL-04-06 -1F086;AL # DOMINO TILE VERTICAL-05-00 -1F087;AL # DOMINO TILE VERTICAL-05-01 -1F088;AL # DOMINO TILE VERTICAL-05-02 -1F089;AL # DOMINO TILE VERTICAL-05-03 -1F08A;AL # DOMINO TILE VERTICAL-05-04 -1F08B;AL # DOMINO TILE VERTICAL-05-05 -1F08C;AL # DOMINO TILE VERTICAL-05-06 -1F08D;AL # DOMINO TILE VERTICAL-06-00 -1F08E;AL # DOMINO TILE VERTICAL-06-01 -1F08F;AL # DOMINO TILE VERTICAL-06-02 -1F090;AL # DOMINO TILE VERTICAL-06-03 -1F091;AL # DOMINO TILE VERTICAL-06-04 -1F092;AL # DOMINO TILE VERTICAL-06-05 -1F093;AL # DOMINO TILE VERTICAL-06-06 -1F0A0;AL # PLAYING CARD BACK -1F0A1;AL # PLAYING CARD ACE OF SPADES -1F0A2;AL # PLAYING CARD TWO OF SPADES -1F0A3;AL # PLAYING CARD THREE OF SPADES -1F0A4;AL # PLAYING CARD FOUR OF SPADES -1F0A5;AL # PLAYING CARD FIVE OF SPADES -1F0A6;AL # PLAYING CARD SIX OF SPADES -1F0A7;AL # PLAYING CARD SEVEN OF SPADES -1F0A8;AL # PLAYING CARD EIGHT OF SPADES -1F0A9;AL # PLAYING CARD NINE OF SPADES -1F0AA;AL # PLAYING CARD TEN OF SPADES -1F0AB;AL # PLAYING CARD JACK OF SPADES -1F0AC;AL # PLAYING CARD KNIGHT OF SPADES -1F0AD;AL # PLAYING CARD QUEEN OF SPADES -1F0AE;AL # PLAYING CARD KING OF SPADES -1F0B1;AL # PLAYING CARD ACE OF HEARTS -1F0B2;AL # PLAYING CARD TWO OF HEARTS -1F0B3;AL # PLAYING CARD THREE OF HEARTS -1F0B4;AL # PLAYING CARD FOUR OF HEARTS -1F0B5;AL # PLAYING CARD FIVE OF HEARTS -1F0B6;AL # PLAYING CARD SIX OF HEARTS -1F0B7;AL # PLAYING CARD SEVEN OF HEARTS -1F0B8;AL # PLAYING CARD EIGHT OF HEARTS -1F0B9;AL # PLAYING CARD NINE OF HEARTS -1F0BA;AL # PLAYING CARD TEN OF HEARTS -1F0BB;AL # PLAYING CARD JACK OF HEARTS -1F0BC;AL # PLAYING CARD KNIGHT OF HEARTS -1F0BD;AL # PLAYING CARD QUEEN OF HEARTS -1F0BE;AL # PLAYING CARD KING OF HEARTS -1F0C1;AL # PLAYING CARD ACE OF DIAMONDS -1F0C2;AL 
# PLAYING CARD TWO OF DIAMONDS -1F0C3;AL # PLAYING CARD THREE OF DIAMONDS -1F0C4;AL # PLAYING CARD FOUR OF DIAMONDS -1F0C5;AL # PLAYING CARD FIVE OF DIAMONDS -1F0C6;AL # PLAYING CARD SIX OF DIAMONDS -1F0C7;AL # PLAYING CARD SEVEN OF DIAMONDS -1F0C8;AL # PLAYING CARD EIGHT OF DIAMONDS -1F0C9;AL # PLAYING CARD NINE OF DIAMONDS -1F0CA;AL # PLAYING CARD TEN OF DIAMONDS -1F0CB;AL # PLAYING CARD JACK OF DIAMONDS -1F0CC;AL # PLAYING CARD KNIGHT OF DIAMONDS -1F0CD;AL # PLAYING CARD QUEEN OF DIAMONDS -1F0CE;AL # PLAYING CARD KING OF DIAMONDS -1F0CF;AL # PLAYING CARD BLACK JOKER -1F0D1;AL # PLAYING CARD ACE OF CLUBS -1F0D2;AL # PLAYING CARD TWO OF CLUBS -1F0D3;AL # PLAYING CARD THREE OF CLUBS -1F0D4;AL # PLAYING CARD FOUR OF CLUBS -1F0D5;AL # PLAYING CARD FIVE OF CLUBS -1F0D6;AL # PLAYING CARD SIX OF CLUBS -1F0D7;AL # PLAYING CARD SEVEN OF CLUBS -1F0D8;AL # PLAYING CARD EIGHT OF CLUBS -1F0D9;AL # PLAYING CARD NINE OF CLUBS -1F0DA;AL # PLAYING CARD TEN OF CLUBS -1F0DB;AL # PLAYING CARD JACK OF CLUBS -1F0DC;AL # PLAYING CARD KNIGHT OF CLUBS -1F0DD;AL # PLAYING CARD QUEEN OF CLUBS -1F0DE;AL # PLAYING CARD KING OF CLUBS -1F0DF;AL # PLAYING CARD WHITE JOKER +1F000;ID # MAHJONG TILE EAST WIND +1F001;ID # MAHJONG TILE SOUTH WIND +1F002;ID # MAHJONG TILE WEST WIND +1F003;ID # MAHJONG TILE NORTH WIND +1F004;ID # MAHJONG TILE RED DRAGON +1F005;ID # MAHJONG TILE GREEN DRAGON +1F006;ID # MAHJONG TILE WHITE DRAGON +1F007;ID # MAHJONG TILE ONE OF CHARACTERS +1F008;ID # MAHJONG TILE TWO OF CHARACTERS +1F009;ID # MAHJONG TILE THREE OF CHARACTERS +1F00A;ID # MAHJONG TILE FOUR OF CHARACTERS +1F00B;ID # MAHJONG TILE FIVE OF CHARACTERS +1F00C;ID # MAHJONG TILE SIX OF CHARACTERS +1F00D;ID # MAHJONG TILE SEVEN OF CHARACTERS +1F00E;ID # MAHJONG TILE EIGHT OF CHARACTERS +1F00F;ID # MAHJONG TILE NINE OF CHARACTERS +1F010;ID # MAHJONG TILE ONE OF BAMBOOS +1F011;ID # MAHJONG TILE TWO OF BAMBOOS +1F012;ID # MAHJONG TILE THREE OF BAMBOOS +1F013;ID # MAHJONG TILE FOUR OF BAMBOOS +1F014;ID # MAHJONG TILE 
FIVE OF BAMBOOS +1F015;ID # MAHJONG TILE SIX OF BAMBOOS +1F016;ID # MAHJONG TILE SEVEN OF BAMBOOS +1F017;ID # MAHJONG TILE EIGHT OF BAMBOOS +1F018;ID # MAHJONG TILE NINE OF BAMBOOS +1F019;ID # MAHJONG TILE ONE OF CIRCLES +1F01A;ID # MAHJONG TILE TWO OF CIRCLES +1F01B;ID # MAHJONG TILE THREE OF CIRCLES +1F01C;ID # MAHJONG TILE FOUR OF CIRCLES +1F01D;ID # MAHJONG TILE FIVE OF CIRCLES +1F01E;ID # MAHJONG TILE SIX OF CIRCLES +1F01F;ID # MAHJONG TILE SEVEN OF CIRCLES +1F020;ID # MAHJONG TILE EIGHT OF CIRCLES +1F021;ID # MAHJONG TILE NINE OF CIRCLES +1F022;ID # MAHJONG TILE PLUM +1F023;ID # MAHJONG TILE ORCHID +1F024;ID # MAHJONG TILE BAMBOO +1F025;ID # MAHJONG TILE CHRYSANTHEMUM +1F026;ID # MAHJONG TILE SPRING +1F027;ID # MAHJONG TILE SUMMER +1F028;ID # MAHJONG TILE AUTUMN +1F029;ID # MAHJONG TILE WINTER +1F02A;ID # MAHJONG TILE JOKER +1F02B;ID # MAHJONG TILE BACK +1F030;ID # DOMINO TILE HORIZONTAL BACK +1F031;ID # DOMINO TILE HORIZONTAL-00-00 +1F032;ID # DOMINO TILE HORIZONTAL-00-01 +1F033;ID # DOMINO TILE HORIZONTAL-00-02 +1F034;ID # DOMINO TILE HORIZONTAL-00-03 +1F035;ID # DOMINO TILE HORIZONTAL-00-04 +1F036;ID # DOMINO TILE HORIZONTAL-00-05 +1F037;ID # DOMINO TILE HORIZONTAL-00-06 +1F038;ID # DOMINO TILE HORIZONTAL-01-00 +1F039;ID # DOMINO TILE HORIZONTAL-01-01 +1F03A;ID # DOMINO TILE HORIZONTAL-01-02 +1F03B;ID # DOMINO TILE HORIZONTAL-01-03 +1F03C;ID # DOMINO TILE HORIZONTAL-01-04 +1F03D;ID # DOMINO TILE HORIZONTAL-01-05 +1F03E;ID # DOMINO TILE HORIZONTAL-01-06 +1F03F;ID # DOMINO TILE HORIZONTAL-02-00 +1F040;ID # DOMINO TILE HORIZONTAL-02-01 +1F041;ID # DOMINO TILE HORIZONTAL-02-02 +1F042;ID # DOMINO TILE HORIZONTAL-02-03 +1F043;ID # DOMINO TILE HORIZONTAL-02-04 +1F044;ID # DOMINO TILE HORIZONTAL-02-05 +1F045;ID # DOMINO TILE HORIZONTAL-02-06 +1F046;ID # DOMINO TILE HORIZONTAL-03-00 +1F047;ID # DOMINO TILE HORIZONTAL-03-01 +1F048;ID # DOMINO TILE HORIZONTAL-03-02 +1F049;ID # DOMINO TILE HORIZONTAL-03-03 +1F04A;ID # DOMINO TILE HORIZONTAL-03-04 +1F04B;ID # DOMINO 
TILE HORIZONTAL-03-05 +1F04C;ID # DOMINO TILE HORIZONTAL-03-06 +1F04D;ID # DOMINO TILE HORIZONTAL-04-00 +1F04E;ID # DOMINO TILE HORIZONTAL-04-01 +1F04F;ID # DOMINO TILE HORIZONTAL-04-02 +1F050;ID # DOMINO TILE HORIZONTAL-04-03 +1F051;ID # DOMINO TILE HORIZONTAL-04-04 +1F052;ID # DOMINO TILE HORIZONTAL-04-05 +1F053;ID # DOMINO TILE HORIZONTAL-04-06 +1F054;ID # DOMINO TILE HORIZONTAL-05-00 +1F055;ID # DOMINO TILE HORIZONTAL-05-01 +1F056;ID # DOMINO TILE HORIZONTAL-05-02 +1F057;ID # DOMINO TILE HORIZONTAL-05-03 +1F058;ID # DOMINO TILE HORIZONTAL-05-04 +1F059;ID # DOMINO TILE HORIZONTAL-05-05 +1F05A;ID # DOMINO TILE HORIZONTAL-05-06 +1F05B;ID # DOMINO TILE HORIZONTAL-06-00 +1F05C;ID # DOMINO TILE HORIZONTAL-06-01 +1F05D;ID # DOMINO TILE HORIZONTAL-06-02 +1F05E;ID # DOMINO TILE HORIZONTAL-06-03 +1F05F;ID # DOMINO TILE HORIZONTAL-06-04 +1F060;ID # DOMINO TILE HORIZONTAL-06-05 +1F061;ID # DOMINO TILE HORIZONTAL-06-06 +1F062;ID # DOMINO TILE VERTICAL BACK +1F063;ID # DOMINO TILE VERTICAL-00-00 +1F064;ID # DOMINO TILE VERTICAL-00-01 +1F065;ID # DOMINO TILE VERTICAL-00-02 +1F066;ID # DOMINO TILE VERTICAL-00-03 +1F067;ID # DOMINO TILE VERTICAL-00-04 +1F068;ID # DOMINO TILE VERTICAL-00-05 +1F069;ID # DOMINO TILE VERTICAL-00-06 +1F06A;ID # DOMINO TILE VERTICAL-01-00 +1F06B;ID # DOMINO TILE VERTICAL-01-01 +1F06C;ID # DOMINO TILE VERTICAL-01-02 +1F06D;ID # DOMINO TILE VERTICAL-01-03 +1F06E;ID # DOMINO TILE VERTICAL-01-04 +1F06F;ID # DOMINO TILE VERTICAL-01-05 +1F070;ID # DOMINO TILE VERTICAL-01-06 +1F071;ID # DOMINO TILE VERTICAL-02-00 +1F072;ID # DOMINO TILE VERTICAL-02-01 +1F073;ID # DOMINO TILE VERTICAL-02-02 +1F074;ID # DOMINO TILE VERTICAL-02-03 +1F075;ID # DOMINO TILE VERTICAL-02-04 +1F076;ID # DOMINO TILE VERTICAL-02-05 +1F077;ID # DOMINO TILE VERTICAL-02-06 +1F078;ID # DOMINO TILE VERTICAL-03-00 +1F079;ID # DOMINO TILE VERTICAL-03-01 +1F07A;ID # DOMINO TILE VERTICAL-03-02 +1F07B;ID # DOMINO TILE VERTICAL-03-03 +1F07C;ID # DOMINO TILE VERTICAL-03-04 +1F07D;ID # DOMINO TILE 
VERTICAL-03-05 +1F07E;ID # DOMINO TILE VERTICAL-03-06 +1F07F;ID # DOMINO TILE VERTICAL-04-00 +1F080;ID # DOMINO TILE VERTICAL-04-01 +1F081;ID # DOMINO TILE VERTICAL-04-02 +1F082;ID # DOMINO TILE VERTICAL-04-03 +1F083;ID # DOMINO TILE VERTICAL-04-04 +1F084;ID # DOMINO TILE VERTICAL-04-05 +1F085;ID # DOMINO TILE VERTICAL-04-06 +1F086;ID # DOMINO TILE VERTICAL-05-00 +1F087;ID # DOMINO TILE VERTICAL-05-01 +1F088;ID # DOMINO TILE VERTICAL-05-02 +1F089;ID # DOMINO TILE VERTICAL-05-03 +1F08A;ID # DOMINO TILE VERTICAL-05-04 +1F08B;ID # DOMINO TILE VERTICAL-05-05 +1F08C;ID # DOMINO TILE VERTICAL-05-06 +1F08D;ID # DOMINO TILE VERTICAL-06-00 +1F08E;ID # DOMINO TILE VERTICAL-06-01 +1F08F;ID # DOMINO TILE VERTICAL-06-02 +1F090;ID # DOMINO TILE VERTICAL-06-03 +1F091;ID # DOMINO TILE VERTICAL-06-04 +1F092;ID # DOMINO TILE VERTICAL-06-05 +1F093;ID # DOMINO TILE VERTICAL-06-06 +1F0A0;ID # PLAYING CARD BACK +1F0A1;ID # PLAYING CARD ACE OF SPADES +1F0A2;ID # PLAYING CARD TWO OF SPADES +1F0A3;ID # PLAYING CARD THREE OF SPADES +1F0A4;ID # PLAYING CARD FOUR OF SPADES +1F0A5;ID # PLAYING CARD FIVE OF SPADES +1F0A6;ID # PLAYING CARD SIX OF SPADES +1F0A7;ID # PLAYING CARD SEVEN OF SPADES +1F0A8;ID # PLAYING CARD EIGHT OF SPADES +1F0A9;ID # PLAYING CARD NINE OF SPADES +1F0AA;ID # PLAYING CARD TEN OF SPADES +1F0AB;ID # PLAYING CARD JACK OF SPADES +1F0AC;ID # PLAYING CARD KNIGHT OF SPADES +1F0AD;ID # PLAYING CARD QUEEN OF SPADES +1F0AE;ID # PLAYING CARD KING OF SPADES +1F0B1;ID # PLAYING CARD ACE OF HEARTS +1F0B2;ID # PLAYING CARD TWO OF HEARTS +1F0B3;ID # PLAYING CARD THREE OF HEARTS +1F0B4;ID # PLAYING CARD FOUR OF HEARTS +1F0B5;ID # PLAYING CARD FIVE OF HEARTS +1F0B6;ID # PLAYING CARD SIX OF HEARTS +1F0B7;ID # PLAYING CARD SEVEN OF HEARTS +1F0B8;ID # PLAYING CARD EIGHT OF HEARTS +1F0B9;ID # PLAYING CARD NINE OF HEARTS +1F0BA;ID # PLAYING CARD TEN OF HEARTS +1F0BB;ID # PLAYING CARD JACK OF HEARTS +1F0BC;ID # PLAYING CARD KNIGHT OF HEARTS +1F0BD;ID # PLAYING CARD QUEEN OF HEARTS +1F0BE;ID # 
PLAYING CARD KING OF HEARTS +1F0C1;ID # PLAYING CARD ACE OF DIAMONDS +1F0C2;ID # PLAYING CARD TWO OF DIAMONDS +1F0C3;ID # PLAYING CARD THREE OF DIAMONDS +1F0C4;ID # PLAYING CARD FOUR OF DIAMONDS +1F0C5;ID # PLAYING CARD FIVE OF DIAMONDS +1F0C6;ID # PLAYING CARD SIX OF DIAMONDS +1F0C7;ID # PLAYING CARD SEVEN OF DIAMONDS +1F0C8;ID # PLAYING CARD EIGHT OF DIAMONDS +1F0C9;ID # PLAYING CARD NINE OF DIAMONDS +1F0CA;ID # PLAYING CARD TEN OF DIAMONDS +1F0CB;ID # PLAYING CARD JACK OF DIAMONDS +1F0CC;ID # PLAYING CARD KNIGHT OF DIAMONDS +1F0CD;ID # PLAYING CARD QUEEN OF DIAMONDS +1F0CE;ID # PLAYING CARD KING OF DIAMONDS +1F0CF;ID # PLAYING CARD BLACK JOKER +1F0D1;ID # PLAYING CARD ACE OF CLUBS +1F0D2;ID # PLAYING CARD TWO OF CLUBS +1F0D3;ID # PLAYING CARD THREE OF CLUBS +1F0D4;ID # PLAYING CARD FOUR OF CLUBS +1F0D5;ID # PLAYING CARD FIVE OF CLUBS +1F0D6;ID # PLAYING CARD SIX OF CLUBS +1F0D7;ID # PLAYING CARD SEVEN OF CLUBS +1F0D8;ID # PLAYING CARD EIGHT OF CLUBS +1F0D9;ID # PLAYING CARD NINE OF CLUBS +1F0DA;ID # PLAYING CARD TEN OF CLUBS +1F0DB;ID # PLAYING CARD JACK OF CLUBS +1F0DC;ID # PLAYING CARD KNIGHT OF CLUBS +1F0DD;ID # PLAYING CARD QUEEN OF CLUBS +1F0DE;ID # PLAYING CARD KING OF CLUBS +1F0DF;ID # PLAYING CARD WHITE JOKER 1F100;AI # DIGIT ZERO FULL STOP 1F101;AI # DIGIT ZERO COMMA 1F102;AI # DIGIT ONE COMMA @@ -23498,32 +23499,32 @@ FFFD;AI # REPLACEMENT CHARACTER 1F198;AI # SQUARED SOS 1F199;AI # SQUARED UP WITH EXCLAMATION MARK 1F19A;AI # SQUARED VS -1F1E6;AL # REGIONAL INDICATOR SYMBOL LETTER A -1F1E7;AL # REGIONAL INDICATOR SYMBOL LETTER B -1F1E8;AL # REGIONAL INDICATOR SYMBOL LETTER C -1F1E9;AL # REGIONAL INDICATOR SYMBOL LETTER D -1F1EA;AL # REGIONAL INDICATOR SYMBOL LETTER E -1F1EB;AL # REGIONAL INDICATOR SYMBOL LETTER F -1F1EC;AL # REGIONAL INDICATOR SYMBOL LETTER G -1F1ED;AL # REGIONAL INDICATOR SYMBOL LETTER H -1F1EE;AL # REGIONAL INDICATOR SYMBOL LETTER I -1F1EF;AL # REGIONAL INDICATOR SYMBOL LETTER J -1F1F0;AL # REGIONAL INDICATOR SYMBOL LETTER K 
-1F1F1;AL # REGIONAL INDICATOR SYMBOL LETTER L -1F1F2;AL # REGIONAL INDICATOR SYMBOL LETTER M -1F1F3;AL # REGIONAL INDICATOR SYMBOL LETTER N -1F1F4;AL # REGIONAL INDICATOR SYMBOL LETTER O -1F1F5;AL # REGIONAL INDICATOR SYMBOL LETTER P -1F1F6;AL # REGIONAL INDICATOR SYMBOL LETTER Q -1F1F7;AL # REGIONAL INDICATOR SYMBOL LETTER R -1F1F8;AL # REGIONAL INDICATOR SYMBOL LETTER S -1F1F9;AL # REGIONAL INDICATOR SYMBOL LETTER T -1F1FA;AL # REGIONAL INDICATOR SYMBOL LETTER U -1F1FB;AL # REGIONAL INDICATOR SYMBOL LETTER V -1F1FC;AL # REGIONAL INDICATOR SYMBOL LETTER W -1F1FD;AL # REGIONAL INDICATOR SYMBOL LETTER X -1F1FE;AL # REGIONAL INDICATOR SYMBOL LETTER Y -1F1FF;AL # REGIONAL INDICATOR SYMBOL LETTER Z +1F1E6;RI # REGIONAL INDICATOR SYMBOL LETTER A +1F1E7;RI # REGIONAL INDICATOR SYMBOL LETTER B +1F1E8;RI # REGIONAL INDICATOR SYMBOL LETTER C +1F1E9;RI # REGIONAL INDICATOR SYMBOL LETTER D +1F1EA;RI # REGIONAL INDICATOR SYMBOL LETTER E +1F1EB;RI # REGIONAL INDICATOR SYMBOL LETTER F +1F1EC;RI # REGIONAL INDICATOR SYMBOL LETTER G +1F1ED;RI # REGIONAL INDICATOR SYMBOL LETTER H +1F1EE;RI # REGIONAL INDICATOR SYMBOL LETTER I +1F1EF;RI # REGIONAL INDICATOR SYMBOL LETTER J +1F1F0;RI # REGIONAL INDICATOR SYMBOL LETTER K +1F1F1;RI # REGIONAL INDICATOR SYMBOL LETTER L +1F1F2;RI # REGIONAL INDICATOR SYMBOL LETTER M +1F1F3;RI # REGIONAL INDICATOR SYMBOL LETTER N +1F1F4;RI # REGIONAL INDICATOR SYMBOL LETTER O +1F1F5;RI # REGIONAL INDICATOR SYMBOL LETTER P +1F1F6;RI # REGIONAL INDICATOR SYMBOL LETTER Q +1F1F7;RI # REGIONAL INDICATOR SYMBOL LETTER R +1F1F8;RI # REGIONAL INDICATOR SYMBOL LETTER S +1F1F9;RI # REGIONAL INDICATOR SYMBOL LETTER T +1F1FA;RI # REGIONAL INDICATOR SYMBOL LETTER U +1F1FB;RI # REGIONAL INDICATOR SYMBOL LETTER V +1F1FC;RI # REGIONAL INDICATOR SYMBOL LETTER W +1F1FD;RI # REGIONAL INDICATOR SYMBOL LETTER X +1F1FE;RI # REGIONAL INDICATOR SYMBOL LETTER Y +1F1FF;RI # REGIONAL INDICATOR SYMBOL LETTER Z 1F200;ID # SQUARE HIRAGANA HOKA 1F201;ID # SQUARED KATAKANA KOKO 
1F202;ID # SQUARED KATAKANA SA @@ -23581,444 +23582,444 @@ FFFD;AI # REPLACEMENT CHARACTER 1F248;ID # TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 1F250;ID # CIRCLED IDEOGRAPH ADVANTAGE 1F251;ID # CIRCLED IDEOGRAPH ACCEPT -1F300;AL # CYCLONE -1F301;AL # FOGGY -1F302;AL # CLOSED UMBRELLA -1F303;AL # NIGHT WITH STARS -1F304;AL # SUNRISE OVER MOUNTAINS -1F305;AL # SUNRISE -1F306;AL # CITYSCAPE AT DUSK -1F307;AL # SUNSET OVER BUILDINGS -1F308;AL # RAINBOW -1F309;AL # BRIDGE AT NIGHT -1F30A;AL # WATER WAVE -1F30B;AL # VOLCANO -1F30C;AL # MILKY WAY -1F30D;AL # EARTH GLOBE EUROPE-AFRICA -1F30E;AL # EARTH GLOBE AMERICAS -1F30F;AL # EARTH GLOBE ASIA-AUSTRALIA -1F310;AL # GLOBE WITH MERIDIANS -1F311;AL # NEW MOON SYMBOL -1F312;AL # WAXING CRESCENT MOON SYMBOL -1F313;AL # FIRST QUARTER MOON SYMBOL -1F314;AL # WAXING GIBBOUS MOON SYMBOL -1F315;AL # FULL MOON SYMBOL -1F316;AL # WANING GIBBOUS MOON SYMBOL -1F317;AL # LAST QUARTER MOON SYMBOL -1F318;AL # WANING CRESCENT MOON SYMBOL -1F319;AL # CRESCENT MOON -1F31A;AL # NEW MOON WITH FACE -1F31B;AL # FIRST QUARTER MOON WITH FACE -1F31C;AL # LAST QUARTER MOON WITH FACE -1F31D;AL # FULL MOON WITH FACE -1F31E;AL # SUN WITH FACE -1F31F;AL # GLOWING STAR -1F320;AL # SHOOTING STAR -1F330;AL # CHESTNUT -1F331;AL # SEEDLING -1F332;AL # EVERGREEN TREE -1F333;AL # DECIDUOUS TREE -1F334;AL # PALM TREE -1F335;AL # CACTUS -1F337;AL # TULIP -1F338;AL # CHERRY BLOSSOM -1F339;AL # ROSE -1F33A;AL # HIBISCUS -1F33B;AL # SUNFLOWER -1F33C;AL # BLOSSOM -1F33D;AL # EAR OF MAIZE -1F33E;AL # EAR OF RICE -1F33F;AL # HERB -1F340;AL # FOUR LEAF CLOVER -1F341;AL # MAPLE LEAF -1F342;AL # FALLEN LEAF -1F343;AL # LEAF FLUTTERING IN WIND -1F344;AL # MUSHROOM -1F345;AL # TOMATO -1F346;AL # AUBERGINE -1F347;AL # GRAPES -1F348;AL # MELON -1F349;AL # WATERMELON -1F34A;AL # TANGERINE -1F34B;AL # LEMON -1F34C;AL # BANANA -1F34D;AL # PINEAPPLE -1F34E;AL # RED APPLE -1F34F;AL # GREEN APPLE -1F350;AL # PEAR -1F351;AL # PEACH -1F352;AL # CHERRIES -1F353;AL # 
STRAWBERRY -1F354;AL # HAMBURGER -1F355;AL # SLICE OF PIZZA -1F356;AL # MEAT ON BONE -1F357;AL # POULTRY LEG -1F358;AL # RICE CRACKER -1F359;AL # RICE BALL -1F35A;AL # COOKED RICE -1F35B;AL # CURRY AND RICE -1F35C;AL # STEAMING BOWL -1F35D;AL # SPAGHETTI -1F35E;AL # BREAD -1F35F;AL # FRENCH FRIES -1F360;AL # ROASTED SWEET POTATO -1F361;AL # DANGO -1F362;AL # ODEN -1F363;AL # SUSHI -1F364;AL # FRIED SHRIMP -1F365;AL # FISH CAKE WITH SWIRL DESIGN -1F366;AL # SOFT ICE CREAM -1F367;AL # SHAVED ICE -1F368;AL # ICE CREAM -1F369;AL # DOUGHNUT -1F36A;AL # COOKIE -1F36B;AL # CHOCOLATE BAR -1F36C;AL # CANDY -1F36D;AL # LOLLIPOP -1F36E;AL # CUSTARD -1F36F;AL # HONEY POT -1F370;AL # SHORTCAKE -1F371;AL # BENTO BOX -1F372;AL # POT OF FOOD -1F373;AL # COOKING -1F374;AL # FORK AND KNIFE -1F375;AL # TEACUP WITHOUT HANDLE -1F376;AL # SAKE BOTTLE AND CUP -1F377;AL # WINE GLASS -1F378;AL # COCKTAIL GLASS -1F379;AL # TROPICAL DRINK -1F37A;AL # BEER MUG -1F37B;AL # CLINKING BEER MUGS -1F37C;AL # BABY BOTTLE -1F380;AL # RIBBON -1F381;AL # WRAPPED PRESENT -1F382;AL # BIRTHDAY CAKE -1F383;AL # JACK-O-LANTERN -1F384;AL # CHRISTMAS TREE -1F385;AL # FATHER CHRISTMAS -1F386;AL # FIREWORKS -1F387;AL # FIREWORK SPARKLER -1F388;AL # BALLOON -1F389;AL # PARTY POPPER -1F38A;AL # CONFETTI BALL -1F38B;AL # TANABATA TREE -1F38C;AL # CROSSED FLAGS -1F38D;AL # PINE DECORATION -1F38E;AL # JAPANESE DOLLS -1F38F;AL # CARP STREAMER -1F390;AL # WIND CHIME -1F391;AL # MOON VIEWING CEREMONY -1F392;AL # SCHOOL SATCHEL -1F393;AL # GRADUATION CAP -1F3A0;AL # CAROUSEL HORSE -1F3A1;AL # FERRIS WHEEL -1F3A2;AL # ROLLER COASTER -1F3A3;AL # FISHING POLE AND FISH -1F3A4;AL # MICROPHONE -1F3A5;AL # MOVIE CAMERA -1F3A6;AL # CINEMA -1F3A7;AL # HEADPHONE -1F3A8;AL # ARTIST PALETTE -1F3A9;AL # TOP HAT -1F3AA;AL # CIRCUS TENT -1F3AB;AL # TICKET -1F3AC;AL # CLAPPER BOARD -1F3AD;AL # PERFORMING ARTS -1F3AE;AL # VIDEO GAME -1F3AF;AL # DIRECT HIT -1F3B0;AL # SLOT MACHINE -1F3B1;AL # BILLIARDS -1F3B2;AL # GAME DIE -1F3B3;AL # 
BOWLING -1F3B4;AL # FLOWER PLAYING CARDS +1F300;ID # CYCLONE +1F301;ID # FOGGY +1F302;ID # CLOSED UMBRELLA +1F303;ID # NIGHT WITH STARS +1F304;ID # SUNRISE OVER MOUNTAINS +1F305;ID # SUNRISE +1F306;ID # CITYSCAPE AT DUSK +1F307;ID # SUNSET OVER BUILDINGS +1F308;ID # RAINBOW +1F309;ID # BRIDGE AT NIGHT +1F30A;ID # WATER WAVE +1F30B;ID # VOLCANO +1F30C;ID # MILKY WAY +1F30D;ID # EARTH GLOBE EUROPE-AFRICA +1F30E;ID # EARTH GLOBE AMERICAS +1F30F;ID # EARTH GLOBE ASIA-AUSTRALIA +1F310;ID # GLOBE WITH MERIDIANS +1F311;ID # NEW MOON SYMBOL +1F312;ID # WAXING CRESCENT MOON SYMBOL +1F313;ID # FIRST QUARTER MOON SYMBOL +1F314;ID # WAXING GIBBOUS MOON SYMBOL +1F315;ID # FULL MOON SYMBOL +1F316;ID # WANING GIBBOUS MOON SYMBOL +1F317;ID # LAST QUARTER MOON SYMBOL +1F318;ID # WANING CRESCENT MOON SYMBOL +1F319;ID # CRESCENT MOON +1F31A;ID # NEW MOON WITH FACE +1F31B;ID # FIRST QUARTER MOON WITH FACE +1F31C;ID # LAST QUARTER MOON WITH FACE +1F31D;ID # FULL MOON WITH FACE +1F31E;ID # SUN WITH FACE +1F31F;ID # GLOWING STAR +1F320;ID # SHOOTING STAR +1F330;ID # CHESTNUT +1F331;ID # SEEDLING +1F332;ID # EVERGREEN TREE +1F333;ID # DECIDUOUS TREE +1F334;ID # PALM TREE +1F335;ID # CACTUS +1F337;ID # TULIP +1F338;ID # CHERRY BLOSSOM +1F339;ID # ROSE +1F33A;ID # HIBISCUS +1F33B;ID # SUNFLOWER +1F33C;ID # BLOSSOM +1F33D;ID # EAR OF MAIZE +1F33E;ID # EAR OF RICE +1F33F;ID # HERB +1F340;ID # FOUR LEAF CLOVER +1F341;ID # MAPLE LEAF +1F342;ID # FALLEN LEAF +1F343;ID # LEAF FLUTTERING IN WIND +1F344;ID # MUSHROOM +1F345;ID # TOMATO +1F346;ID # AUBERGINE +1F347;ID # GRAPES +1F348;ID # MELON +1F349;ID # WATERMELON +1F34A;ID # TANGERINE +1F34B;ID # LEMON +1F34C;ID # BANANA +1F34D;ID # PINEAPPLE +1F34E;ID # RED APPLE +1F34F;ID # GREEN APPLE +1F350;ID # PEAR +1F351;ID # PEACH +1F352;ID # CHERRIES +1F353;ID # STRAWBERRY +1F354;ID # HAMBURGER +1F355;ID # SLICE OF PIZZA +1F356;ID # MEAT ON BONE +1F357;ID # POULTRY LEG +1F358;ID # RICE CRACKER +1F359;ID # RICE BALL +1F35A;ID # COOKED RICE +1F35B;ID # 
CURRY AND RICE +1F35C;ID # STEAMING BOWL +1F35D;ID # SPAGHETTI +1F35E;ID # BREAD +1F35F;ID # FRENCH FRIES +1F360;ID # ROASTED SWEET POTATO +1F361;ID # DANGO +1F362;ID # ODEN +1F363;ID # SUSHI +1F364;ID # FRIED SHRIMP +1F365;ID # FISH CAKE WITH SWIRL DESIGN +1F366;ID # SOFT ICE CREAM +1F367;ID # SHAVED ICE +1F368;ID # ICE CREAM +1F369;ID # DOUGHNUT +1F36A;ID # COOKIE +1F36B;ID # CHOCOLATE BAR +1F36C;ID # CANDY +1F36D;ID # LOLLIPOP +1F36E;ID # CUSTARD +1F36F;ID # HONEY POT +1F370;ID # SHORTCAKE +1F371;ID # BENTO BOX +1F372;ID # POT OF FOOD +1F373;ID # COOKING +1F374;ID # FORK AND KNIFE +1F375;ID # TEACUP WITHOUT HANDLE +1F376;ID # SAKE BOTTLE AND CUP +1F377;ID # WINE GLASS +1F378;ID # COCKTAIL GLASS +1F379;ID # TROPICAL DRINK +1F37A;ID # BEER MUG +1F37B;ID # CLINKING BEER MUGS +1F37C;ID # BABY BOTTLE +1F380;ID # RIBBON +1F381;ID # WRAPPED PRESENT +1F382;ID # BIRTHDAY CAKE +1F383;ID # JACK-O-LANTERN +1F384;ID # CHRISTMAS TREE +1F385;ID # FATHER CHRISTMAS +1F386;ID # FIREWORKS +1F387;ID # FIREWORK SPARKLER +1F388;ID # BALLOON +1F389;ID # PARTY POPPER +1F38A;ID # CONFETTI BALL +1F38B;ID # TANABATA TREE +1F38C;ID # CROSSED FLAGS +1F38D;ID # PINE DECORATION +1F38E;ID # JAPANESE DOLLS +1F38F;ID # CARP STREAMER +1F390;ID # WIND CHIME +1F391;ID # MOON VIEWING CEREMONY +1F392;ID # SCHOOL SATCHEL +1F393;ID # GRADUATION CAP +1F3A0;ID # CAROUSEL HORSE +1F3A1;ID # FERRIS WHEEL +1F3A2;ID # ROLLER COASTER +1F3A3;ID # FISHING POLE AND FISH +1F3A4;ID # MICROPHONE +1F3A5;ID # MOVIE CAMERA +1F3A6;ID # CINEMA +1F3A7;ID # HEADPHONE +1F3A8;ID # ARTIST PALETTE +1F3A9;ID # TOP HAT +1F3AA;ID # CIRCUS TENT +1F3AB;ID # TICKET +1F3AC;ID # CLAPPER BOARD +1F3AD;ID # PERFORMING ARTS +1F3AE;ID # VIDEO GAME +1F3AF;ID # DIRECT HIT +1F3B0;ID # SLOT MACHINE +1F3B1;ID # BILLIARDS +1F3B2;ID # GAME DIE +1F3B3;ID # BOWLING +1F3B4;ID # FLOWER PLAYING CARDS 1F3B5;AL # MUSICAL NOTE 1F3B6;AL # MULTIPLE MUSICAL NOTES -1F3B7;AL # SAXOPHONE -1F3B8;AL # GUITAR -1F3B9;AL # MUSICAL KEYBOARD -1F3BA;AL # TRUMPET 
-1F3BB;AL # VIOLIN +1F3B7;ID # SAXOPHONE +1F3B8;ID # GUITAR +1F3B9;ID # MUSICAL KEYBOARD +1F3BA;ID # TRUMPET +1F3BB;ID # VIOLIN 1F3BC;AL # MUSICAL SCORE -1F3BD;AL # RUNNING SHIRT WITH SASH -1F3BE;AL # TENNIS RACQUET AND BALL -1F3BF;AL # SKI AND SKI BOOT -1F3C0;AL # BASKETBALL AND HOOP -1F3C1;AL # CHEQUERED FLAG -1F3C2;AL # SNOWBOARDER -1F3C3;AL # RUNNER -1F3C4;AL # SURFER -1F3C6;AL # TROPHY -1F3C7;AL # HORSE RACING -1F3C8;AL # AMERICAN FOOTBALL -1F3C9;AL # RUGBY FOOTBALL -1F3CA;AL # SWIMMER -1F3E0;AL # HOUSE BUILDING -1F3E1;AL # HOUSE WITH GARDEN -1F3E2;AL # OFFICE BUILDING -1F3E3;AL # JAPANESE POST OFFICE -1F3E4;AL # EUROPEAN POST OFFICE -1F3E5;AL # HOSPITAL -1F3E6;AL # BANK -1F3E7;AL # AUTOMATED TELLER MACHINE -1F3E8;AL # HOTEL -1F3E9;AL # LOVE HOTEL -1F3EA;AL # CONVENIENCE STORE -1F3EB;AL # SCHOOL -1F3EC;AL # DEPARTMENT STORE -1F3ED;AL # FACTORY -1F3EE;AL # IZAKAYA LANTERN -1F3EF;AL # JAPANESE CASTLE -1F3F0;AL # EUROPEAN CASTLE -1F400;AL # RAT -1F401;AL # MOUSE -1F402;AL # OX -1F403;AL # WATER BUFFALO -1F404;AL # COW -1F405;AL # TIGER -1F406;AL # LEOPARD -1F407;AL # RABBIT -1F408;AL # CAT -1F409;AL # DRAGON -1F40A;AL # CROCODILE -1F40B;AL # WHALE -1F40C;AL # SNAIL -1F40D;AL # SNAKE -1F40E;AL # HORSE -1F40F;AL # RAM -1F410;AL # GOAT -1F411;AL # SHEEP -1F412;AL # MONKEY -1F413;AL # ROOSTER -1F414;AL # CHICKEN -1F415;AL # DOG -1F416;AL # PIG -1F417;AL # BOAR -1F418;AL # ELEPHANT -1F419;AL # OCTOPUS -1F41A;AL # SPIRAL SHELL -1F41B;AL # BUG -1F41C;AL # ANT -1F41D;AL # HONEYBEE -1F41E;AL # LADY BEETLE -1F41F;AL # FISH -1F420;AL # TROPICAL FISH -1F421;AL # BLOWFISH -1F422;AL # TURTLE -1F423;AL # HATCHING CHICK -1F424;AL # BABY CHICK -1F425;AL # FRONT-FACING BABY CHICK -1F426;AL # BIRD -1F427;AL # PENGUIN -1F428;AL # KOALA -1F429;AL # POODLE -1F42A;AL # DROMEDARY CAMEL -1F42B;AL # BACTRIAN CAMEL -1F42C;AL # DOLPHIN -1F42D;AL # MOUSE FACE -1F42E;AL # COW FACE -1F42F;AL # TIGER FACE -1F430;AL # RABBIT FACE -1F431;AL # CAT FACE -1F432;AL # DRAGON FACE -1F433;AL # SPOUTING 
WHALE -1F434;AL # HORSE FACE -1F435;AL # MONKEY FACE -1F436;AL # DOG FACE -1F437;AL # PIG FACE -1F438;AL # FROG FACE -1F439;AL # HAMSTER FACE -1F43A;AL # WOLF FACE -1F43B;AL # BEAR FACE -1F43C;AL # PANDA FACE -1F43D;AL # PIG NOSE -1F43E;AL # PAW PRINTS -1F440;AL # EYES -1F442;AL # EAR -1F443;AL # NOSE -1F444;AL # MOUTH -1F445;AL # TONGUE -1F446;AL # WHITE UP POINTING BACKHAND INDEX -1F447;AL # WHITE DOWN POINTING BACKHAND INDEX -1F448;AL # WHITE LEFT POINTING BACKHAND INDEX -1F449;AL # WHITE RIGHT POINTING BACKHAND INDEX -1F44A;AL # FISTED HAND SIGN -1F44B;AL # WAVING HAND SIGN -1F44C;AL # OK HAND SIGN -1F44D;AL # THUMBS UP SIGN -1F44E;AL # THUMBS DOWN SIGN -1F44F;AL # CLAPPING HANDS SIGN -1F450;AL # OPEN HANDS SIGN -1F451;AL # CROWN -1F452;AL # WOMANS HAT -1F453;AL # EYEGLASSES -1F454;AL # NECKTIE -1F455;AL # T-SHIRT -1F456;AL # JEANS -1F457;AL # DRESS -1F458;AL # KIMONO -1F459;AL # BIKINI -1F45A;AL # WOMANS CLOTHES -1F45B;AL # PURSE -1F45C;AL # HANDBAG -1F45D;AL # POUCH -1F45E;AL # MANS SHOE -1F45F;AL # ATHLETIC SHOE -1F460;AL # HIGH-HEELED SHOE -1F461;AL # WOMANS SANDAL -1F462;AL # WOMANS BOOTS -1F463;AL # FOOTPRINTS -1F464;AL # BUST IN SILHOUETTE -1F465;AL # BUSTS IN SILHOUETTE -1F466;AL # BOY -1F467;AL # GIRL -1F468;AL # MAN -1F469;AL # WOMAN -1F46A;AL # FAMILY -1F46B;AL # MAN AND WOMAN HOLDING HANDS -1F46C;AL # TWO MEN HOLDING HANDS -1F46D;AL # TWO WOMEN HOLDING HANDS -1F46E;AL # POLICE OFFICER -1F46F;AL # WOMAN WITH BUNNY EARS -1F470;AL # BRIDE WITH VEIL -1F471;AL # PERSON WITH BLOND HAIR -1F472;AL # MAN WITH GUA PI MAO -1F473;AL # MAN WITH TURBAN -1F474;AL # OLDER MAN -1F475;AL # OLDER WOMAN -1F476;AL # BABY -1F477;AL # CONSTRUCTION WORKER -1F478;AL # PRINCESS -1F479;AL # JAPANESE OGRE -1F47A;AL # JAPANESE GOBLIN -1F47B;AL # GHOST -1F47C;AL # BABY ANGEL -1F47D;AL # EXTRATERRESTRIAL ALIEN -1F47E;AL # ALIEN MONSTER -1F47F;AL # IMP -1F480;AL # SKULL -1F481;AL # INFORMATION DESK PERSON -1F482;AL # GUARDSMAN -1F483;AL # DANCER -1F484;AL # LIPSTICK -1F485;AL # 
NAIL POLISH -1F486;AL # FACE MASSAGE -1F487;AL # HAIRCUT -1F488;AL # BARBER POLE -1F489;AL # SYRINGE -1F48A;AL # PILL -1F48B;AL # KISS MARK -1F48C;AL # LOVE LETTER -1F48D;AL # RING -1F48E;AL # GEM STONE -1F48F;AL # KISS -1F490;AL # BOUQUET -1F491;AL # COUPLE WITH HEART -1F492;AL # WEDDING -1F493;AL # BEATING HEART -1F494;AL # BROKEN HEART -1F495;AL # TWO HEARTS -1F496;AL # SPARKLING HEART -1F497;AL # GROWING HEART -1F498;AL # HEART WITH ARROW -1F499;AL # BLUE HEART -1F49A;AL # GREEN HEART -1F49B;AL # YELLOW HEART -1F49C;AL # PURPLE HEART -1F49D;AL # HEART WITH RIBBON -1F49E;AL # REVOLVING HEARTS -1F49F;AL # HEART DECORATION +1F3BD;ID # RUNNING SHIRT WITH SASH +1F3BE;ID # TENNIS RACQUET AND BALL +1F3BF;ID # SKI AND SKI BOOT +1F3C0;ID # BASKETBALL AND HOOP +1F3C1;ID # CHEQUERED FLAG +1F3C2;ID # SNOWBOARDER +1F3C3;ID # RUNNER +1F3C4;ID # SURFER +1F3C6;ID # TROPHY +1F3C7;ID # HORSE RACING +1F3C8;ID # AMERICAN FOOTBALL +1F3C9;ID # RUGBY FOOTBALL +1F3CA;ID # SWIMMER +1F3E0;ID # HOUSE BUILDING +1F3E1;ID # HOUSE WITH GARDEN +1F3E2;ID # OFFICE BUILDING +1F3E3;ID # JAPANESE POST OFFICE +1F3E4;ID # EUROPEAN POST OFFICE +1F3E5;ID # HOSPITAL +1F3E6;ID # BANK +1F3E7;ID # AUTOMATED TELLER MACHINE +1F3E8;ID # HOTEL +1F3E9;ID # LOVE HOTEL +1F3EA;ID # CONVENIENCE STORE +1F3EB;ID # SCHOOL +1F3EC;ID # DEPARTMENT STORE +1F3ED;ID # FACTORY +1F3EE;ID # IZAKAYA LANTERN +1F3EF;ID # JAPANESE CASTLE +1F3F0;ID # EUROPEAN CASTLE +1F400;ID # RAT +1F401;ID # MOUSE +1F402;ID # OX +1F403;ID # WATER BUFFALO +1F404;ID # COW +1F405;ID # TIGER +1F406;ID # LEOPARD +1F407;ID # RABBIT +1F408;ID # CAT +1F409;ID # DRAGON +1F40A;ID # CROCODILE +1F40B;ID # WHALE +1F40C;ID # SNAIL +1F40D;ID # SNAKE +1F40E;ID # HORSE +1F40F;ID # RAM +1F410;ID # GOAT +1F411;ID # SHEEP +1F412;ID # MONKEY +1F413;ID # ROOSTER +1F414;ID # CHICKEN +1F415;ID # DOG +1F416;ID # PIG +1F417;ID # BOAR +1F418;ID # ELEPHANT +1F419;ID # OCTOPUS +1F41A;ID # SPIRAL SHELL +1F41B;ID # BUG +1F41C;ID # ANT +1F41D;ID # HONEYBEE +1F41E;ID # LADY 
BEETLE +1F41F;ID # FISH +1F420;ID # TROPICAL FISH +1F421;ID # BLOWFISH +1F422;ID # TURTLE +1F423;ID # HATCHING CHICK +1F424;ID # BABY CHICK +1F425;ID # FRONT-FACING BABY CHICK +1F426;ID # BIRD +1F427;ID # PENGUIN +1F428;ID # KOALA +1F429;ID # POODLE +1F42A;ID # DROMEDARY CAMEL +1F42B;ID # BACTRIAN CAMEL +1F42C;ID # DOLPHIN +1F42D;ID # MOUSE FACE +1F42E;ID # COW FACE +1F42F;ID # TIGER FACE +1F430;ID # RABBIT FACE +1F431;ID # CAT FACE +1F432;ID # DRAGON FACE +1F433;ID # SPOUTING WHALE +1F434;ID # HORSE FACE +1F435;ID # MONKEY FACE +1F436;ID # DOG FACE +1F437;ID # PIG FACE +1F438;ID # FROG FACE +1F439;ID # HAMSTER FACE +1F43A;ID # WOLF FACE +1F43B;ID # BEAR FACE +1F43C;ID # PANDA FACE +1F43D;ID # PIG NOSE +1F43E;ID # PAW PRINTS +1F440;ID # EYES +1F442;ID # EAR +1F443;ID # NOSE +1F444;ID # MOUTH +1F445;ID # TONGUE +1F446;ID # WHITE UP POINTING BACKHAND INDEX +1F447;ID # WHITE DOWN POINTING BACKHAND INDEX +1F448;ID # WHITE LEFT POINTING BACKHAND INDEX +1F449;ID # WHITE RIGHT POINTING BACKHAND INDEX +1F44A;ID # FISTED HAND SIGN +1F44B;ID # WAVING HAND SIGN +1F44C;ID # OK HAND SIGN +1F44D;ID # THUMBS UP SIGN +1F44E;ID # THUMBS DOWN SIGN +1F44F;ID # CLAPPING HANDS SIGN +1F450;ID # OPEN HANDS SIGN +1F451;ID # CROWN +1F452;ID # WOMANS HAT +1F453;ID # EYEGLASSES +1F454;ID # NECKTIE +1F455;ID # T-SHIRT +1F456;ID # JEANS +1F457;ID # DRESS +1F458;ID # KIMONO +1F459;ID # BIKINI +1F45A;ID # WOMANS CLOTHES +1F45B;ID # PURSE +1F45C;ID # HANDBAG +1F45D;ID # POUCH +1F45E;ID # MANS SHOE +1F45F;ID # ATHLETIC SHOE +1F460;ID # HIGH-HEELED SHOE +1F461;ID # WOMANS SANDAL +1F462;ID # WOMANS BOOTS +1F463;ID # FOOTPRINTS +1F464;ID # BUST IN SILHOUETTE +1F465;ID # BUSTS IN SILHOUETTE +1F466;ID # BOY +1F467;ID # GIRL +1F468;ID # MAN +1F469;ID # WOMAN +1F46A;ID # FAMILY +1F46B;ID # MAN AND WOMAN HOLDING HANDS +1F46C;ID # TWO MEN HOLDING HANDS +1F46D;ID # TWO WOMEN HOLDING HANDS +1F46E;ID # POLICE OFFICER +1F46F;ID # WOMAN WITH BUNNY EARS +1F470;ID # BRIDE WITH VEIL +1F471;ID # PERSON WITH BLOND 
HAIR +1F472;ID # MAN WITH GUA PI MAO +1F473;ID # MAN WITH TURBAN +1F474;ID # OLDER MAN +1F475;ID # OLDER WOMAN +1F476;ID # BABY +1F477;ID # CONSTRUCTION WORKER +1F478;ID # PRINCESS +1F479;ID # JAPANESE OGRE +1F47A;ID # JAPANESE GOBLIN +1F47B;ID # GHOST +1F47C;ID # BABY ANGEL +1F47D;ID # EXTRATERRESTRIAL ALIEN +1F47E;ID # ALIEN MONSTER +1F47F;ID # IMP +1F480;ID # SKULL +1F481;ID # INFORMATION DESK PERSON +1F482;ID # GUARDSMAN +1F483;ID # DANCER +1F484;ID # LIPSTICK +1F485;ID # NAIL POLISH +1F486;ID # FACE MASSAGE +1F487;ID # HAIRCUT +1F488;ID # BARBER POLE +1F489;ID # SYRINGE +1F48A;ID # PILL +1F48B;ID # KISS MARK +1F48C;ID # LOVE LETTER +1F48D;ID # RING +1F48E;ID # GEM STONE +1F48F;ID # KISS +1F490;ID # BOUQUET +1F491;ID # COUPLE WITH HEART +1F492;ID # WEDDING +1F493;ID # BEATING HEART +1F494;ID # BROKEN HEART +1F495;ID # TWO HEARTS +1F496;ID # SPARKLING HEART +1F497;ID # GROWING HEART +1F498;ID # HEART WITH ARROW +1F499;ID # BLUE HEART +1F49A;ID # GREEN HEART +1F49B;ID # YELLOW HEART +1F49C;ID # PURPLE HEART +1F49D;ID # HEART WITH RIBBON +1F49E;ID # REVOLVING HEARTS +1F49F;ID # HEART DECORATION 1F4A0;AL # DIAMOND SHAPE WITH A DOT INSIDE -1F4A1;AL # ELECTRIC LIGHT BULB +1F4A1;ID # ELECTRIC LIGHT BULB 1F4A2;AL # ANGER SYMBOL -1F4A3;AL # BOMB +1F4A3;ID # BOMB 1F4A4;AL # SLEEPING SYMBOL -1F4A5;AL # COLLISION SYMBOL -1F4A6;AL # SPLASHING SWEAT SYMBOL -1F4A7;AL # DROPLET -1F4A8;AL # DASH SYMBOL -1F4A9;AL # PILE OF POO -1F4AA;AL # FLEXED BICEPS -1F4AB;AL # DIZZY SYMBOL -1F4AC;AL # SPEECH BALLOON -1F4AD;AL # THOUGHT BALLOON -1F4AE;AL # WHITE FLOWER +1F4A5;ID # COLLISION SYMBOL +1F4A6;ID # SPLASHING SWEAT SYMBOL +1F4A7;ID # DROPLET +1F4A8;ID # DASH SYMBOL +1F4A9;ID # PILE OF POO +1F4AA;ID # FLEXED BICEPS +1F4AB;ID # DIZZY SYMBOL +1F4AC;ID # SPEECH BALLOON +1F4AD;ID # THOUGHT BALLOON +1F4AE;ID # WHITE FLOWER 1F4AF;AL # HUNDRED POINTS SYMBOL -1F4B0;AL # MONEY BAG +1F4B0;ID # MONEY BAG 1F4B1;AL # CURRENCY EXCHANGE 1F4B2;AL # HEAVY DOLLAR SIGN -1F4B3;AL # CREDIT CARD -1F4B4;AL 
# BANKNOTE WITH YEN SIGN -1F4B5;AL # BANKNOTE WITH DOLLAR SIGN -1F4B6;AL # BANKNOTE WITH EURO SIGN -1F4B7;AL # BANKNOTE WITH POUND SIGN -1F4B8;AL # MONEY WITH WINGS -1F4B9;AL # CHART WITH UPWARDS TREND AND YEN SIGN -1F4BA;AL # SEAT -1F4BB;AL # PERSONAL COMPUTER -1F4BC;AL # BRIEFCASE -1F4BD;AL # MINIDISC -1F4BE;AL # FLOPPY DISK -1F4BF;AL # OPTICAL DISC -1F4C0;AL # DVD -1F4C1;AL # FILE FOLDER -1F4C2;AL # OPEN FILE FOLDER -1F4C3;AL # PAGE WITH CURL -1F4C4;AL # PAGE FACING UP -1F4C5;AL # CALENDAR -1F4C6;AL # TEAR-OFF CALENDAR -1F4C7;AL # CARD INDEX -1F4C8;AL # CHART WITH UPWARDS TREND -1F4C9;AL # CHART WITH DOWNWARDS TREND -1F4CA;AL # BAR CHART -1F4CB;AL # CLIPBOARD -1F4CC;AL # PUSHPIN -1F4CD;AL # ROUND PUSHPIN -1F4CE;AL # PAPERCLIP -1F4CF;AL # STRAIGHT RULER -1F4D0;AL # TRIANGULAR RULER -1F4D1;AL # BOOKMARK TABS -1F4D2;AL # LEDGER -1F4D3;AL # NOTEBOOK -1F4D4;AL # NOTEBOOK WITH DECORATIVE COVER -1F4D5;AL # CLOSED BOOK -1F4D6;AL # OPEN BOOK -1F4D7;AL # GREEN BOOK -1F4D8;AL # BLUE BOOK -1F4D9;AL # ORANGE BOOK -1F4DA;AL # BOOKS -1F4DB;AL # NAME BADGE -1F4DC;AL # SCROLL -1F4DD;AL # MEMO -1F4DE;AL # TELEPHONE RECEIVER -1F4DF;AL # PAGER -1F4E0;AL # FAX MACHINE -1F4E1;AL # SATELLITE ANTENNA -1F4E2;AL # PUBLIC ADDRESS LOUDSPEAKER -1F4E3;AL # CHEERING MEGAPHONE -1F4E4;AL # OUTBOX TRAY -1F4E5;AL # INBOX TRAY -1F4E6;AL # PACKAGE -1F4E7;AL # E-MAIL SYMBOL -1F4E8;AL # INCOMING ENVELOPE -1F4E9;AL # ENVELOPE WITH DOWNWARDS ARROW ABOVE -1F4EA;AL # CLOSED MAILBOX WITH LOWERED FLAG -1F4EB;AL # CLOSED MAILBOX WITH RAISED FLAG -1F4EC;AL # OPEN MAILBOX WITH RAISED FLAG -1F4ED;AL # OPEN MAILBOX WITH LOWERED FLAG -1F4EE;AL # POSTBOX -1F4EF;AL # POSTAL HORN -1F4F0;AL # NEWSPAPER -1F4F1;AL # MOBILE PHONE -1F4F2;AL # MOBILE PHONE WITH RIGHTWARDS ARROW AT LEFT -1F4F3;AL # VIBRATION MODE -1F4F4;AL # MOBILE PHONE OFF -1F4F5;AL # NO MOBILE PHONES -1F4F6;AL # ANTENNA WITH BARS -1F4F7;AL # CAMERA -1F4F9;AL # VIDEO CAMERA -1F4FA;AL # TELEVISION -1F4FB;AL # RADIO -1F4FC;AL # VIDEOCASSETTE +1F4B3;ID # 
CREDIT CARD +1F4B4;ID # BANKNOTE WITH YEN SIGN +1F4B5;ID # BANKNOTE WITH DOLLAR SIGN +1F4B6;ID # BANKNOTE WITH EURO SIGN +1F4B7;ID # BANKNOTE WITH POUND SIGN +1F4B8;ID # MONEY WITH WINGS +1F4B9;ID # CHART WITH UPWARDS TREND AND YEN SIGN +1F4BA;ID # SEAT +1F4BB;ID # PERSONAL COMPUTER +1F4BC;ID # BRIEFCASE +1F4BD;ID # MINIDISC +1F4BE;ID # FLOPPY DISK +1F4BF;ID # OPTICAL DISC +1F4C0;ID # DVD +1F4C1;ID # FILE FOLDER +1F4C2;ID # OPEN FILE FOLDER +1F4C3;ID # PAGE WITH CURL +1F4C4;ID # PAGE FACING UP +1F4C5;ID # CALENDAR +1F4C6;ID # TEAR-OFF CALENDAR +1F4C7;ID # CARD INDEX +1F4C8;ID # CHART WITH UPWARDS TREND +1F4C9;ID # CHART WITH DOWNWARDS TREND +1F4CA;ID # BAR CHART +1F4CB;ID # CLIPBOARD +1F4CC;ID # PUSHPIN +1F4CD;ID # ROUND PUSHPIN +1F4CE;ID # PAPERCLIP +1F4CF;ID # STRAIGHT RULER +1F4D0;ID # TRIANGULAR RULER +1F4D1;ID # BOOKMARK TABS +1F4D2;ID # LEDGER +1F4D3;ID # NOTEBOOK +1F4D4;ID # NOTEBOOK WITH DECORATIVE COVER +1F4D5;ID # CLOSED BOOK +1F4D6;ID # OPEN BOOK +1F4D7;ID # GREEN BOOK +1F4D8;ID # BLUE BOOK +1F4D9;ID # ORANGE BOOK +1F4DA;ID # BOOKS +1F4DB;ID # NAME BADGE +1F4DC;ID # SCROLL +1F4DD;ID # MEMO +1F4DE;ID # TELEPHONE RECEIVER +1F4DF;ID # PAGER +1F4E0;ID # FAX MACHINE +1F4E1;ID # SATELLITE ANTENNA +1F4E2;ID # PUBLIC ADDRESS LOUDSPEAKER +1F4E3;ID # CHEERING MEGAPHONE +1F4E4;ID # OUTBOX TRAY +1F4E5;ID # INBOX TRAY +1F4E6;ID # PACKAGE +1F4E7;ID # E-MAIL SYMBOL +1F4E8;ID # INCOMING ENVELOPE +1F4E9;ID # ENVELOPE WITH DOWNWARDS ARROW ABOVE +1F4EA;ID # CLOSED MAILBOX WITH LOWERED FLAG +1F4EB;ID # CLOSED MAILBOX WITH RAISED FLAG +1F4EC;ID # OPEN MAILBOX WITH RAISED FLAG +1F4ED;ID # OPEN MAILBOX WITH LOWERED FLAG +1F4EE;ID # POSTBOX +1F4EF;ID # POSTAL HORN +1F4F0;ID # NEWSPAPER +1F4F1;ID # MOBILE PHONE +1F4F2;ID # MOBILE PHONE WITH RIGHTWARDS ARROW AT LEFT +1F4F3;ID # VIBRATION MODE +1F4F4;ID # MOBILE PHONE OFF +1F4F5;ID # NO MOBILE PHONES +1F4F6;ID # ANTENNA WITH BARS +1F4F7;ID # CAMERA +1F4F9;ID # VIDEO CAMERA +1F4FA;ID # TELEVISION +1F4FB;ID # RADIO +1F4FC;ID # 
VIDEOCASSETTE 1F500;AL # TWISTED RIGHTWARDS ARROWS 1F501;AL # CLOCKWISE RIGHTWARDS AND LEFTWARDS OPEN CIRCLE ARROWS 1F502;AL # CLOCKWISE RIGHTWARDS AND LEFTWARDS OPEN CIRCLE ARROWS WITH CIRCLED ONE OVERLAY @@ -24026,22 +24027,22 @@ FFFD;AI # REPLACEMENT CHARACTER 1F504;AL # ANTICLOCKWISE DOWNWARDS AND UPWARDS OPEN CIRCLE ARROWS 1F505;AL # LOW BRIGHTNESS SYMBOL 1F506;AL # HIGH BRIGHTNESS SYMBOL -1F507;AL # SPEAKER WITH CANCELLATION STROKE -1F508;AL # SPEAKER -1F509;AL # SPEAKER WITH ONE SOUND WAVE -1F50A;AL # SPEAKER WITH THREE SOUND WAVES -1F50B;AL # BATTERY -1F50C;AL # ELECTRIC PLUG -1F50D;AL # LEFT-POINTING MAGNIFYING GLASS -1F50E;AL # RIGHT-POINTING MAGNIFYING GLASS -1F50F;AL # LOCK WITH INK PEN -1F510;AL # CLOSED LOCK WITH KEY -1F511;AL # KEY -1F512;AL # LOCK -1F513;AL # OPEN LOCK -1F514;AL # BELL -1F515;AL # BELL WITH CANCELLATION STROKE -1F516;AL # BOOKMARK +1F507;ID # SPEAKER WITH CANCELLATION STROKE +1F508;ID # SPEAKER +1F509;ID # SPEAKER WITH ONE SOUND WAVE +1F50A;ID # SPEAKER WITH THREE SOUND WAVES +1F50B;ID # BATTERY +1F50C;ID # ELECTRIC PLUG +1F50D;ID # LEFT-POINTING MAGNIFYING GLASS +1F50E;ID # RIGHT-POINTING MAGNIFYING GLASS +1F50F;ID # LOCK WITH INK PEN +1F510;ID # CLOSED LOCK WITH KEY +1F511;ID # KEY +1F512;ID # LOCK +1F513;ID # OPEN LOCK +1F514;ID # BELL +1F515;ID # BELL WITH CANCELLATION STROKE +1F516;ID # BOOKMARK 1F517;AL # LINK SYMBOL 1F518;AL # RADIO BUTTON 1F519;AL # BACK WITH LEFTWARDS ARROW ABOVE @@ -24056,19 +24057,19 @@ FFFD;AI # REPLACEMENT CHARACTER 1F522;AL # INPUT SYMBOL FOR NUMBERS 1F523;AL # INPUT SYMBOL FOR SYMBOLS 1F524;AL # INPUT SYMBOL FOR LATIN LETTERS -1F525;AL # FIRE -1F526;AL # ELECTRIC TORCH -1F527;AL # WRENCH -1F528;AL # HAMMER -1F529;AL # NUT AND BOLT -1F52A;AL # HOCHO -1F52B;AL # PISTOL -1F52C;AL # MICROSCOPE -1F52D;AL # TELESCOPE -1F52E;AL # CRYSTAL BALL -1F52F;AL # SIX POINTED STAR WITH MIDDLE DOT -1F530;AL # JAPANESE SYMBOL FOR BEGINNER -1F531;AL # TRIDENT EMBLEM +1F525;ID # FIRE +1F526;ID # ELECTRIC TORCH +1F527;ID # 
WRENCH +1F528;ID # HAMMER +1F529;ID # NUT AND BOLT +1F52A;ID # HOCHO +1F52B;ID # PISTOL +1F52C;ID # MICROSCOPE +1F52D;ID # TELESCOPE +1F52E;ID # CRYSTAL BALL +1F52F;ID # SIX POINTED STAR WITH MIDDLE DOT +1F530;ID # JAPANESE SYMBOL FOR BEGINNER +1F531;ID # TRIDENT EMBLEM 1F532;AL # BLACK SQUARE BUTTON 1F533;AL # WHITE SQUARE BUTTON 1F534;AL # LARGE RED CIRCLE @@ -24085,181 +24086,181 @@ FFFD;AI # REPLACEMENT CHARACTER 1F541;AL # CROSS POMMEE WITH HALF-CIRCLE BELOW 1F542;AL # CROSS POMMEE 1F543;AL # NOTCHED LEFT SEMICIRCLE WITH THREE DOTS -1F550;AL # CLOCK FACE ONE OCLOCK -1F551;AL # CLOCK FACE TWO OCLOCK -1F552;AL # CLOCK FACE THREE OCLOCK -1F553;AL # CLOCK FACE FOUR OCLOCK -1F554;AL # CLOCK FACE FIVE OCLOCK -1F555;AL # CLOCK FACE SIX OCLOCK -1F556;AL # CLOCK FACE SEVEN OCLOCK -1F557;AL # CLOCK FACE EIGHT OCLOCK -1F558;AL # CLOCK FACE NINE OCLOCK -1F559;AL # CLOCK FACE TEN OCLOCK -1F55A;AL # CLOCK FACE ELEVEN OCLOCK -1F55B;AL # CLOCK FACE TWELVE OCLOCK -1F55C;AL # CLOCK FACE ONE-THIRTY -1F55D;AL # CLOCK FACE TWO-THIRTY -1F55E;AL # CLOCK FACE THREE-THIRTY -1F55F;AL # CLOCK FACE FOUR-THIRTY -1F560;AL # CLOCK FACE FIVE-THIRTY -1F561;AL # CLOCK FACE SIX-THIRTY -1F562;AL # CLOCK FACE SEVEN-THIRTY -1F563;AL # CLOCK FACE EIGHT-THIRTY -1F564;AL # CLOCK FACE NINE-THIRTY -1F565;AL # CLOCK FACE TEN-THIRTY -1F566;AL # CLOCK FACE ELEVEN-THIRTY -1F567;AL # CLOCK FACE TWELVE-THIRTY -1F5FB;AL # MOUNT FUJI -1F5FC;AL # TOKYO TOWER -1F5FD;AL # STATUE OF LIBERTY -1F5FE;AL # SILHOUETTE OF JAPAN -1F5FF;AL # MOYAI -1F600;AL # GRINNING FACE -1F601;AL # GRINNING FACE WITH SMILING EYES -1F602;AL # FACE WITH TEARS OF JOY -1F603;AL # SMILING FACE WITH OPEN MOUTH -1F604;AL # SMILING FACE WITH OPEN MOUTH AND SMILING EYES -1F605;AL # SMILING FACE WITH OPEN MOUTH AND COLD SWEAT -1F606;AL # SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES -1F607;AL # SMILING FACE WITH HALO -1F608;AL # SMILING FACE WITH HORNS -1F609;AL # WINKING FACE -1F60A;AL # SMILING FACE WITH SMILING EYES -1F60B;AL # FACE 
SAVOURING DELICIOUS FOOD -1F60C;AL # RELIEVED FACE -1F60D;AL # SMILING FACE WITH HEART-SHAPED EYES -1F60E;AL # SMILING FACE WITH SUNGLASSES -1F60F;AL # SMIRKING FACE -1F610;AL # NEUTRAL FACE -1F611;AL # EXPRESSIONLESS FACE -1F612;AL # UNAMUSED FACE -1F613;AL # FACE WITH COLD SWEAT -1F614;AL # PENSIVE FACE -1F615;AL # CONFUSED FACE -1F616;AL # CONFOUNDED FACE -1F617;AL # KISSING FACE -1F618;AL # FACE THROWING A KISS -1F619;AL # KISSING FACE WITH SMILING EYES -1F61A;AL # KISSING FACE WITH CLOSED EYES -1F61B;AL # FACE WITH STUCK-OUT TONGUE -1F61C;AL # FACE WITH STUCK-OUT TONGUE AND WINKING EYE -1F61D;AL # FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES -1F61E;AL # DISAPPOINTED FACE -1F61F;AL # WORRIED FACE -1F620;AL # ANGRY FACE -1F621;AL # POUTING FACE -1F622;AL # CRYING FACE -1F623;AL # PERSEVERING FACE -1F624;AL # FACE WITH LOOK OF TRIUMPH -1F625;AL # DISAPPOINTED BUT RELIEVED FACE -1F626;AL # FROWNING FACE WITH OPEN MOUTH -1F627;AL # ANGUISHED FACE -1F628;AL # FEARFUL FACE -1F629;AL # WEARY FACE -1F62A;AL # SLEEPY FACE -1F62B;AL # TIRED FACE -1F62C;AL # GRIMACING FACE -1F62D;AL # LOUDLY CRYING FACE -1F62E;AL # FACE WITH OPEN MOUTH -1F62F;AL # HUSHED FACE -1F630;AL # FACE WITH OPEN MOUTH AND COLD SWEAT -1F631;AL # FACE SCREAMING IN FEAR -1F632;AL # ASTONISHED FACE -1F633;AL # FLUSHED FACE -1F634;AL # SLEEPING FACE -1F635;AL # DIZZY FACE -1F636;AL # FACE WITHOUT MOUTH -1F637;AL # FACE WITH MEDICAL MASK -1F638;AL # GRINNING CAT FACE WITH SMILING EYES -1F639;AL # CAT FACE WITH TEARS OF JOY -1F63A;AL # SMILING CAT FACE WITH OPEN MOUTH -1F63B;AL # SMILING CAT FACE WITH HEART-SHAPED EYES -1F63C;AL # CAT FACE WITH WRY SMILE -1F63D;AL # KISSING CAT FACE WITH CLOSED EYES -1F63E;AL # POUTING CAT FACE -1F63F;AL # CRYING CAT FACE -1F640;AL # WEARY CAT FACE -1F645;AL # FACE WITH NO GOOD GESTURE -1F646;AL # FACE WITH OK GESTURE -1F647;AL # PERSON BOWING DEEPLY -1F648;AL # SEE-NO-EVIL MONKEY -1F649;AL # HEAR-NO-EVIL MONKEY -1F64A;AL # SPEAK-NO-EVIL MONKEY -1F64B;AL # HAPPY 
PERSON RAISING ONE HAND -1F64C;AL # PERSON RAISING BOTH HANDS IN CELEBRATION -1F64D;AL # PERSON FROWNING -1F64E;AL # PERSON WITH POUTING FACE -1F64F;AL # PERSON WITH FOLDED HANDS -1F680;AL # ROCKET -1F681;AL # HELICOPTER -1F682;AL # STEAM LOCOMOTIVE -1F683;AL # RAILWAY CAR -1F684;AL # HIGH-SPEED TRAIN -1F685;AL # HIGH-SPEED TRAIN WITH BULLET NOSE -1F686;AL # TRAIN -1F687;AL # METRO -1F688;AL # LIGHT RAIL -1F689;AL # STATION -1F68A;AL # TRAM -1F68B;AL # TRAM CAR -1F68C;AL # BUS -1F68D;AL # ONCOMING BUS -1F68E;AL # TROLLEYBUS -1F68F;AL # BUS STOP -1F690;AL # MINIBUS -1F691;AL # AMBULANCE -1F692;AL # FIRE ENGINE -1F693;AL # POLICE CAR -1F694;AL # ONCOMING POLICE CAR -1F695;AL # TAXI -1F696;AL # ONCOMING TAXI -1F697;AL # AUTOMOBILE -1F698;AL # ONCOMING AUTOMOBILE -1F699;AL # RECREATIONAL VEHICLE -1F69A;AL # DELIVERY TRUCK -1F69B;AL # ARTICULATED LORRY -1F69C;AL # TRACTOR -1F69D;AL # MONORAIL -1F69E;AL # MOUNTAIN RAILWAY -1F69F;AL # SUSPENSION RAILWAY -1F6A0;AL # MOUNTAIN CABLEWAY -1F6A1;AL # AERIAL TRAMWAY -1F6A2;AL # SHIP -1F6A3;AL # ROWBOAT -1F6A4;AL # SPEEDBOAT -1F6A5;AL # HORIZONTAL TRAFFIC LIGHT -1F6A6;AL # VERTICAL TRAFFIC LIGHT -1F6A7;AL # CONSTRUCTION SIGN -1F6A8;AL # POLICE CARS REVOLVING LIGHT -1F6A9;AL # TRIANGULAR FLAG ON POST -1F6AA;AL # DOOR -1F6AB;AL # NO ENTRY SIGN -1F6AC;AL # SMOKING SYMBOL -1F6AD;AL # NO SMOKING SYMBOL -1F6AE;AL # PUT LITTER IN ITS PLACE SYMBOL -1F6AF;AL # DO NOT LITTER SYMBOL -1F6B0;AL # POTABLE WATER SYMBOL -1F6B1;AL # NON-POTABLE WATER SYMBOL -1F6B2;AL # BICYCLE -1F6B3;AL # NO BICYCLES -1F6B4;AL # BICYCLIST -1F6B5;AL # MOUNTAIN BICYCLIST -1F6B6;AL # PEDESTRIAN -1F6B7;AL # NO PEDESTRIANS -1F6B8;AL # CHILDREN CROSSING -1F6B9;AL # MENS SYMBOL -1F6BA;AL # WOMENS SYMBOL -1F6BB;AL # RESTROOM -1F6BC;AL # BABY SYMBOL -1F6BD;AL # TOILET -1F6BE;AL # WATER CLOSET -1F6BF;AL # SHOWER -1F6C0;AL # BATH -1F6C1;AL # BATHTUB -1F6C2;AL # PASSPORT CONTROL -1F6C3;AL # CUSTOMS -1F6C4;AL # BAGGAGE CLAIM -1F6C5;AL # LEFT LUGGAGE +1F550;ID # CLOCK FACE ONE 
OCLOCK +1F551;ID # CLOCK FACE TWO OCLOCK +1F552;ID # CLOCK FACE THREE OCLOCK +1F553;ID # CLOCK FACE FOUR OCLOCK +1F554;ID # CLOCK FACE FIVE OCLOCK +1F555;ID # CLOCK FACE SIX OCLOCK +1F556;ID # CLOCK FACE SEVEN OCLOCK +1F557;ID # CLOCK FACE EIGHT OCLOCK +1F558;ID # CLOCK FACE NINE OCLOCK +1F559;ID # CLOCK FACE TEN OCLOCK +1F55A;ID # CLOCK FACE ELEVEN OCLOCK +1F55B;ID # CLOCK FACE TWELVE OCLOCK +1F55C;ID # CLOCK FACE ONE-THIRTY +1F55D;ID # CLOCK FACE TWO-THIRTY +1F55E;ID # CLOCK FACE THREE-THIRTY +1F55F;ID # CLOCK FACE FOUR-THIRTY +1F560;ID # CLOCK FACE FIVE-THIRTY +1F561;ID # CLOCK FACE SIX-THIRTY +1F562;ID # CLOCK FACE SEVEN-THIRTY +1F563;ID # CLOCK FACE EIGHT-THIRTY +1F564;ID # CLOCK FACE NINE-THIRTY +1F565;ID # CLOCK FACE TEN-THIRTY +1F566;ID # CLOCK FACE ELEVEN-THIRTY +1F567;ID # CLOCK FACE TWELVE-THIRTY +1F5FB;ID # MOUNT FUJI +1F5FC;ID # TOKYO TOWER +1F5FD;ID # STATUE OF LIBERTY +1F5FE;ID # SILHOUETTE OF JAPAN +1F5FF;ID # MOYAI +1F600;ID # GRINNING FACE +1F601;ID # GRINNING FACE WITH SMILING EYES +1F602;ID # FACE WITH TEARS OF JOY +1F603;ID # SMILING FACE WITH OPEN MOUTH +1F604;ID # SMILING FACE WITH OPEN MOUTH AND SMILING EYES +1F605;ID # SMILING FACE WITH OPEN MOUTH AND COLD SWEAT +1F606;ID # SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES +1F607;ID # SMILING FACE WITH HALO +1F608;ID # SMILING FACE WITH HORNS +1F609;ID # WINKING FACE +1F60A;ID # SMILING FACE WITH SMILING EYES +1F60B;ID # FACE SAVOURING DELICIOUS FOOD +1F60C;ID # RELIEVED FACE +1F60D;ID # SMILING FACE WITH HEART-SHAPED EYES +1F60E;ID # SMILING FACE WITH SUNGLASSES +1F60F;ID # SMIRKING FACE +1F610;ID # NEUTRAL FACE +1F611;ID # EXPRESSIONLESS FACE +1F612;ID # UNAMUSED FACE +1F613;ID # FACE WITH COLD SWEAT +1F614;ID # PENSIVE FACE +1F615;ID # CONFUSED FACE +1F616;ID # CONFOUNDED FACE +1F617;ID # KISSING FACE +1F618;ID # FACE THROWING A KISS +1F619;ID # KISSING FACE WITH SMILING EYES +1F61A;ID # KISSING FACE WITH CLOSED EYES +1F61B;ID # FACE WITH STUCK-OUT TONGUE +1F61C;ID # FACE WITH 
STUCK-OUT TONGUE AND WINKING EYE +1F61D;ID # FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES +1F61E;ID # DISAPPOINTED FACE +1F61F;ID # WORRIED FACE +1F620;ID # ANGRY FACE +1F621;ID # POUTING FACE +1F622;ID # CRYING FACE +1F623;ID # PERSEVERING FACE +1F624;ID # FACE WITH LOOK OF TRIUMPH +1F625;ID # DISAPPOINTED BUT RELIEVED FACE +1F626;ID # FROWNING FACE WITH OPEN MOUTH +1F627;ID # ANGUISHED FACE +1F628;ID # FEARFUL FACE +1F629;ID # WEARY FACE +1F62A;ID # SLEEPY FACE +1F62B;ID # TIRED FACE +1F62C;ID # GRIMACING FACE +1F62D;ID # LOUDLY CRYING FACE +1F62E;ID # FACE WITH OPEN MOUTH +1F62F;ID # HUSHED FACE +1F630;ID # FACE WITH OPEN MOUTH AND COLD SWEAT +1F631;ID # FACE SCREAMING IN FEAR +1F632;ID # ASTONISHED FACE +1F633;ID # FLUSHED FACE +1F634;ID # SLEEPING FACE +1F635;ID # DIZZY FACE +1F636;ID # FACE WITHOUT MOUTH +1F637;ID # FACE WITH MEDICAL MASK +1F638;ID # GRINNING CAT FACE WITH SMILING EYES +1F639;ID # CAT FACE WITH TEARS OF JOY +1F63A;ID # SMILING CAT FACE WITH OPEN MOUTH +1F63B;ID # SMILING CAT FACE WITH HEART-SHAPED EYES +1F63C;ID # CAT FACE WITH WRY SMILE +1F63D;ID # KISSING CAT FACE WITH CLOSED EYES +1F63E;ID # POUTING CAT FACE +1F63F;ID # CRYING CAT FACE +1F640;ID # WEARY CAT FACE +1F645;ID # FACE WITH NO GOOD GESTURE +1F646;ID # FACE WITH OK GESTURE +1F647;ID # PERSON BOWING DEEPLY +1F648;ID # SEE-NO-EVIL MONKEY +1F649;ID # HEAR-NO-EVIL MONKEY +1F64A;ID # SPEAK-NO-EVIL MONKEY +1F64B;ID # HAPPY PERSON RAISING ONE HAND +1F64C;ID # PERSON RAISING BOTH HANDS IN CELEBRATION +1F64D;ID # PERSON FROWNING +1F64E;ID # PERSON WITH POUTING FACE +1F64F;ID # PERSON WITH FOLDED HANDS +1F680;ID # ROCKET +1F681;ID # HELICOPTER +1F682;ID # STEAM LOCOMOTIVE +1F683;ID # RAILWAY CAR +1F684;ID # HIGH-SPEED TRAIN +1F685;ID # HIGH-SPEED TRAIN WITH BULLET NOSE +1F686;ID # TRAIN +1F687;ID # METRO +1F688;ID # LIGHT RAIL +1F689;ID # STATION +1F68A;ID # TRAM +1F68B;ID # TRAM CAR +1F68C;ID # BUS +1F68D;ID # ONCOMING BUS +1F68E;ID # TROLLEYBUS +1F68F;ID # BUS STOP +1F690;ID # 
MINIBUS +1F691;ID # AMBULANCE +1F692;ID # FIRE ENGINE +1F693;ID # POLICE CAR +1F694;ID # ONCOMING POLICE CAR +1F695;ID # TAXI +1F696;ID # ONCOMING TAXI +1F697;ID # AUTOMOBILE +1F698;ID # ONCOMING AUTOMOBILE +1F699;ID # RECREATIONAL VEHICLE +1F69A;ID # DELIVERY TRUCK +1F69B;ID # ARTICULATED LORRY +1F69C;ID # TRACTOR +1F69D;ID # MONORAIL +1F69E;ID # MOUNTAIN RAILWAY +1F69F;ID # SUSPENSION RAILWAY +1F6A0;ID # MOUNTAIN CABLEWAY +1F6A1;ID # AERIAL TRAMWAY +1F6A2;ID # SHIP +1F6A3;ID # ROWBOAT +1F6A4;ID # SPEEDBOAT +1F6A5;ID # HORIZONTAL TRAFFIC LIGHT +1F6A6;ID # VERTICAL TRAFFIC LIGHT +1F6A7;ID # CONSTRUCTION SIGN +1F6A8;ID # POLICE CARS REVOLVING LIGHT +1F6A9;ID # TRIANGULAR FLAG ON POST +1F6AA;ID # DOOR +1F6AB;ID # NO ENTRY SIGN +1F6AC;ID # SMOKING SYMBOL +1F6AD;ID # NO SMOKING SYMBOL +1F6AE;ID # PUT LITTER IN ITS PLACE SYMBOL +1F6AF;ID # DO NOT LITTER SYMBOL +1F6B0;ID # POTABLE WATER SYMBOL +1F6B1;ID # NON-POTABLE WATER SYMBOL +1F6B2;ID # BICYCLE +1F6B3;ID # NO BICYCLES +1F6B4;ID # BICYCLIST +1F6B5;ID # MOUNTAIN BICYCLIST +1F6B6;ID # PEDESTRIAN +1F6B7;ID # NO PEDESTRIANS +1F6B8;ID # CHILDREN CROSSING +1F6B9;ID # MENS SYMBOL +1F6BA;ID # WOMENS SYMBOL +1F6BB;ID # RESTROOM +1F6BC;ID # BABY SYMBOL +1F6BD;ID # TOILET +1F6BE;ID # WATER CLOSET +1F6BF;ID # SHOWER +1F6C0;ID # BATH +1F6C1;ID # BATHTUB +1F6C2;ID # PASSPORT CONTROL +1F6C3;ID # CUSTOMS +1F6C4;ID # BAGGAGE CLAIM +1F6C5;ID # LEFT LUGGAGE 1F700;AL # ALCHEMICAL SYMBOL FOR QUINTESSENCE 1F701;AL # ALCHEMICAL SYMBOL FOR AIR 1F702;AL # ALCHEMICAL SYMBOL FOR FIRE diff --git a/gnu/usr.bin/perl/lib/unicore/NameAliases.txt b/gnu/usr.bin/perl/lib/unicore/NameAliases.txt index 39926200965..482fb92be4e 100644 --- a/gnu/usr.bin/perl/lib/unicore/NameAliases.txt +++ b/gnu/usr.bin/perl/lib/unicore/NameAliases.txt @@ -1,5 +1,5 @@ -# NameAliases-6.1.0.txt -# Date: 2012-01-03, 21:52:00 GMT [KW] +# NameAliases-6.2.0.txt +# Date: 2012-05-15, 18:44:00 GMT [KW] # # This file is a normative contributory data file in the # Unicode Character 
Database. @@ -216,6 +216,7 @@ 01A2;LATIN CAPITAL LETTER GHA;correction 01A3;LATIN SMALL LETTER GHA;correction 034F;CGJ;abbreviation +0709;SYRIAC SUBLINEAR COLON SKEWED LEFT;correction 0CDE;KANNADA LETTER LLLA;correction 0E9D;LAO LETTER FO FON;correction 0E9F;LAO LETTER FO FAY;correction diff --git a/gnu/usr.bin/perl/lib/unicore/NamedSequences.txt b/gnu/usr.bin/perl/lib/unicore/NamedSequences.txt index e14c39505b1..040acdb6c37 100644 --- a/gnu/usr.bin/perl/lib/unicore/NamedSequences.txt +++ b/gnu/usr.bin/perl/lib/unicore/NamedSequences.txt @@ -1,8 +1,8 @@ -# NamedSequences-6.1.0.txt -# Date: 2011-07-26, 19:47:00 GMT [KW] +# NamedSequences-6.2.0.txt +# Date: 2012-05-15, 21:23:00 GMT [KW] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -112,8 +112,8 @@ BENGALI LETTER KHINYA;0995 09CD 09B7 # Additions for Tamil. Provisional 2008-02-08, Approved 2009-08-14 # # A visual display of the Tamil named sequences is available -# in the documentation for Unicode 5.2. See: -# http://www.unicode.org/versions/Unicode5.2.0/ +# in the documentation for the Unicode Standard. 
See Section 9.6, Tamil in +# http://www.unicode.org/versions/latest/ TAMIL CONSONANT K; 0B95 0BCD TAMIL CONSONANT NG; 0B99 0BCD @@ -500,3 +500,5 @@ KATAKANA LETTER AINU TO;30C8 309A KATAKANA LETTER AINU P;31F7 309A MODIFIER LETTER EXTRA-HIGH EXTRA-LOW CONTOUR TONE BAR;02E5 02E9 MODIFIER LETTER EXTRA-LOW EXTRA-HIGH CONTOUR TONE BAR;02E9 02E5 + +# EOF diff --git a/gnu/usr.bin/perl/lib/unicore/NamedSqProv.txt b/gnu/usr.bin/perl/lib/unicore/NamedSqProv.txt index c7561948f83..7d87629e014 100644 --- a/gnu/usr.bin/perl/lib/unicore/NamedSqProv.txt +++ b/gnu/usr.bin/perl/lib/unicore/NamedSqProv.txt @@ -1,8 +1,8 @@ -# NamedSequencesProv-6.1.0.txt -# Date: 2011-07-26, 19:46:00 GMT [KW] +# NamedSequencesProv-6.2.0.txt +# Date: 2012-05-15, 21:29:00 GMT [KW] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -54,3 +54,4 @@ # the sequence, based on the Lithuanian additions accepted # for Unicode 5.0. +# EOF diff --git a/gnu/usr.bin/perl/lib/unicore/NamesList.txt b/gnu/usr.bin/perl/lib/unicore/NamesList.txt index 19ecbdd55aa..17bd45b0a4c 100644 --- a/gnu/usr.bin/perl/lib/unicore/NamesList.txt +++ b/gnu/usr.bin/perl/lib/unicore/NamesList.txt @@ -1,6 +1,10 @@ -@@@ The Unicode Standard 6.1 -@@@+ U61M111117.lst - Final Unicode 6.1 names list. +; charset=UTF-8 +@@@ The Unicode Standard 6.2 +@@@+ U62M120808.lst + Unicode 6.2 names list, third delta (converted to UTF-8). + Updated more annotations in the Bengali block. + Added clarifying annotations for 0342. + Updated alias for 1110E. This file is semi-automatically derived from UnicodeData.txt and a set of manually created annotations using a script to select or suppress information from the data file. 
The rules used @@ -117,11 +121,15 @@ = pound sign, hash, crosshatch, octothorpe x (l b bar symbol - 2114) x (music sharp sign - 266F) + ~ 0023 FE0E text style + ~ 0023 FE0F emoji style 0024 DOLLAR SIGN - = milreis, escudo + = milréis, escudo + * used for many peso currencies in Latin America and elsewhere * glyph may have one or two vertical bars - * other currency symbol characters: 20A0-20B9 + * other currency symbol characters: 20A0-20BA x (currency sign - 00A4) + x (peso sign - 20B1) x (heavy dollar sign - 1F4B2) 0025 PERCENT SIGN x (arabic percent sign - 066A) @@ -184,15 +192,35 @@ x (division slash - 2215) @ ASCII digits 0030 DIGIT ZERO + ~ 0030 FE0E text style + ~ 0030 FE0F emoji style 0031 DIGIT ONE + ~ 0031 FE0E text style + ~ 0031 FE0F emoji style 0032 DIGIT TWO + ~ 0032 FE0E text style + ~ 0032 FE0F emoji style 0033 DIGIT THREE + ~ 0033 FE0E text style + ~ 0033 FE0F emoji style 0034 DIGIT FOUR + ~ 0034 FE0E text style + ~ 0034 FE0F emoji style 0035 DIGIT FIVE + ~ 0035 FE0E text style + ~ 0035 FE0F emoji style 0036 DIGIT SIX + ~ 0036 FE0E text style + ~ 0036 FE0F emoji style 0037 DIGIT SEVEN + ~ 0037 FE0E text style + ~ 0037 FE0F emoji style 0038 DIGIT EIGHT + ~ 0038 FE0E text style + ~ 0038 FE0F emoji style 0039 DIGIT NINE + ~ 0039 FE0E text style + ~ 0039 FE0F emoji style @ ASCII punctuation and symbols 003A COLON x (armenian full stop - 0589) @@ -381,6 +409,7 @@ @ Control character 007F <control> = DELETE +@~ Standardized Variation Sequences @@ 0080 C1 Controls and Latin-1 Supplement (Latin-1 Supplement) 00FF @ C1 controls @+ Alias names are those for ISO/IEC 6429:1992. @@ -464,9 +493,10 @@ 00A3 POUND SIGN = pound sterling, Irish punt, Italian lira, Turkish lira, etc. 
x (lira sign - 20A4) + x (turkish lira sign - 20BA) x (roman semuncia sign - 10192) 00A4 CURRENCY SIGN - * other currency symbol characters: 20A0-20B9 + * other currency symbol characters: 20A0-20BA x (dollar sign - 0024) 00A5 YEN SIGN = yuan sign @@ -677,7 +707,7 @@ : 0061 030A 00E6 LATIN SMALL LETTER AE = latin small ligature ae (1.0) - = ash (from Old English æsc) + = ash (from Old English æsc) * Danish, Norwegian, Icelandic, Faroese, Old English, French, IPA x (latin small ligature oe - 0153) x (cyrillic small ligature a ie - 04D5) @@ -981,7 +1011,7 @@ : 006F 030B 0152 LATIN CAPITAL LIGATURE OE 0153 LATIN SMALL LIGATURE OE - = ethel (from Old English eðel) + = ethel (from Old English eðel) * French, IPA, Old Icelandic, Old English, ... x (latin small letter ae - 00E6) x (latin letter small capital oe - 0276) @@ -2021,11 +2051,13 @@ 02B9 MODIFIER LETTER PRIME * primary stress, emphasis * transliteration of mjagkij znak (Cyrillic soft sign: palatalization) + * transliteration of Hebrew geresh x (apostrophe - 0027) x (acute accent - 00B4) x (modifier letter acute accent - 02CA) x (combining acute accent - 0301) x (greek numeral sign - 0374) + x (hebrew punctuation geresh - 05F3) x (prime - 2032) 02BA MODIFIER LETTER DOUBLE PRIME * exaggerated stress, contrastive stress @@ -2060,11 +2092,15 @@ x (single high-reversed-9 quotation mark - 201B) 02BE MODIFIER LETTER RIGHT HALF RING * transliteration of Arabic hamza (glottal stop) + * transliteration of Hebrew alef x (armenian apostrophe - 055A) + x (hebrew letter alef - 05D0) x (arabic letter hamza - 0621) 02BF MODIFIER LETTER LEFT HALF RING * transliteration of Arabic ain (voiced pharyngeal fricative) + * transliteration of Hebrew ayin x (armenian modifier letter left half ring - 0559) + x (hebrew letter ayin - 05E2) x (arabic letter ain - 0639) 02C0 MODIFIER LETTER GLOTTAL STOP * ejective or glottalized @@ -2442,6 +2478,10 @@ : 0301 combining acute accent @ Additions for Greek 0342 COMBINING GREEK PERISPOMENI + * 
Greek-specific form of circumflex for rising-falling accent + * alternative glyph is similar to an inverted breve + x (combining tilde - 0303) + x (combining inverted breve - 0311) 0343 COMBINING GREEK KORONIS : 0313 combining comma above 0344 COMBINING GREEK DIALYTIKA TONOS @@ -4058,6 +4098,7 @@ 0708 SYRIAC SUPRALINEAR COLON SKEWED LEFT * marks a minor phrase division 0709 SYRIAC SUBLINEAR COLON SKEWED RIGHT + % SYRIAC SUBLINEAR COLON SKEWED LEFT * marks the end of a real or rhetorical question 070A SYRIAC CONTRACTION * a contraction mark, mostly used in East Syriac @@ -4857,8 +4898,12 @@ = bengali letter va with lower diagonal (1.0) @ Currency signs 09F2 BENGALI RUPEE MARK + = taka + * historic currency sign 09F3 BENGALI RUPEE SIGN + = Bangladeshi taka @ Historic symbols for fractional values +@+ The use of these signs is not limited to currency, despite the character names. 09F4 BENGALI CURRENCY NUMERATOR ONE * not in current usage 09F5 BENGALI CURRENCY NUMERATOR TWO @@ -4870,7 +4915,11 @@ 09F9 BENGALI CURRENCY DENOMINATOR SIXTEEN @ Sign 09FA BENGALI ISSHAR -@ Currency sign + = ishvar + * represents the name of a deity + = svargiya + * written before the name of a deceased person +@ Historic currency sign 09FB BENGALI GANDA MARK @@ 0A00 Gurmukhi 0A7F @ Various signs @@ -5229,10 +5278,12 @@ 0B77 ORIYA FRACTION THREE SIXTEENTHS @@ 0B80 Tamil 0BFF @ Various signs +@+ The anusvara should not be confused with the use of a circular glyph for the pulli. 0B82 TAMIL SIGN ANUSVARA * not used in Tamil 0B83 TAMIL SIGN VISARGA = aytham + * in fonts which display the Tamil pulli as a ring shape, the glyph for aytham also uses rings @ Independent vowels 0B85 TAMIL LETTER A 0B86 TAMIL LETTER AA @@ -5292,7 +5343,9 @@ 0BCC TAMIL VOWEL SIGN AU : 0BC6 0BD7 @ Virama +@+ The Tamil pulli usually displays as a dot above, but in some fonts displays as a ring above. Do not use 0B82 to represent a ring-shaped pulli. 
0BCD TAMIL SIGN VIRAMA + = pulli @ Various signs 0BD0 TAMIL OM 0BD7 TAMIL AU LENGTH MARK @@ -6745,8 +6798,9 @@ 10C3 GEORGIAN CAPITAL LETTER WE 10C4 GEORGIAN CAPITAL LETTER HAR 10C5 GEORGIAN CAPITAL LETTER HOE -@ Additional letters for Ossetian +@ Additional letter 10C7 GEORGIAN CAPITAL LETTER YN +@ Additional letter for Ossetian 10CD GEORGIAN CAPITAL LETTER AEN @ Mkhedruli @+ This is the modern secular alphabet, which is caseless. @@ -8888,52 +8942,95 @@ @ Basic letters 1820 MONGOLIAN LETTER A x (cyrillic small letter a - 0430) + ~ 1820 180B second form (isolate) + ~ 1820 180B second form (medial) + ~ 1820 180B second form (final) + ~ 1820 180C third form (medial) 1821 MONGOLIAN LETTER E x (cyrillic small letter e - 044D) + ~ 1821 180B second form (initial) + ~ 1821 180B second form (final) 1822 MONGOLIAN LETTER I x (cyrillic small letter i - 0438) + ~ 1822 180B second form (medial) 1823 MONGOLIAN LETTER O x (cyrillic small letter o - 043E) + ~ 1823 180B second form (medial) + ~ 1823 180B second form (final) 1824 MONGOLIAN LETTER U x (cyrillic small letter u - 0443) + ~ 1824 180B second form (medial) 1825 MONGOLIAN LETTER OE x (cyrillic small letter barred o - 04E9) + ~ 1825 180B second form (medial) + ~ 1825 180B second form (final) + ~ 1825 180C third form (medial) 1826 MONGOLIAN LETTER UE x (cyrillic small letter straight u - 04AF) + ~ 1826 180B second form (isolate) + ~ 1826 180B second form (medial) + ~ 1826 180B second form (final) + ~ 1826 180C third form (medial) 1827 MONGOLIAN LETTER EE 1828 MONGOLIAN LETTER NA x (cyrillic small letter en - 043D) + ~ 1828 180B second form (initial) + ~ 1828 180B second form (medial) + ~ 1828 180C third form (medial) + ~ 1828 180D separate form (medial) 1829 MONGOLIAN LETTER ANG 182A MONGOLIAN LETTER BA x (cyrillic small letter be - 0431) + ~ 182A 180B alternative form (final) 182B MONGOLIAN LETTER PA x (cyrillic small letter pe - 043F) 182C MONGOLIAN LETTER QA x (cyrillic small letter ha - 0445) + ~ 182C 180B second form 
(initial) + ~ 182C 180B second form (medial) + ~ 182C 180B feminine second form (isolate) + ~ 182C 180C third form (medial) + ~ 182C 180D fourth form (medial) 182D MONGOLIAN LETTER GA x (cyrillic small letter ghe - 0433) + ~ 182D 180B second form (initial) + ~ 182D 180B second form (medial) + ~ 182D 180B feminine form (final) + ~ 182D 180C third form (medial) + ~ 182D 180D feminine form (medial) 182E MONGOLIAN LETTER MA x (cyrillic small letter em - 043C) 182F MONGOLIAN LETTER LA x (cyrillic small letter el - 043B) 1830 MONGOLIAN LETTER SA x (cyrillic small letter es - 0441) + ~ 1830 180B second form (final) + ~ 1830 180C third form (final) 1831 MONGOLIAN LETTER SHA x (cyrillic small letter sha - 0448) 1832 MONGOLIAN LETTER TA x (cyrillic small letter te - 0442) + ~ 1832 180B second form (medial) 1833 MONGOLIAN LETTER DA x (cyrillic small letter de - 0434) + ~ 1833 180B second form (initial) + ~ 1833 180B second form (medial) + ~ 1833 180B second form (final) 1834 MONGOLIAN LETTER CHA x (cyrillic small letter che - 0447) 1835 MONGOLIAN LETTER JA x (cyrillic small letter zhe - 0436) + ~ 1835 180B second form (medial) 1836 MONGOLIAN LETTER YA x (cyrillic small letter short i - 0439) + ~ 1836 180B second form (initial) + ~ 1836 180B second form (medial) + ~ 1836 180C third form (medial) 1837 MONGOLIAN LETTER RA x (cyrillic small letter er - 0440) 1838 MONGOLIAN LETTER WA x (cyrillic small letter ve - 0432) + ~ 1838 180B second form (final) 1839 MONGOLIAN LETTER FA x (cyrillic small letter ef - 0444) 183A MONGOLIAN LETTER KA @@ -8954,16 +9051,29 @@ @ Todo letters 1843 MONGOLIAN LETTER TODO LONG VOWEL SIGN 1844 MONGOLIAN LETTER TODO E + ~ 1844 180B second form (medial) 1845 MONGOLIAN LETTER TODO I + ~ 1845 180B second form (medial) 1846 MONGOLIAN LETTER TODO O + ~ 1846 180B second form (medial) 1847 MONGOLIAN LETTER TODO U + ~ 1847 180B second form (isolate) + ~ 1847 180B second form (medial) + ~ 1847 180B second form (final) + ~ 1847 180C third form (medial) 1848 
MONGOLIAN LETTER TODO OE + ~ 1848 180B second form (medial) 1849 MONGOLIAN LETTER TODO UE + ~ 1849 180B second form (isolate) + ~ 1849 180B second form (medial) 184A MONGOLIAN LETTER TODO ANG 184B MONGOLIAN LETTER TODO BA 184C MONGOLIAN LETTER TODO PA 184D MONGOLIAN LETTER TODO QA + ~ 184D 180B feminine form (initial) + ~ 184D 180B feminine form (medial) 184E MONGOLIAN LETTER TODO GA + ~ 184E 180B second form (medial) 184F MONGOLIAN LETTER TODO MA 1850 MONGOLIAN LETTER TODO TA 1851 MONGOLIAN LETTER TODO DA @@ -8980,38 +9090,68 @@ 185C MONGOLIAN LETTER TODO DZA @ Sibe letters 185D MONGOLIAN LETTER SIBE E + ~ 185D 180B second form (medial) + ~ 185D 180B second form (final) 185E MONGOLIAN LETTER SIBE I + ~ 185E 180B second form (medial) + ~ 185E 180B second form (final) + ~ 185E 180C third form (medial) + ~ 185E 180C third form (final) 185F MONGOLIAN LETTER SIBE IY 1860 MONGOLIAN LETTER SIBE UE + ~ 1860 180B second form (medial) + ~ 1860 180B second form (final) 1861 MONGOLIAN LETTER SIBE U 1862 MONGOLIAN LETTER SIBE ANG 1863 MONGOLIAN LETTER SIBE KA + ~ 1863 180B second form (medial) 1864 MONGOLIAN LETTER SIBE GA 1865 MONGOLIAN LETTER SIBE HA 1866 MONGOLIAN LETTER SIBE PA 1867 MONGOLIAN LETTER SIBE SHA 1868 MONGOLIAN LETTER SIBE TA + ~ 1868 180B second form (initial) + ~ 1868 180B second form (medial) + ~ 1868 180C third form (medial) 1869 MONGOLIAN LETTER SIBE DA + ~ 1869 180B second form (initial) + ~ 1869 180B second form (medial) 186A MONGOLIAN LETTER SIBE JA 186B MONGOLIAN LETTER SIBE FA 186C MONGOLIAN LETTER SIBE GAA 186D MONGOLIAN LETTER SIBE HAA 186E MONGOLIAN LETTER SIBE TSA 186F MONGOLIAN LETTER SIBE ZA + ~ 186F 180B second form (initial) + ~ 186F 180B second form (medial) 1870 MONGOLIAN LETTER SIBE RAA 1871 MONGOLIAN LETTER SIBE CHA 1872 MONGOLIAN LETTER SIBE ZHA @ Manchu letters 1873 MONGOLIAN LETTER MANCHU I + ~ 1873 180B second form (medial) + ~ 1873 180B second form (final) + ~ 1873 180C third form (medial) + ~ 1873 180C third form (final) + ~ 1873 
180D fourth form (medial) 1874 MONGOLIAN LETTER MANCHU KA + ~ 1874 180B second form (medial) + ~ 1874 180B feminine first final form (final) + ~ 1874 180C feminine first medial form (medial) + ~ 1874 180C feminine second final form (final) + ~ 1874 180D feminine second medial form (medial) 1875 MONGOLIAN LETTER MANCHU RA 1876 MONGOLIAN LETTER MANCHU FA + ~ 1876 180B second form (initial) + ~ 1876 180B second form (medial) 1877 MONGOLIAN LETTER MANCHU ZHA @ Extensions for Sanskrit and Tibetan 1880 MONGOLIAN LETTER ALI GALI ANUSVARA ONE x (tibetan sign sna ldan - 0F83) + ~ 1880 180B second form 1881 MONGOLIAN LETTER ALI GALI VISARGA ONE x (tibetan sign rnam bcad - 0F7F) + ~ 1881 180B second form 1882 MONGOLIAN LETTER ALI GALI DAMARU x (tibetan sign lce tsa can - 0F88) 1883 MONGOLIAN LETTER ALI GALI UBADAMA @@ -9021,9 +9161,16 @@ x (tibetan mark paluta - 0F85) 1886 MONGOLIAN LETTER ALI GALI THREE BALUDA 1887 MONGOLIAN LETTER ALI GALI A + ~ 1887 180B second form (isolate) + ~ 1887 180B second form (final) + ~ 1887 180C third form (final) + ~ 1887 180D fourth form (final) 1888 MONGOLIAN LETTER ALI GALI I + ~ 1888 180B second form (final) 1889 MONGOLIAN LETTER ALI GALI KA 188A MONGOLIAN LETTER ALI GALI NGA + ~ 188A 180B second form (initial) + ~ 188A 180B second form (medial) 188B MONGOLIAN LETTER ALI GALI CA 188C MONGOLIAN LETTER ALI GALI TTA 188D MONGOLIAN LETTER ALI GALI TTHA @@ -9056,6 +9203,8 @@ 18A8 MONGOLIAN LETTER MANCHU ALI GALI BHA 18A9 MONGOLIAN LETTER ALI GALI DAGALGA 18AA MONGOLIAN LETTER MANCHU ALI GALI LHA +@~ Standarized Variation Sequences +@+ Unlike other blocks, these variation sequences use the script-specific variation selectors for Mongolian. 
@@ 18B0 Unified Canadian Aboriginal Syllabics Extended 18FF @ Syllables for Moose Cree 18B0 CANADIAN SYLLABICS OY @@ -9361,7 +9510,7 @@ @@ 19E0 Khmer Symbols 19FF @ Lunar date symbols 19E0 KHMER SYMBOL PATHAMASAT - * represents the first August in a leap year + * represents the first Ashadha (eighth month of the lunar calendar) @+ The following fifteen characters represent the first through the fifteenth waxing days, respectively. 19E1 KHMER SYMBOL MUOY KOET 19E2 KHMER SYMBOL PII KOET @@ -9379,7 +9528,7 @@ 19EE KHMER SYMBOL DAP-BUON KOET 19EF KHMER SYMBOL DAP-PRAM KOET 19F0 KHMER SYMBOL TUTEYASAT - * represents the second August in a leap year + * represents the second Ashadha in the lunar calendar during the Adhikameas leap year @+ The following fifteen characters represent the first through the fifteenth waning days, respectively. 19F1 KHMER SYMBOL MUOY ROC 19F2 KHMER SYMBOL PII ROC @@ -11833,7 +11982,7 @@ x (presentation form for vertical horizontal ellipsis - FE19) # 002E 002E 002E 2027 HYPHENATION POINT - * visible symbol used to indicate correct positions for word breaking, as in dic·tion·ar·ies + * visible symbol used to indicate correct positions for word breaking, as in dic·tion·ar·ies @ Format characters 2028 LINE SEPARATOR * may be used to represent this semantic unambiguously @@ -11915,6 +12064,8 @@ @ Double punctuation for vertical text 203C DOUBLE EXCLAMATION MARK x (exclamation mark - 0021) + ~ 203C FE0E text style + ~ 203C FE0F emoji style # 0021 0021 @ General punctuation 203D INTERROBANG @@ -11948,6 +12099,8 @@ 2048 QUESTION EXCLAMATION MARK # 003F 0021 2049 EXCLAMATION QUESTION MARK + ~ 2049 FE0E text style + ~ 2049 FE0F emoji style # 0021 003F @ General punctuation 204A TIRONIAN SIGN ET @@ -11966,7 +12119,7 @@ * editing mark 2051 TWO ASTERISKS ALIGNED VERTICALLY 2052 COMMERCIAL MINUS SIGN - = abzüglich (German), med avdrag av (Swedish), piska (Swedish, "whip") + = abzüglich (German), med avdrag av (Swedish), piska (Swedish, "whip") * a common 
glyph variant and fallback representation looks like ./. * may also be used as a dingbat to indicate correctness * used in Finno-Ugric Phonetic Alphabet to indicate a related borrowed form with different sound @@ -12037,6 +12190,7 @@ 206D ACTIVATE ARABIC FORM SHAPING 206E NATIONAL DIGIT SHAPES 206F NOMINAL DIGIT SHAPES +@~ Standardized Variation Sequences @@ 2070 Superscripts and Subscripts 209F @ Superscripts @+ See also superscript Latin letters in the Spacing Modifier Letters block starting at 02B0. @@ -12168,6 +12322,7 @@ * intended for lira, but not widely used * preferred character for lira is 00A3 x (pound sign - 00A3) + x (turkish lira sign - 20BA) 20A5 MILL SIGN * USA (1/10 cent) 20A6 NAIRA SIGN @@ -12193,13 +12348,15 @@ * Laos 20AE TUGRIK SIGN * Mongolia - * also transliterated as tugrug, tugric, tugrog, togrog, tögrög + * also transliterated as tugrug, tugric, tugrog, togrog, tögrög 20AF DRACHMA SIGN * Greece 20B0 GERMAN PENNY SIGN 20B1 PESO SIGN + = Filipino peso sign * Philippines - * the Mexican peso is indicated with the dollar sign + * extant and discontinued Latin-American peso currencies (Mexican, Chilean, Colombian, etc.) use the dollar sign + x (dollar sign - 0024) x (peseta sign - 20A7) 20B2 GUARANI SIGN * Paraguay @@ -12226,6 +12383,8 @@ * official rupee currency sign for India * contrasts with script-specific rupee signs and abbreviations x (devanagari letter ra - 0930) +20BA TURKISH LIRA SIGN + * official lira currency sign for Turkey @@ 20D0 Combining Diacritical Marks for Symbols 20FF @ Combining diacritical marks for symbols 20D0 COMBINING LEFT HARPOON ABOVE @@ -12425,7 +12584,7 @@ 212A KELVIN SIGN : 004B latin capital letter k 212B ANGSTROM SIGN - * non SI length unit (=0.1 nm) named after A. J. Ångström, Swedish physicist + * non SI length unit (=0.1 nm) named after A. J. 
Ã…ngström, Swedish physicist * preferred representation is 00C5 : 00C5 latin capital letter a with ring above 212C SCRIPT CAPITAL B @@ -12476,6 +12635,8 @@ @ Additional letterlike symbols 2139 INFORMATION SOURCE * intended for use with 20DD + ~ 2139 FE0E text style + ~ 2139 FE0F emoji style # <font> 0069 latin small letter i 213A ROTATED CAPITAL Q * a binding signature mark @@ -12534,6 +12695,7 @@ x (greek small letter digamma - 03DD) @ Biblical editorial symbol 214F SYMBOL FOR SAMARITAN SOURCE +@~ Standardized Variation Sequences @@ 2150 Number Forms 218F @ Fractions @+ Other fraction number forms are found in the Latin-1 Supplement block. @@ -12675,11 +12837,23 @@ * IPA: ingressive airflow 2194 LEFT RIGHT ARROW = z notation relation + ~ 2194 FE0E text style + ~ 2194 FE0F emoji style 2195 UP DOWN ARROW + ~ 2195 FE0E text style + ~ 2195 FE0F emoji style 2196 NORTH WEST ARROW + ~ 2196 FE0E text style + ~ 2196 FE0F emoji style 2197 NORTH EAST ARROW + ~ 2197 FE0E text style + ~ 2197 FE0F emoji style 2198 SOUTH EAST ARROW + ~ 2198 FE0E text style + ~ 2198 FE0F emoji style 2199 SOUTH WEST ARROW + ~ 2199 FE0E text style + ~ 2199 FE0F emoji style @ Arrows with modifications 219A LEFTWARDS ARROW WITH STROKE * negation of 2190 @@ -12711,7 +12885,11 @@ = depth symbol 21A8 UP DOWN ARROW WITH BASE 21A9 LEFTWARDS ARROW WITH HOOK + ~ 21A9 FE0E text style + ~ 21A9 FE0F emoji style 21AA RIGHTWARDS ARROW WITH HOOK + ~ 21AA FE0E text style + ~ 21AA FE0F emoji style 21AB LEFTWARDS ARROW WITH LOOP 21AC RIGHTWARDS ARROW WITH LOOP 21AD LEFT RIGHT WAVE ARROW @@ -12846,6 +13024,7 @@ 21FD LEFTWARDS OPEN-HEADED ARROW 21FE RIGHTWARDS OPEN-HEADED ARROW 21FF LEFT RIGHT OPEN-HEADED ARROW +@~ Standardized Variation Sequences @@ 2200 Mathematical Operators 22FF @@+ @ Miscellaneous mathematical symbols @@ -12973,13 +13152,19 @@ 2229 INTERSECTION = cap, hat x (n-ary intersection - 22C2) + ~ 2229 FE00 with serifs 222A UNION = cup x (n-ary union - 22C3) + ~ 222A FE00 with serifs @ Integrals 222B 
INTEGRAL x (latin small letter esh - 0283) +;experimenting with variant syntax + ~ 222B ALT1 slanted style 222C DOUBLE INTEGRAL +;experimenting with variant syntax + ~ 222C ALT1 slanted style # 222B 222B 222D TRIPLE INTEGRAL x (quadruple integral operator - 2A0C) @@ -13089,7 +13274,9 @@ 2266 LESS-THAN OVER EQUAL TO 2267 GREATER-THAN OVER EQUAL TO 2268 LESS-THAN BUT NOT EQUAL TO + ~ 2268 FE00 with vertical stroke 2269 GREATER-THAN BUT NOT EQUAL TO + ~ 2269 FE00 with vertical stroke 226A MUCH LESS-THAN x (left-pointing double angle quotation mark - 00AB) 226B MUCH GREATER-THAN @@ -13107,7 +13294,9 @@ 2271 NEITHER GREATER-THAN NOR EQUAL TO : 2265 0338 2272 LESS-THAN OR EQUIVALENT TO + ~ 2272 FE00 following the slant of the lower leg 2273 GREATER-THAN OR EQUIVALENT TO + ~ 2273 FE00 following the slant of the lower leg 2274 NEITHER LESS-THAN NOR EQUIVALENT TO : 2272 0338 2275 NEITHER GREATER-THAN NOR EQUIVALENT TO @@ -13149,7 +13338,9 @@ 2289 NEITHER A SUPERSET OF NOR EQUAL TO : 2287 0338 228A SUBSET OF WITH NOT EQUAL TO + ~ 228A FE00 with stroke through bottom members 228B SUPERSET OF WITH NOT EQUAL TO + ~ 228B FE00 with stroke through bottom members @ Operators 228C MULTISET 228D MULTISET MULTIPLICATION @@ -13166,12 +13357,15 @@ @ Operators 2293 SQUARE CAP x (n-ary square intersection operator - 2A05) + ~ 2293 FE00 with serifs 2294 SQUARE CUP + ~ 2294 FE00 with serifs 2295 CIRCLED PLUS = direct sum = vector pointing into page x (n-ary circled plus operator - 2A01) x (alchemical symbol for verdigris - 1F728) + ~ 2295 FE00 with white rim 2296 CIRCLED MINUS = symmetric difference x (circle with horizontal bar - 29B5) @@ -13181,6 +13375,7 @@ = vector pointing into page x (circled crossing lanes - 26D2) x (n-ary circled times operator - 2A02) + ~ 2297 FE00 with white rim 2298 CIRCLED DIVISION SLASH 2299 CIRCLED DOT OPERATOR = direct product @@ -13194,6 +13389,7 @@ 229B CIRCLED ASTERISK OPERATOR x (apl functional symbol circle star - 235F) 229C CIRCLED EQUALS + ~ 229C FE00 
with equal sign touching the circle 229D CIRCLED DASH 229E SQUARED PLUS 229F SQUARED MINUS @@ -13318,7 +13514,9 @@ 22D8 VERY MUCH LESS-THAN 22D9 VERY MUCH GREATER-THAN 22DA LESS-THAN EQUAL TO OR GREATER-THAN + ~ 22DA FE00 with slanted equal 22DB GREATER-THAN EQUAL TO OR LESS-THAN + ~ 22DB FE00 with slanted equal 22DC EQUAL TO OR LESS-THAN 22DD EQUAL TO OR GREATER-THAN 22DE EQUAL TO OR PRECEDES @@ -13367,6 +13565,10 @@ 22FD CONTAINS WITH OVERBAR 22FE SMALL CONTAINS WITH OVERBAR 22FF Z NOTATION BAG MEMBERSHIP +@@~ Alternative Glyph Listing +@+ Experimental listing +@~ Standarized Variation Sequences +@+ Experimental listing @@ 2300 Miscellaneous Technical 23FF @ Miscellaneous technical 2300 DIAMETER SIGN @@ -13437,8 +13639,12 @@ 231A WATCH x (alarm clock - 23F0) x (clock face one oclock - 1F550) + ~ 231A FE0E text style + ~ 231A FE0F emoji style 231B HOURGLASS = alchemical symbol for hour + ~ 231B FE0E text style + ~ 231B FE0F emoji style @ Quine corners @+ These form a set of four quine corners, for quincuncial arrangement. They are also used in upper and lower pairs in mathematic, or more rarely in editorial usage as alternatives to half brackets. 231C TOP LEFT CORNER @@ -13799,6 +14005,7 @@ 23F2 TIMER CLOCK 23F3 HOURGLASS WITH FLOWING SAND x (hourglass - 231B) +@~ Standardized Variation Sequences @@ 2400 Control Pictures 243F @+ The diagonal lettering glyphs are only exemplary; alternate representations may be, and often are used in the visible display of control codes. 
@ Graphic pictures for control codes @@ -14083,6 +14290,8 @@ 24C1 CIRCLED LATIN CAPITAL LETTER L # <circle> 004C 24C2 CIRCLED LATIN CAPITAL LETTER M + ~ 24C2 FE0E text style + ~ 24C2 FE0F emoji style # <circle> 004D 24C3 CIRCLED LATIN CAPITAL LETTER N # <circle> 004E @@ -14192,6 +14401,7 @@ @ Additional white on black circled number 24FF NEGATIVE CIRCLED DIGIT ZERO x (dingbat negative circled digit one - 2776) +@~ Standardized Variation Sequences @@ 2500 Box Drawing 257F @+ All of these characters are intended for compatibility with old sets oriented toward character cell graphics. @ Light and heavy solid lines @@ -14426,8 +14636,12 @@ 25AA BLACK SMALL SQUARE = square bullet x (black very small square - 2B1D) + ~ 25AA FE0E text style + ~ 25AA FE0F emoji style 25AB WHITE SMALL SQUARE x (white very small square - 2B1E) + ~ 25AB FE0E text style + ~ 25AB FE0F emoji style 25AC BLACK RECTANGLE 25AD WHITE RECTANGLE 25AE BLACK VERTICAL RECTANGLE @@ -14450,6 +14664,8 @@ x (up-pointing small red triangle - 1F53C) 25B5 WHITE UP-POINTING SMALL TRIANGLE 25B6 BLACK RIGHT-POINTING TRIANGLE + ~ 25B6 FE0E text style + ~ 25B6 FE0F emoji style 25B7 WHITE RIGHT-POINTING TRIANGLE = z notation range restriction 25B8 BLACK RIGHT-POINTING SMALL TRIANGLE @@ -14470,6 +14686,8 @@ x (down-pointing small red triangle - 1F53D) 25BF WHITE DOWN-POINTING SMALL TRIANGLE 25C0 BLACK LEFT-POINTING TRIANGLE + ~ 25C0 FE0E text style + ~ 25C0 FE0F emoji style 25C1 WHITE LEFT-POINTING TRIANGLE = z notation domain restriction 25C2 BLACK LEFT-POINTING SMALL TRIANGLE @@ -14573,14 +14791,24 @@ 25FB WHITE MEDIUM SQUARE = always (modal operator) x (white square - 25A1) + ~ 25FB FE0E text style + ~ 25FB FE0F emoji style 25FC BLACK MEDIUM SQUARE x (black square - 25A0) + ~ 25FC FE0E text style + ~ 25FC FE0F emoji style 25FD WHITE MEDIUM SMALL SQUARE x (white small square - 25AB) + ~ 25FD FE0E text style + ~ 25FD FE0F emoji style 25FE BLACK MEDIUM SMALL SQUARE x (black small square - 25AA) + ~ 25FE FE0E text style 
+ ~ 25FE FE0F emoji style 25FF LOWER RIGHT TRIANGLE x (right triangle - 22BF) +@~ Standarized Variation Sequences +@+ Emoji style variants include rendering of characters in ways not achievable with traditional or even digital typography. The sample glyphs shown here cannot faithfully represent the range of intended appearances. @@ 2600 Miscellaneous Symbols 26FF @@+ @ Weather and astrological symbols @@ -14588,8 +14816,12 @@ = clear weather x (sun - 2609) x (high brightness symbol - 1F506) + ~ 2600 FE0E text style + ~ 2600 FE0F emoji style 2601 CLOUD = cloudy weather + ~ 2601 FE0E text style + ~ 2601 FE0F emoji style 2602 UMBRELLA = rainy weather x (closed umbrella - 1F302) @@ -14622,10 +14854,14 @@ x (telephone sign - 2121) x (telephone location sign - 2706) x (telephone receiver - 1F4DE) + ~ 260E FE0E text style + ~ 260E FE0F emoji style 260F WHITE TELEPHONE 2610 BALLOT BOX x (white square - 25A1) 2611 BALLOT BOX WITH CHECK + ~ 2611 FE0E text style + ~ 2611 FE0F emoji style 2612 BALLOT BOX WITH X x (squared times - 22A0) 2613 SALTIRE @@ -14634,6 +14870,8 @@ @ Weather symbol 2614 UMBRELLA WITH RAIN DROPS = showery weather + ~ 2614 FE0E text style + ~ 2614 FE0F emoji style @ Miscellaneous symbol 2615 HOT BEVERAGE = tea or coffee, depending on locale @@ -14641,6 +14879,8 @@ x (watch - 231A) x (hourglass - 231B) x (teacup without handle - 1F375) + ~ 2615 FE0E text style + ~ 2615 FE0F emoji style @ Japanese chess symbols 2616 WHITE SHOGI PIECE 2617 BLACK SHOGI PIECE @@ -14656,6 +14896,8 @@ 261C WHITE LEFT POINTING INDEX x (white left pointing backhand index - 1F448) 261D WHITE UP POINTING INDEX + ~ 261D FE0E text style + ~ 261D FE0F emoji style 261E WHITE RIGHT POINTING INDEX = fist (typographic term) 261F WHITE DOWN POINTING INDEX @@ -14712,6 +14954,8 @@ 2639 WHITE FROWNING FACE 263A WHITE SMILING FACE = have a nice day! 
+ ~ 263A FE0E text style + ~ 263A FE0F emoji style 263B BLACK SMILING FACE @ Miscellaneous symbol 263C WHITE SUN WITH RAYS @@ -14750,21 +14994,45 @@ 2647 PLUTO @ Zodiacal symbols 2648 ARIES + ~ 2648 FE0E text style + ~ 2648 FE0F emoji style 2649 TAURUS + ~ 2649 FE0E text style + ~ 2649 FE0F emoji style 264A GEMINI + ~ 264A FE0E text style + ~ 264A FE0F emoji style 264B CANCER + ~ 264B FE0E text style + ~ 264B FE0F emoji style 264C LEO + ~ 264C FE0E text style + ~ 264C FE0F emoji style 264D VIRGO = minim (alternate glyph) + ~ 264D FE0E text style + ~ 264D FE0F emoji style 264E LIBRA x (alchemical symbol for sublimation - 1F75E) + ~ 264E FE0E text style + ~ 264E FE0F emoji style 264F SCORPIUS = scorpio = minim, drop + ~ 264F FE0E text style + ~ 264F FE0F emoji style 2650 SAGITTARIUS + ~ 2650 FE0E text style + ~ 2650 FE0F emoji style 2651 CAPRICORN + ~ 2651 FE0E text style + ~ 2651 FE0F emoji style 2652 AQUARIUS + ~ 2652 FE0E text style + ~ 2652 FE0F emoji style 2653 PISCES + ~ 2653 FE0E text style + ~ 2653 FE0F emoji style @ Chess symbols 2654 WHITE CHESS KING 2655 WHITE CHESS QUEEN @@ -14780,22 +15048,32 @@ 265F BLACK CHESS PAWN @ Playing card symbols 2660 BLACK SPADE SUIT + ~ 2660 FE0E text style + ~ 2660 FE0F emoji style 2661 WHITE HEART SUIT 2662 WHITE DIAMOND SUIT x (white diamond - 25C7) x (lozenge - 25CA) 2663 BLACK CLUB SUIT x (shamrock - 2618) + ~ 2663 FE0E text style + ~ 2663 FE0F emoji style 2664 WHITE SPADE SUIT 2665 BLACK HEART SUIT = valentine x (heavy black heart - 2764) x (blue heart - 1F499) + ~ 2665 FE0E text style + ~ 2665 FE0F emoji style 2666 BLACK DIAMOND SUIT x (black diamond - 25C6) + ~ 2666 FE0E text style + ~ 2666 FE0F emoji style 2667 WHITE CLUB SUIT @ Miscellaneous symbol 2668 HOT SPRINGS + ~ 2668 FE0E text style + ~ 2668 FE0F emoji style @ Musical symbols 2669 QUARTER NOTE = crotchet @@ -14839,6 +15117,8 @@ * used together with other text and labels to indicate the type of material to be recycled 267B BLACK UNIVERSAL RECYCLING SYMBOL x 
(clockwise rightwards and leftwards open circle arrows - 1F501) + ~ 267B FE0E text style + ~ 267B FE0F emoji style 267C RECYCLED PAPER SYMBOL * used to indicate 100% recycled paper content 267D PARTIALLY-RECYCLED PAPER SYMBOL @@ -14846,6 +15126,8 @@ @ Miscellaneous symbols 267E PERMANENT PAPER SIGN 267F WHEELCHAIR SYMBOL + ~ 267F FE0E text style + ~ 267F FE0F emoji style @ Dice 2680 DIE FACE-1 x (game die - 1F3B2) @@ -14876,6 +15158,8 @@ x (pick - 26CF) 2693 ANCHOR = nautical term, harbor (on maps) + ~ 2693 FE0E text style + ~ 2693 FE0F emoji style 2694 CROSSED SWORDS = military term, battleground (on maps), killed in action 2695 STAFF OF AESCULAPIUS @@ -14911,9 +15195,13 @@ = background speaking @ Miscellaneous symbols 26A0 WARNING SIGN + ~ 26A0 FE0E text style + ~ 26A0 FE0F emoji style 26A1 HIGH VOLTAGE SIGN = thunder = lightning symbol + ~ 26A1 FE0E text style + ~ 26A1 FE0F emoji style @ Gender symbols 26A2 DOUBLED FEMALE SIGN = lesbianism @@ -14942,8 +15230,12 @@ = asexuality, sexless, genderless = engaged, betrothed * base for male or female sign + ~ 26AA FE0E text style + ~ 26AA FE0F emoji style 26AB MEDIUM BLACK CIRCLE * UI symbol for record function + ~ 26AB FE0E text style + ~ 26AB FE0F emoji style 26AC MEDIUM SMALL WHITE CIRCLE = engaged, betrothed (genealogy) * can represent wedding ring @@ -14980,7 +15272,11 @@ @ Sport symbols @+ See other sport symbols in the Miscellaneous Symbols and Pictographs block. 
26BD SOCCER BALL + ~ 26BD FE0E text style + ~ 26BD FE0F emoji style 26BE BASEBALL + ~ 26BE FE0E text style + ~ 26BE FE0F emoji style @ Miscellaneous symbol from ARIB STD B24 26BF SQUARED KEY = parental lock @@ -14993,8 +15289,12 @@ @ Weather symbols from ARIB STD B24 26C4 SNOWMAN WITHOUT SNOW = light snow + ~ 26C4 FE0E text style + ~ 26C4 FE0F emoji style 26C5 SUN BEHIND CLOUD = partly cloudy + ~ 26C5 FE0E text style + ~ 26C5 FE0F emoji style 26C6 RAIN = rainy weather 26C7 BLACK SNOWMAN @@ -15029,6 +15329,8 @@ = tyre chains required 26D4 NO ENTRY x (no entry sign - 1F6AB) + ~ 26D4 FE0E text style + ~ 26D4 FE0F emoji style 26D5 ALTERNATE ONE-WAY LEFT WAY TRAFFIC * left side traffic 26D6 BLACK TWO-WAY LEFT WAY TRAFFIC @@ -15080,6 +15382,8 @@ 26E9 SHINTO SHRINE = torii 26EA CHURCH + ~ 26EA FE0E text style + ~ 26EA FE0F emoji style 26EB CASTLE x (european castle - 1F3F0) 26EC HISTORIC SITE @@ -15098,15 +15402,21 @@ x (umbrella - 2602) 26F2 FOUNTAIN = park + ~ 26F2 FE0E text style + ~ 26F2 FE0F emoji style 26F3 FLAG IN HOLE = golf course x (triangular flag on post - 1F6A9) + ~ 26F3 FE0E text style + ~ 26F3 FE0F emoji style 26F4 FERRY = ferry boat terminal x (ship - 1F6A2) 26F5 SAILBOAT = marina or yacht harbour x (rowboat - 1F6A3) + ~ 26F5 FE0E text style + ~ 26F5 FE0F emoji style 26F6 SQUARE FOUR CORNERS = intersection 26F7 SKIER @@ -15118,18 +15428,23 @@ = track and field, gymnasium 26FA TENT = camping site + ~ 26FA FE0E text style + ~ 26FA FE0F emoji style 26FB JAPANESE BANK SYMBOL x (bank - 1F3E6) 26FC HEADSTONE GRAVEYARD SYMBOL = graveyard, memorial park, cemetery 26FD FUEL PUMP = petrol station, gas station + ~ 26FD FE0E text style + ~ 26FD FE0F emoji style 26FE CUP ON BLACK SQUARE = drive-in restaurant x (hot beverage - 2615) x (teacup without handle - 1F375) 26FF WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE = Japanese self-defence force site +@~ Standardized Variation Sequences @@ 2700 Dingbats 27BF @+ ITC Zapf dingbats series 100. 
Some of the ITC Zapf dingbats have been unified with geometric shape characters. Gaps in the chart have subsequently been filled with other dingbat-like symbols. x (black telephone - 260E) @@ -15147,6 +15462,8 @@ @ Miscellaneous 2701 UPPER BLADE SCISSORS 2702 BLACK SCISSORS + ~ 2702 FE0E text style + ~ 2702 FE0F emoji style 2703 LOWER BLADE SCISSORS 2704 WHITE SCISSORS 2705 WHITE HEAVY CHECK MARK @@ -15156,8 +15473,12 @@ x (telephone receiver - 1F4DE) 2707 TAPE DRIVE 2708 AIRPLANE + ~ 2708 FE0E text style + ~ 2708 FE0F emoji style 2709 ENVELOPE x (incoming envelope - 1F4E8) + ~ 2709 FE0E text style + ~ 2709 FE0F emoji style 270A RAISED FIST = rock in Rock, Paper, Scissors game x (fisted hand sign - 1F44A) @@ -15166,19 +15487,29 @@ x (waving hand sign - 1F44B) 270C VICTORY HAND = scissors in Rock, Paper, Scissors game + ~ 270C FE0E text style + ~ 270C FE0F emoji style 270D WRITING HAND 270E LOWER RIGHT PENCIL 270F PENCIL + ~ 270F FE0E text style + ~ 270F FE0F emoji style 2710 UPPER RIGHT PENCIL 2711 WHITE NIB 2712 BLACK NIB + ~ 2712 FE0E text style + ~ 2712 FE0F emoji style 2713 CHECK MARK x (square root - 221A) 2714 HEAVY CHECK MARK + ~ 2714 FE0E text style + ~ 2714 FE0F emoji style 2715 MULTIPLICATION X x (multiplication sign - 00D7) x (box drawings light diagonal cross - 2573) 2716 HEAVY MULTIPLICATION X + ~ 2716 FE0E text style + ~ 2716 FE0F emoji style 2717 BALLOT X x (saltire - 2613) 2718 HEAVY BALLOT X @@ -15191,7 +15522,7 @@ 271E SHADOWED WHITE LATIN CROSS 271F OUTLINED LATIN CROSS 2720 MALTESE CROSS - * Historically, the Maltese cross took many forms; the shape shown in the Zapf Dingbats is similar to one known as the Cross Formée. + * Historically, the Maltese cross took many forms; the shape shown in the Zapf Dingbats is similar to one known as the Cross Formée. 
@ Stars, asterisks and snowflakes 2721 STAR OF DAVID x (six pointed star with middle dot - 1F52F) @@ -15215,7 +15546,11 @@ x (asterisk - 002A) 2732 OPEN CENTRE ASTERISK 2733 EIGHT SPOKED ASTERISK + ~ 2733 FE0E text style + ~ 2733 FE0F emoji style 2734 EIGHT POINTED BLACK STAR + ~ 2734 FE0E text style + ~ 2734 FE0F emoji style 2735 EIGHT POINTED PINWHEEL STAR 2736 SIX POINTED BLACK STAR = sextile @@ -15235,9 +15570,13 @@ 2742 CIRCLED OPEN CENTRE EIGHT POINTED STAR 2743 HEAVY TEARDROP-SPOKED PINWHEEL ASTERISK 2744 SNOWFLAKE + ~ 2744 FE0E text style + ~ 2744 FE0F emoji style 2745 TIGHT TRIFOLIATE SNOWFLAKE 2746 HEAVY CHEVRON SNOWFLAKE 2747 SPARKLE + ~ 2747 FE0E text style + ~ 2747 FE0F emoji style 2748 HEAVY SPARKLE 2749 BALLOON-SPOKED ASTERISK = jack @@ -15260,6 +15599,8 @@ 2756 BLACK DIAMOND MINUS WHITE X 2757 HEAVY EXCLAMATION MARK SYMBOL = obstacles on the road, ARIB STD B24 + ~ 2757 FE0E text style + ~ 2757 FE0F emoji style 2758 LIGHT VERTICAL BAR x (vertical line - 007C) 2759 MEDIUM VERTICAL BAR @@ -15284,6 +15625,8 @@ 2763 HEAVY HEART EXCLAMATION MARK ORNAMENT 2764 HEAVY BLACK HEART x (black heart suit - 2665) + ~ 2764 FE0E text style + ~ 2764 FE0F emoji style 2765 ROTATED HEAVY BLACK HEART BULLET 2766 FLORAL HEART = Aldus leaf @@ -15368,6 +15711,8 @@ 27A1 BLACK RIGHTWARDS ARROW * fonts may harmonize this glyph with the style for other black arrows x (leftwards black arrow - 2B05) + ~ 27A1 FE0E text style + ~ 27A1 FE0F emoji style 27A2 THREE-D TOP-LIGHTED RIGHTWARDS ARROWHEAD 27A3 THREE-D BOTTOM-LIGHTED RIGHTWARDS ARROWHEAD 27A4 BLACK RIGHTWARDS ARROWHEAD @@ -15402,6 +15747,7 @@ 27BE OPEN-OUTLINED RIGHTWARDS ARROW @ Miscellaneous 27BF DOUBLE CURLY LOOP +@~ Standardized Variation Sequences @@ 27C0 Miscellaneous Mathematical Symbols-A 27EF @ Miscellaneous symbols 27C0 THREE DIMENSIONAL ANGLE @@ -15928,7 +16274,11 @@ x (rightwards wave arrow - 219D) x (wave arrow pointing directly left - 2B3F) 2934 ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS + ~ 2934 FE0E text 
style + ~ 2934 FE0F emoji style 2935 ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS + ~ 2935 FE0E text style + ~ 2935 FE0F emoji style 2936 ARROW POINTING DOWNWARDS THEN CURVING LEFTWARDS 2937 ARROW POINTING DOWNWARDS THEN CURVING RIGHTWARDS 2938 RIGHT-SIDE ARC CLOCKWISE ARROW @@ -16021,6 +16371,7 @@ 297D RIGHT FISH TAIL 297E UP FISH TAIL 297F DOWN FISH TAIL +@~ Standardized Variation Sequences @@ 2980 Miscellaneous Mathematical Symbols-B 29FF @ Miscellaneous mathematical symbols 2980 TRIPLE VERTICAL BAR DELIMITER @@ -16346,9 +16697,11 @@ 2A3B MULTIPLICATION SIGN IN TRIANGLE 2A3C INTERIOR PRODUCT x (right floor - 230B) + ~ 2A3C FE00 tall variant with narrow foot 2A3D RIGHTHAND INTERIOR PRODUCT x (left floor - 230A) x (turned not sign - 2319) + ~ 2A3D FE00 tall variant with narrow foot 2A3E Z NOTATION RELATIONAL COMPOSITION x (z notation schema composition - 2A1F) 2A3F AMALGAMATION OR COPRODUCT @@ -16486,7 +16839,9 @@ 2A9B DOUBLE-LINE SLANTED EQUAL TO OR LESS-THAN 2A9C DOUBLE-LINE SLANTED EQUAL TO OR GREATER-THAN 2A9D SIMILAR OR LESS-THAN + ~ 2A9D FE00 with similar following the slant of the upper leg 2A9E SIMILAR OR GREATER-THAN + ~ 2A9E FE00 with similar following the slant of the upper leg 2A9F SIMILAR ABOVE LESS-THAN ABOVE EQUALS SIGN 2AA0 SIMILAR ABOVE GREATER-THAN ABOVE EQUALS SIGN 2AA1 DOUBLE NESTED LESS-THAN @@ -16504,7 +16859,9 @@ 2AAA SMALLER THAN 2AAB LARGER THAN 2AAC SMALLER THAN OR EQUAL TO + ~ 2AAC FE00 with slanted equal 2AAD LARGER THAN OR EQUAL TO + ~ 2AAD FE00 with slanted equal 2AAE EQUALS SIGN WITH BUMPY ABOVE x (difference between - 224F) 2AAF PRECEDES ABOVE SINGLE-LINE EQUALS SIGN @@ -16539,7 +16896,9 @@ 2AC9 SUBSET OF ABOVE ALMOST EQUAL TO 2ACA SUPERSET OF ABOVE ALMOST EQUAL TO 2ACB SUBSET OF ABOVE NOT EQUAL TO + ~ 2ACB FE00 with stroke through bottom members 2ACC SUPERSET OF ABOVE NOT EQUAL TO + ~ 2ACC FE00 with stroke through bottom members 2ACD SQUARE LEFT OPEN BOX OPERATOR 2ACE SQUARE RIGHT OPEN BOX OPERATOR 2ACF CLOSED SUBSET @@ 
-16647,6 +17006,7 @@ = Dijkstra choice 2AFF N-ARY WHITE VERTICAL BAR = n-ary Dijkstra choice +@~ Standardized Variation Sequences @@ 2B00 Miscellaneous Symbols and Arrows 2BFF @ White and black arrows @+ Other white and black arrows to complete this set can be found in the Arrows and Dingbats blocks. @@ -16659,8 +17019,14 @@ x (up down white arrow - 21F3) 2B05 LEFTWARDS BLACK ARROW x (black rightwards arrow - 27A1) + ~ 2B05 FE0E text style + ~ 2B05 FE0F emoji style 2B06 UPWARDS BLACK ARROW + ~ 2B06 FE0E text style + ~ 2B06 FE0F emoji style 2B07 DOWNWARDS BLACK ARROW + ~ 2B07 FE0E text style + ~ 2B07 FE0F emoji style 2B08 NORTH EAST BLACK ARROW 2B09 NORTH WEST BLACK ARROW 2B0A SOUTH EAST BLACK ARROW @@ -16688,8 +17054,12 @@ 2B1A DOTTED SQUARE 2B1B BLACK LARGE SQUARE x (black square - 25A0) + ~ 2B1B FE0E text style + ~ 2B1B FE0F emoji style 2B1C WHITE LARGE SQUARE x (white square - 25A1) + ~ 2B1C FE0E text style + ~ 2B1C FE0F emoji style 2B1D BLACK VERY SMALL SQUARE x (black small square - 25AA) 2B1E WHITE VERY SMALL SQUARE @@ -16791,6 +17161,8 @@ @ Stars 2B50 WHITE MEDIUM STAR x (star operator - 22C6) + ~ 2B50 FE0E text style + ~ 2B50 FE0F emoji style 2B51 BLACK SMALL STAR x (arabic five pointed star - 066D) 2B52 WHITE SMALL STAR @@ -16802,6 +17174,8 @@ = basic symbol for speed limit * forms a game tally pair with 274C x (large circle - 25EF) + ~ 2B55 FE0E text style + ~ 2B55 FE0F emoji style @ Dictionary and map symbols from ARIB STD B24 2B56 HEAVY OVAL WITH OVAL INSIDE = prefectural office @@ -16814,6 +17188,7 @@ 2B59 HEAVY CIRCLED SALTIRE = police station x (n-ary circled times operator - 2A02) +@~ Standardized Variation Sequences @@ 2C00 Glagolitic 2C5F @ Capital letters 2C00 GLAGOLITIC CAPITAL LETTER AZU @@ -17155,8 +17530,9 @@ 2D23 GEORGIAN SMALL LETTER WE 2D24 GEORGIAN SMALL LETTER HAR 2D25 GEORGIAN SMALL LETTER HOE -@ Additional letters for Ossetian +@ Additional letter 2D27 GEORGIAN SMALL LETTER YN +@ Additional letter for Ossetian 2D2D GEORGIAN SMALL 
LETTER AEN @@ 2D30 Tifinagh 2D7F @ Letters @@ -18342,12 +18718,15 @@ x (squared rising diagonal slash - 29C4) 303D PART ALTERNATION MARK * marks the start of a song part in Japanese + ~ 303D FE0E text style + ~ 303D FE0F emoji style @ Special CJK indicators @+ These are visibly displayed graphic characters, not invisible format control characters. 303E IDEOGRAPHIC VARIATION INDICATOR * visual indicator that the following ideograph is to be taken as a variant of the intended character 303F IDEOGRAPHIC HALF FILL SPACE * visual indicator of a screen space for half of an ideograph +@~ Standardized Variation Sequences @@ 3040 Hiragana 309F @ Hiragana letters 3041 HIRAGANA LETTER SMALL A @@ -19394,10 +19773,14 @@ 3296 CIRCLED IDEOGRAPH FINANCIAL # <circle> 8CA1 3297 CIRCLED IDEOGRAPH CONGRATULATION + ~ 3297 FE0E text style + ~ 3297 FE0F emoji style # <circle> 795D 3298 CIRCLED IDEOGRAPH LABOR # <circle> 52B4 3299 CIRCLED IDEOGRAPH SECRET + ~ 3299 FE0E text style + ~ 3299 FE0F emoji style # <circle> 79D8 329A CIRCLED IDEOGRAPH MALE # <circle> 7537 @@ -19607,6 +19990,7 @@ # <circle> 30F1 32FE CIRCLED KATAKANA WO # <circle> 30F2 +@~ Standardized Variation Sequences @@ 3300 CJK Compatibility 33FF @ Squared Katakana words 3300 SQUARE APAATO @@ -22127,12 +22511,12 @@ A6E6 BAMUM LETTER MO A6E7 BAMUM LETTER MBAA * also used for digit two A6E8 BAMUM LETTER TET - * tèt + * tèt * also used for digit three A6E9 BAMUM LETTER KPA * also used for digit four A6EA BAMUM LETTER TEN - * tèn + * tèn * also used for digit five A6EB BAMUM LETTER NTUU * also used for digit six @@ -22511,6 +22895,7 @@ A855 PHAGS-PA LETTER ZA x (tibetan letter za - 0F5F) A856 PHAGS-PA LETTER SMALL A x (tibetan letter -a - 0F60) + ~ A856 FE00 phags-pa letter reversed shaping small a A857 PHAGS-PA LETTER YA x (tibetan letter ya - 0F61) A858 PHAGS-PA LETTER RA @@ -22524,16 +22909,20 @@ A85B PHAGS-PA LETTER SA x (tibetan letter sa - 0F66) A85C PHAGS-PA LETTER HA x (tibetan letter ha - 0F67) + ~ A85C FE00 phags-pa 
letter reversed shaping ha @ Letter A A85D PHAGS-PA LETTER A x (tibetan letter a - 0F68) @ Vowels A85E PHAGS-PA LETTER I x (tibetan vowel sign i - 0F72) + ~ A85E FE00 phags-pa letter reversed shaping i A85F PHAGS-PA LETTER U x (tibetan vowel sign u - 0F74) + ~ A85F FE00 phags-pa letter reversed shaping u A860 PHAGS-PA LETTER E x (tibetan vowel sign e - 0F7A) + ~ A860 FE00 phags-pa letter reversed shaping e A861 PHAGS-PA LETTER O x (tibetan vowel sign o - 0F7C) @ Consonants @@ -22557,6 +22946,7 @@ A867 PHAGS-PA SUBJOINED LETTER WA A868 PHAGS-PA SUBJOINED LETTER YA * Chinese, Tibetan, Sanskrit x (tibetan subjoined letter ya - 0FB1) + ~ A868 FE00 phags-pa letter reversed shaping subjoined ya @ Consonant additions for Sanskrit A869 PHAGS-PA LETTER TTA * Sanskrit @@ -22613,6 +23003,7 @@ A876 PHAGS-PA MARK SHAD A877 PHAGS-PA MARK DOUBLE SHAD * Tibetan x (tibetan mark nyis shad - 0F0E) +@~ Standardized Variation Sequences @@ A880 Saurashtra A8DF @ Various signs A880 SAURASHTRA SIGN ANUSVARA @@ -23264,7 +23655,7 @@ AADE TAI VIET SYMBOL HO HOI AADF TAI VIET SYMBOL KOI KOI * marks end of text in songs and poems @@ AAE0 Meetei Mayek Extensions AAFF -@+ The characters in this block are extensions for historical orthographies of Meetei and are not specified in the Manupuri Government order No. 1/2/78-SS/E. +@+ The characters in this block are extensions for historical orthographies of Meetei and are not specified in the Manipuri Government order No. 1/2/78-SS/E. @ Independent vowel signs AAE0 MEETEI MAYEK LETTER E AAE1 MEETEI MAYEK LETTER O @@ -23540,7 +23931,7 @@ D7FB HANGUL JONGSEONG PHIEUPH-THIEUTH @@+ @+ This block, despite its name, contains a number of unified CJK ideographs. Those characters are individually identified by annotations. @+ Subheaders identifying sources for subranges do not indicate required usage or preclude mappings to other sources. For example, many pronunciation variants from KS X 1001:1998 are also mapped to a J source. 
-@ Pronunciation variants from KS X 1001:1998 +@ Pronunciation variants from KS X 1001:1998 F900 CJK COMPATIBILITY IDEOGRAPH-F900 : 8C48 F901 CJK COMPATIBILITY IDEOGRAPH-F901 @@ -28731,7 +29122,7 @@ FFFF <not a character> 1110D CHAKMA LETTER CHAA = majaraa chaa 1110E CHAKMA LETTER JAA - = dvipadalaa haa + = dvipadalaa jaa 1110F CHAKMA LETTER JHAA = uraauraa jhaa 11110 CHAKMA LETTER NYAA @@ -32635,7 +33026,7 @@ FFFF <not a character> 1D208 GREEK VOCAL NOTATION SYMBOL-9 = Greek instrumental notation symbol-44 * vocal second sharp of G - * instrumental first sharp of e´ + * instrumental first sharp of e´ 1D209 GREEK VOCAL NOTATION SYMBOL-10 * vocal A * this is a modification of 039F and is therefore not the same as 03D8 @@ -32648,7 +33039,7 @@ FFFF <not a character> 1D20D GREEK VOCAL NOTATION SYMBOL-14 = Greek instrumental notation symbol-41 * vocal first sharp of B - * instrumental first sharp of d´ + * instrumental first sharp of d´ x (latin capital letter v - 0056) 1D20E GREEK VOCAL NOTATION SYMBOL-15 = Greek instrumental notation symbol-35 @@ -32680,16 +33071,16 @@ FFFF <not a character> 1D217 GREEK VOCAL NOTATION SYMBOL-24 * vocal second sharp of e 1D218 GREEK VOCAL NOTATION SYMBOL-50 - * vocal first sharp of g´ + * vocal first sharp of g´ 1D219 GREEK VOCAL NOTATION SYMBOL-51 - * vocal second sharp of g´ + * vocal second sharp of g´ 1D21A GREEK VOCAL NOTATION SYMBOL-52 - * vocal a´ + * vocal a´ 1D21B GREEK VOCAL NOTATION SYMBOL-53 - * vocal first sharp of a´ + * vocal first sharp of a´ 1D21C GREEK VOCAL NOTATION SYMBOL-54 = Greek instrumental notation symbol-20 - * vocal second sharp of a´ + * vocal second sharp of a´ * instrumental first sharp of d @ Ancient Greek instrumental notation 1D21D GREEK INSTRUMENTAL NOTATION SYMBOL-1 @@ -32737,37 +33128,37 @@ FFFF <not a character> 1D232 GREEK INSTRUMENTAL NOTATION SYMBOL-36 * instrumental second sharp of b 1D233 GREEK INSTRUMENTAL NOTATION SYMBOL-37 - * instrumental c´ + * instrumental c´ 1D234 GREEK INSTRUMENTAL 
NOTATION SYMBOL-38 - * instrumental first sharp of c´ + * instrumental first sharp of c´ 1D235 GREEK INSTRUMENTAL NOTATION SYMBOL-39 - * instrumental second sharp of c´ + * instrumental second sharp of c´ 1D236 GREEK INSTRUMENTAL NOTATION SYMBOL-40 - * instrumental d´ + * instrumental d´ 1D237 GREEK INSTRUMENTAL NOTATION SYMBOL-42 - * instrumental second sharp of d´ + * instrumental second sharp of d´ 1D238 GREEK INSTRUMENTAL NOTATION SYMBOL-43 - * instrumental e´ + * instrumental e´ 1D239 GREEK INSTRUMENTAL NOTATION SYMBOL-45 - * instrumental second sharp of e´ + * instrumental second sharp of e´ 1D23A GREEK INSTRUMENTAL NOTATION SYMBOL-47 - * instrumental first sharp of f´ + * instrumental first sharp of f´ * similar but not identical to 002F 1D23B GREEK INSTRUMENTAL NOTATION SYMBOL-48 - * instrumental second sharp of f´ + * instrumental second sharp of f´ * similar but not identical to 005C 1D23C GREEK INSTRUMENTAL NOTATION SYMBOL-49 - * instrumental g´ + * instrumental g´ 1D23D GREEK INSTRUMENTAL NOTATION SYMBOL-50 - * instrumental first sharp of g´ + * instrumental first sharp of g´ 1D23E GREEK INSTRUMENTAL NOTATION SYMBOL-51 - * instrumental second sharp of g´ + * instrumental second sharp of g´ 1D23F GREEK INSTRUMENTAL NOTATION SYMBOL-52 - * instrumental a´ + * instrumental a´ 1D240 GREEK INSTRUMENTAL NOTATION SYMBOL-53 - * instrumental first sharp of a´ + * instrumental first sharp of a´ 1D241 GREEK INSTRUMENTAL NOTATION SYMBOL-54 - * instrumental second sharp of a´ + * instrumental second sharp of a´ @ Further Greek musical notation symbols 1D242 COMBINING GREEK MUSICAL TRISEME x (metrical triseme - 23D7) @@ -35345,6 +35736,8 @@ FFFF <not a character> @ Dragon tiles 1F004 MAHJONG TILE RED DRAGON = hongzhong + ~ 1F004 FE0E text style + ~ 1F004 FE0F emoji style 1F005 MAHJONG TILE GREEN DRAGON = qingfa 1F006 MAHJONG TILE WHITE DRAGON @@ -35400,6 +35793,7 @@ FFFF <not a character> 1F02A MAHJONG TILE JOKER = baida 1F02B MAHJONG TILE BACK +@~ Standardized 
Variation Sequences @@ 1F030 Domino Tiles 1F09F @ Horizontal tiles 1F030 DOMINO TILE HORIZONTAL BACK @@ -35539,10 +35933,10 @@ FFFF <not a character> = chevalier, Ober, Ritter, cavall, cavaliere = knight of swords 1F0AD PLAYING CARD QUEEN OF SPADES - = dame, Dame, Königin, regina + = dame, Dame, Königin, regina = queen of swords 1F0AE PLAYING CARD KING OF SPADES - = roi, König, re + = roi, König, re = king of swords @ Hearts or cups 1F0B1 PLAYING CARD ACE OF HEARTS @@ -35802,7 +36196,7 @@ FFFF <not a character> = parking space (ARIB STD B24) 1F160 NEGATIVE CIRCLED LATIN CAPITAL LETTER Q 1F161 NEGATIVE CIRCLED LATIN CAPITAL LETTER R - = Raststätte (rest stop) + = Raststätte (rest stop) 1F162 NEGATIVE CIRCLED LATIN CAPITAL LETTER S = Stadtbahn (metropolitan railway) 1F163 NEGATIVE CIRCLED LATIN CAPITAL LETTER T @@ -35823,7 +36217,7 @@ FFFF <not a character> x (trade mark sign - 2122) # <super> 004D 0043 1F16B RAISED MD SIGN - = marque déposée + = marque déposée * used in Canada x (registered sign - 00AE) # <super> 004D 0044 @@ -35856,6 +36250,8 @@ FFFF <not a character> = blood type O 1F17F NEGATIVE SQUARED LATIN CAPITAL LETTER P = parking space empty-full (ARIB STD B24) + ~ 1F17F FE0E text style + ~ 1F17F FE0F emoji style 1F180 NEGATIVE SQUARED LATIN CAPITAL LETTER Q 1F181 NEGATIVE SQUARED LATIN CAPITAL LETTER R 1F182 NEGATIVE SQUARED LATIN CAPITAL LETTER S @@ -35927,6 +36323,7 @@ FFFF <not a character> 1F1FD REGIONAL INDICATOR SYMBOL LETTER X 1F1FE REGIONAL INDICATOR SYMBOL LETTER Y 1F1FF REGIONAL INDICATOR SYMBOL LETTER Z +@~ Standardized Variation Sequences @@ 1F200 Enclosed Ideographic Supplement 1F2FF @ Squared hiragana from ARIB STD B24 1F200 SQUARE HIRAGANA HOKA @@ -35975,6 +36372,8 @@ FFFF <not a character> 1F21A SQUARED CJK UNIFIED IDEOGRAPH-7121 = free broadcasting service = non-existence sign + ~ 1F21A FE0E text style + ~ 1F21A FE0F emoji style # <square> 7121 1F21B SQUARED CJK UNIFIED IDEOGRAPH-6599 = pay broadcasting service @@ -36039,6 +36438,8 @@ FFFF 
<not a character> 1F22F SQUARED CJK UNIFIED IDEOGRAPH-6307 = designated hitter = reserved sign + ~ 1F22F FE0E text style + ~ 1F22F FE0F emoji style # <square> 6307 1F230 SQUARED CJK UNIFIED IDEOGRAPH-8D70 = runner @@ -36111,6 +36512,7 @@ FFFF <not a character> 1F251 CIRCLED IDEOGRAPH ACCEPT = accept sign # <circle> 53EF +@~ Standardized Variation Sequences @@ 1F300 Miscellaneous Symbols and Pictographs 1F5FF @ Weather, landscape, and sky symbols 1F300 CYCLONE @@ -37326,7 +37728,7 @@ FFFF <not a character> @@ 2A700 CJK Unified Ideographs Extension C 2B734 @@ 2B740 CJK Unified Ideographs Extension D 2B81D @@ 2F800 CJK Compatibility Ideographs Supplement 2FA1F -@ Duplicate characters from CNS 11643-1992 +@ Duplicate characters from CNS 11643-1992 2F800 CJK COMPATIBILITY IDEOGRAPH-2F800 : 4E3D 2F801 CJK COMPATIBILITY IDEOGRAPH-2F801 diff --git a/gnu/usr.bin/perl/lib/unicore/NormalizationCorrections.txt b/gnu/usr.bin/perl/lib/unicore/NormalizationCorrections.txt index 61800b82adc..b53bb408a5b 100644 --- a/gnu/usr.bin/perl/lib/unicore/NormalizationCorrections.txt +++ b/gnu/usr.bin/perl/lib/unicore/NormalizationCorrections.txt @@ -1,10 +1,10 @@ -# NormalizationCorrections-6.1.0.txt -# Date: 2011-06-23, 00:46:00 GMT [KW, LI] +# NormalizationCorrections-6.2.0.txt +# Date: 2012-05-15, 22:25:00 GMT [KW, LI] # # This file is a normative contributory data file in the # Unicode Character Database. # -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # # The normalization stability policy of the Unicode Consortium @@ -46,3 +46,5 @@ F951;96FB;964B;3.2.0 # Corrigendum 3 2F91F;43AB;243AB;4.0.0 # Corrigendum 4 2F95F;7AAE;7AEE;4.0.0 # Corrigendum 4 2F9BF;4D57;45D7;4.0.0 # Corrigendum 4 + +# EOF diff --git a/gnu/usr.bin/perl/lib/unicore/PropList.txt b/gnu/usr.bin/perl/lib/unicore/PropList.txt index f9dcb2ae74a..9ce7eec9713 100644 --- a/gnu/usr.bin/perl/lib/unicore/PropList.txt +++ b/gnu/usr.bin/perl/lib/unicore/PropList.txt @@ -1,8 +1,8 @@ -# PropList-6.1.0.txt -# Date: 2011-11-30, 01:49:54 GMT [MD] +# PropList-6.2.0.txt +# Date: 2012-05-23, 20:34:59 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ diff --git a/gnu/usr.bin/perl/lib/unicore/PropValueAliases.txt b/gnu/usr.bin/perl/lib/unicore/PropValueAliases.txt index 2f7bde28ec8..d9048fb32f7 100644 --- a/gnu/usr.bin/perl/lib/unicore/PropValueAliases.txt +++ b/gnu/usr.bin/perl/lib/unicore/PropValueAliases.txt @@ -1,8 +1,8 @@ -# PropertyValueAliases-6.1.0.txt -# Date: 2011-12-07, 23:40:57 GMT [MD] +# PropertyValueAliases-6.2.0.txt +# Date: 2012-08-14, 16:05:11 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -73,6 +73,7 @@ age; 5.1 ; V5_1 age; 5.2 ; V5_2 age; 6.0 ; V6_0 age; 6.1 ; V6_1 +age; 6.2 ; V6_2 age; NA ; Unassigned # Alphabetic (Alpha) @@ -382,7 +383,8 @@ ccc; 118; CCC118 ; CCC118 ccc; 122; CCC122 ; CCC122 ccc; 129; CCC129 ; CCC129 ccc; 130; CCC130 ; CCC130 -ccc; 132; CCC133 ; CCC133 +ccc; 132; CCC132 ; CCC132 +ccc; 133; CCC133 ; CCC133 # RESERVED ccc; 200; ATBL ; Attached_Below_Left ccc; 202; ATB ; Attached_Below ccc; 214; ATA ; Attached_Above @@ -592,6 +594,7 @@ GCB; LF ; LF GCB; LV ; LV GCB; LVT ; LVT GCB; PP ; Prepend +GCB; RI ; Regional_Indicator GCB; SM ; SpacingMark GCB; T ; T GCB; V ; V @@ -862,6 +865,7 @@ lb ; OP ; Open_Punctuation lb ; PO ; Postfix_Numeric lb ; PR ; Prefix_Numeric lb ; QU ; Quotation +lb ; RI ; Regional_Indicator lb ; SA ; Complex_Context lb ; SG ; Surrogate lb ; SP ; Space @@ -880,10 +884,6 @@ LOE; Y ; Yes ; T Lower; N ; No ; F ; False Lower; Y ; Yes ; T ; True -# Lowercase_Mapping (lc) - -# @missing: 0000..10FFFF; Lowercase_Mapping; <code point> - # Math (Math) Math; N ; No ; F ; False @@ -1159,10 +1159,6 @@ SD ; Y ; Yes ; T Term; N ; No ; F ; False Term; Y ; Yes ; T ; True -# Titlecase_Mapping (tc) - -# @missing: 0000..10FFFF; Titlecase_Mapping; <code point> - # Unicode_1_Name (na1) # @missing: 0000..10FFFF; Unicode_1_Name; <none> @@ -1177,10 +1173,6 @@ UIdeo; Y ; Yes ; T Upper; N ; No ; F ; False Upper; Y ; Yes ; T ; True -# Uppercase_Mapping (uc) - -# @missing: 0000..10FFFF; Uppercase_Mapping; <code point> - # Variation_Selector (VS) VS ; N ; No ; F ; False @@ -1205,6 +1197,7 @@ WB ; ML ; MidLetter WB ; MN ; MidNum WB ; NL ; Newline WB ; NU ; Numeric +WB ; RI ; Regional_Indicator WB ; XX ; Other # XID_Continue (XIDC) diff --git a/gnu/usr.bin/perl/lib/unicore/PropertyAliases.txt b/gnu/usr.bin/perl/lib/unicore/PropertyAliases.txt index f891ff254e4..81a063b1f61 100644 --- 
a/gnu/usr.bin/perl/lib/unicore/PropertyAliases.txt +++ b/gnu/usr.bin/perl/lib/unicore/PropertyAliases.txt @@ -1,8 +1,8 @@ -# PropertyAliases-6.1.0.txt -# Date: 2011-12-07, 23:40:57 GMT [MD] +# PropertyAliases-6.2.0.txt +# Date: 2012-05-20, 17:41:20 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -57,7 +57,6 @@ nv ; Numeric_Value # ================================================ # String Properties # ================================================ -bmg ; Bidi_Mirroring_Glyph cf ; Case_Folding cjkCompatibilityVariant ; kCompatibilityVariant dm ; Decomposition_Mapping @@ -74,6 +73,7 @@ uc ; Uppercase_Mapping # ================================================ # Miscellaneous Properties # ================================================ +bmg ; Bidi_Mirroring_Glyph cjkIICore ; kIICore cjkIRG_GSource ; kIRG_GSource cjkIRG_HSource ; kIRG_HSource diff --git a/gnu/usr.bin/perl/lib/unicore/README.perl b/gnu/usr.bin/perl/lib/unicore/README.perl index 88152d2ccdc..d55bfed0156 100644 --- a/gnu/usr.bin/perl/lib/unicore/README.perl +++ b/gnu/usr.bin/perl/lib/unicore/README.perl @@ -1,27 +1,49 @@ -The *.txt files were copied from +# Perl should compile and reasonably run any version of Unicode. That doesn't +# mean that the test suite will run without showing errors. A few of the +# very-Unicode specific test files have been modified to account for different +# versions, but most have not. For example, some tests use characters that +# aren't encoded in all Unicode versions; others have hard-coded the General +# Categories that were correct at the time the test was written. Perl itself +# will not compile under Unicode releases prior to 3.0 without a simple change to +# Unicode::Normalize. 
mktables contains instructions for this, as well as other +# hints for using older Unicode versions. - ftp://www.unicode.org/Public/UNIDATA +# The *.txt files were copied from -with subdirectories 'extracted' and 'auxiliary' +# ftp://www.unicode.org/Public/UNIDATA -The Unihan files were not included due to space considerations. Also NOT -included were any *.html files. It is possible to add the Unihan files, and -edit mktables (see instructions near its beginning) to look at them. +# (which always points to the latest version) with subdirectories 'extracted' and +# 'auxiliary'. Older versions are located under Public with an appropriate name. -The file 'version' should exist and be a single line with the Unicode version, -like: -5.2.0 +# The Unihan files were not included due to space considerations. Also NOT +# included were any *.html files. It is possible to add the Unihan files, and +# edit mktables (see instructions near its beginning) to look at them. -To be 8.3 filesystem friendly, the names of some of the input files have been -changed from the values that are in the Unicode DB. Not all of the Test files -are currently used, so may not be present, so some of the mv's can fail. The -.html Test files are not touched. +# The file named 'version' should exist and be a single line with the Unicode +# version, like: +# 5.2.0 + +# To be 8.3 filesystem friendly, the names of some of the input files have been +# changed from the values that are in the Unicode DB. Not all of the Test +# files are currently used, so may not be present, so some of the mv's can +# fail. The .html Test files are not touched. 
mv PropertyValueAliases.txt PropValueAliases.txt mv NamedSequencesProv.txt NamedSqProv.txt +mv NormalizationTest.txt NormTest.txt mv DerivedAge.txt DAge.txt mv DerivedCoreProperties.txt DCoreProperties.txt mv DerivedNormalizationProps.txt DNormalizationProps.txt + +# Some early releases don't have the extracted directory, and hence these files +# should be moved to it. +mkdir extracted 2>/dev/null +mv DerivedBidiClass.txt DerivedBinaryProperties.txt extracted 2>/dev/null +mv DerivedCombiningClass.txt DerivedDecompositionType.txt extracted 2>/dev/null +mv DerivedEastAsianWidth.txt DerivedGeneralCategory.txt extracted 2>/dev/null +mv DerivedJoiningGroup.txt DerivedJoiningType.txt extracted 2>/dev/null +mv DerivedLineBreak.txt DerivedNumericType.txt DerivedNumericValues.txt extracted 2>/dev/null + mv extracted/DerivedBidiClass.txt extracted/DBidiClass.txt mv extracted/DerivedBinaryProperties.txt extracted/DBinaryProperties.txt mv extracted/DerivedCombiningClass.txt extracted/DCombiningClass.txt @@ -39,8 +61,8 @@ mv auxiliary/LineBreakTest.txt auxiliary/LBTest.txt mv auxiliary/SentenceBreakTest.txt auxiliary/SBTest.txt mv auxiliary/WordBreakTest.txt auxiliary/WBTest.txt -If you have the Unihan database (5.2 and above), you should also do the -following: +# If you have the Unihan database (5.2 and above), you should also do the +# following: mv Unihan_DictionaryIndices.txt UnihanIndicesDictionary.txt mv Unihan_DictionaryLikeData.txt UnihanDataDictionaryLike.txt @@ -51,76 +73,74 @@ mv Unihan_RadicalStrokeCounts.txt UnihanRadicalStrokeCounts.txt mv Unihan_Readings.txt UnihanReadings.txt mv Unihan_Variants.txt UnihanVariants.txt -If you download everything, the names of files that are not used by mktables -are not changed by the above, and will not work correctly as-is on 8.3 -filesystems. - -mktables is used to generate the tables used by the rest of Perl. It will warn -you about any *.txt files in the directory substructure that it doesn't know -about. 
You should remove any so-identified, or edit mktables to add them to -its lists to process. You can run - - mktables -globlist - -to have it try to process these tables generically. - -FOR PUMPKINS - -The files are inter-related. If you take the latest UnicodeData.txt, for -example, but leave the older versions of other files, there can be subtle -problems. So get everything available from Unicode, and delete those which -aren't needed. - -When moving to a new version of Unicode, you need to update 'version' by hand - - p4 edit version - ... - -You should look in the Unicode release notes (which are probably towards the -bottom of http://www.unicode.org/reports/tr44/) to see if any properties have -newly been moved to be Obsolete, Deprecated, or Stabilized. The full names for -these should be added to the respective lists near the beginning of mktables, -using an 'if' to add them for just this Unicode version going forward, so that -mktables can continue to be used for earlier Unicode versions. - -When putting out a new Perl release, think about if any of the Deprecated -properties should be moved to Suppressed. - -perlrecharclass.pod has a list of all the characters that are white space, -which needs to be updated if there are changes. A quick way to check if there -have been changes would be to see if the number of such characters listed in -perluniprops.pod (generated by running mktables) for the property -\p{White_Space} is no longer 26. Further investigation would then be necessary -to classify the new characters as horizontal and vertical. - -The code in regexec.c for the \X match construct is intimately tied to the -regular expression in UAX #29 (http://www.unicode.org/reports/tr29/). You -should see if it has changed, and if so regexec.c should be modified. The -current one is -( CRLF -| Prepend* ( Hangul-syllable | !Control ) - ( Grapheme_Extend | Spacing_Mark)* -| . 
) - -mktables has many checks to warn you if there are unexpected or novel things -that it doesn't know how to handle. - -perl.pod should be changed so that it gives the new name (which includes the -Unicode release number) for perluniprops.pod - -Module::CoreList should be changed to include the new release - -Also, you should regen l1_char_class_tab.h, by - -perl regen/mk_L_charclass.pl - -and, regen charclass_invlists.h by - -perl regen/mk_invlists.pl - -Finally: - - p4 submit - --- -jhi@iki.fi; updated by nick@ccl4.org, public@khwilliamson.com +# If you download everything, the names of files that are not used by mktables +# are not changed by the above, and hence may not work correctly as-is on 8.3 +# filesystems. + +# mktables is used to generate the tables used by the rest of Perl. It will +# warn you about any *.txt files in the directory substructure that it doesn't +# know about. You should remove any so-identified, or edit mktables to add +# them to its lists to process. You can run +# +# mktables -globlist +# +#to have it try to process these tables generically. +# +# FOR PUMPKINS +# +# The files are inter-related. If you take the latest UnicodeData.txt, for +# example, but leave the older versions of other files, there can be subtle +# problems. So get everything available from Unicode, and delete those which +# aren't needed. +# +# When moving to a new version of Unicode, you need to update 'version' by hand +# +# p4 edit version +# ... +# +# You should look in the Unicode release notes (which are probably towards the +# bottom of http://www.unicode.org/reports/tr44/) to see if any properties have +# newly been moved to be Obsolete, Deprecated, or Stabilized. The full names +# for these should be added to the respective lists near the beginning of +# mktables, using an 'if' to add them for just this Unicode version going +# forward, so that mktables can continue to be used for earlier Unicode +# versions. 
+# +# When putting out a new Perl release, think about if any of the Deprecated +# properties should be moved to Suppressed. +# +# perlrecharclass.pod has a list of all the characters that are white space, +# which needs to be updated if there are changes. A quick way to check if +# there have been changes would be to see if the number of such characters +# listed in perluniprops.pod (generated by running mktables) for the property +# \p{White_Space} is no longer 26. Further investigation would then be +# necessary to classify the new characters as horizontal and vertical. +# +# The code in regexec.c for the \X match construct is intimately tied to the +# regular expression in UAX #29 (http://www.unicode.org/reports/tr29/). You +# should see if it has changed, and if so regexec.c should be modified. The +# current one is +# ( CRLF +# | Prepend* ( Hangul-syllable | !Control ) +# ( Grapheme_Extend | Spacing_Mark)* +# | . ) +# +# mktables has many checks to warn you if there are unexpected or novel things +# that it doesn't know how to handle. +# +# Module::CoreList should be changed to include the new release +# +# Also, you should regen l1_char_class_tab.h, by +# +# perl regen/mk_L_charclass.pl +# +# and, regen charclass_invlists.h by +# +# perl regen/mk_invlists.pl +# +# Finally: +# +# p4 submit +# +# -- +# jhi@iki.fi; updated by nick@ccl4.org, public@khwilliamson.com diff --git a/gnu/usr.bin/perl/lib/unicore/ReadMe.txt b/gnu/usr.bin/perl/lib/unicore/ReadMe.txt index 9fd93d89604..370cbd0801e 100644 --- a/gnu/usr.bin/perl/lib/unicore/ReadMe.txt +++ b/gnu/usr.bin/perl/lib/unicore/ReadMe.txt @@ -1,4 +1,4 @@ -# Date: 2012-01-26, 22:03:00 GMT [KW] +# Date: 2012-09-24, 22:40:00 GMT [KW] # # Unicode Character Database # Copyright (c) 1991-2012 Unicode, Inc. @@ -10,4 +10,6 @@ # This directory contains final data files -for the Unicode Character Database (UCD) for Unicode 6.1.0. +for the Unicode Character Database (UCD) for Unicode 6.2.0. 
+ + diff --git a/gnu/usr.bin/perl/lib/unicore/ScriptExtensions.txt b/gnu/usr.bin/perl/lib/unicore/ScriptExtensions.txt index 301ccc21f45..5bff07ece00 100644 --- a/gnu/usr.bin/perl/lib/unicore/ScriptExtensions.txt +++ b/gnu/usr.bin/perl/lib/unicore/ScriptExtensions.txt @@ -1,13 +1,12 @@ -# ScriptExtensions-6.1.0.txt -# Date: 2011-12-05, 22:51:22 GMT [MD] +# ScriptExtensions-6.2.0.txt +# Date: 2012-08-13, 20:52:17 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # The Script_Extensions property indicates which characters are commonly used -# with a limited number of scripts, but with more than one. -# The property is provisional: values are expected to change over time as more information becomes available. +# with more than one script, but with a limited number of scripts. # For each code point, there is one or more property values. Each such value is a Script property value. 
# For more information, see: # UAX #24: http://www.unicode.org/reports/tr24/ and @@ -24,6 +23,38 @@ # ================================================ +# Script_Extensions=Deva + +1CD0..1CD2 ; Deva # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; Deva # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE1 ; Deva # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE2..1CE8 ; Deva # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; Deva # Mn VEDIC SIGN TIRYAK +1CF2..1CF3 ; Deva # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF4 ; Deva # Mn VEDIC TONE CANDRA ABOVE + +# Total code points: 28 + +# ================================================ + +# Script_Extensions=Grek + +0342 ; Grek # Mn COMBINING GREEK PERISPOMENI +0345 ; Grek # Mn COMBINING GREEK YPOGEGRAMMENI +1DC0..1DC1 ; Grek # Mn [2] COMBINING DOTTED GRAVE ACCENT..COMBINING DOTTED ACUTE ACCENT + +# Total code points: 4 + +# ================================================ + +# Script_Extensions=Latn + +0363..036F ; Latn # Mn [13] COMBINING LATIN SMALL LETTER A..COMBINING LATIN SMALL LETTER X + +# Total code points: 13 + +# ================================================ + # Script_Extensions=Arab Syrc 064B..0655 ; Arab Syrc # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW @@ -69,6 +100,22 @@ FDFD ; Arab Thaa # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHE # ================================================ +# Script_Extensions=Cyrl Latn + +0485..0486 ; Cyrl Latn # Mn [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA + +# Total code points: 2 + +# ================================================ + +# Script_Extensions=Deva Latn + +0951..0952 ; Deva Latn # Mn [2] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI STRESS SIGN ANUDATTA + +# Total code points: 2 + +# ================================================ + # Script_Extensions=Hira Kana 3031..3035 ; Hira Kana # Lm [5] VERTICAL 
KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF @@ -123,17 +170,17 @@ FF9E..FF9F ; Hira Kana # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFW # ================================================ -# Script_Extensions=Beng Deva Guru Orya +# Script_Extensions=Buhd Hano Tagb Tglg -0964..0965 ; Beng Deva Guru Orya # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +1735..1736 ; Buhd Hano Tagb Tglg # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION # Total code points: 2 # ================================================ -# Script_Extensions=Buhd Hano Tagb Tglg +# Script_Extensions=Beng Deva Guru Orya Takr -1735..1736 ; Buhd Hano Tagb Tglg # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +0964..0965 ; Beng Deva Guru Orya Takr # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA # Total code points: 2 diff --git a/gnu/usr.bin/perl/lib/unicore/Scripts.txt b/gnu/usr.bin/perl/lib/unicore/Scripts.txt index 2516f889d66..1a8e7229cc6 100644 --- a/gnu/usr.bin/perl/lib/unicore/Scripts.txt +++ b/gnu/usr.bin/perl/lib/unicore/Scripts.txt @@ -1,8 +1,8 @@ -# Scripts-6.1.0.txt -# Date: 2011-11-27, 05:10:50 GMT [MD] +# Scripts-6.2.0.txt +# Date: 2012-06-04, 17:21:29 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -146,7 +146,7 @@ 208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS -20A0..20B9 ; Common # Sc [26] EURO-CURRENCY SIGN..INDIAN RUPEE SIGN +20A0..20BA ; Common # Sc [27] EURO-CURRENCY SIGN..TURKISH LIRA SIGN 2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT 2102 ; Common # L& DOUBLE-STRUCK CAPITAL C 2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA @@ -576,7 +576,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR E0001 ; Common # Cf LANGUAGE TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 6412 +# Total code points: 6413 # ================================================ @@ -760,7 +760,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU 061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK 0620..063F ; Arabic # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE 0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH -0656..065E ; Arabic # Mn [9] ARABIC SUBSCRIPT ALEF..ARABIC FATHA WITH TWO DOTS +0656..065F ; Arabic # Mn [10] ARABIC SUBSCRIPT ALEF..ARABIC WAVY HAMZA BELOW 066A..066D ; Arabic # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR 066E..066F ; Arabic # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF 0671..06D3 ; Arabic # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE @@ -827,7 +827,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 
1234 +# Total code points: 1235 # ================================================ @@ -1477,7 +1477,6 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE 0300..036F ; Inherited # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X 0485..0486 ; Inherited # Mn [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA 064B..0655 ; Inherited # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW -065F ; Inherited # Mn ARABIC WAVY HAMZA BELOW 0670 ; Inherited # Mn ARABIC LETTER SUPERSCRIPT ALEF 0951..0952 ; Inherited # Mn [2] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI STRESS SIGN ANUDATTA 1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -1504,7 +1503,7 @@ FE20..FE26 ; Inherited # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CON 1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 524 +# Total code points: 523 # ================================================ diff --git a/gnu/usr.bin/perl/lib/unicore/SpecialCasing.txt b/gnu/usr.bin/perl/lib/unicore/SpecialCasing.txt index d650b6d9dcd..994043f01bf 100644 --- a/gnu/usr.bin/perl/lib/unicore/SpecialCasing.txt +++ b/gnu/usr.bin/perl/lib/unicore/SpecialCasing.txt @@ -1,8 +1,8 @@ -# SpecialCasing-6.1.0.txt -# Date: 2011-11-27, 05:10:51 GMT [MD] +# SpecialCasing-6.2.0.txt +# Date: 2012-05-23, 20:35:15 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # diff --git a/gnu/usr.bin/perl/lib/unicore/StandardizedVariants.txt b/gnu/usr.bin/perl/lib/unicore/StandardizedVariants.txt index 331b831e32a..179726550e5 100644 --- a/gnu/usr.bin/perl/lib/unicore/StandardizedVariants.txt +++ b/gnu/usr.bin/perl/lib/unicore/StandardizedVariants.txt @@ -1,5 +1,5 @@ -# StandardizedVariants-6.1.0.txt -# Date: 2011-11-10, 20:28:00 GMT [KW, LI] +# StandardizedVariants-6.2.0.txt +# Date: 2012-05-15, 21:53:00 GMT [KW, LI] # # Specification of the variation sequences that are defined in the # Unicode Standard. @@ -7,7 +7,7 @@ # This file is a normative contributory data file in the # Unicode Character Database. # -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # Standardized variation sequences are defined in this file. @@ -21,7 +21,7 @@ # # For more information on standardized variation sequences, # see Section 16.4, Variation Selectors, -# in The Unicode Standard, Version 6.1. +# in The Unicode Standard, Version 6.2. 
# # For more information on the Ideographic Variation Database, # see http://www.unicode.org/ivd/ diff --git a/gnu/usr.bin/perl/lib/unicore/UnicodeData.txt b/gnu/usr.bin/perl/lib/unicore/UnicodeData.txt index 9f204050c6b..086379eb4f3 100644 --- a/gnu/usr.bin/perl/lib/unicore/UnicodeData.txt +++ b/gnu/usr.bin/perl/lib/unicore/UnicodeData.txt @@ -7190,6 +7190,7 @@ 20B7;SPESMILO SIGN;Sc;0;ET;;;;;N;;;;; 20B8;TENGE SIGN;Sc;0;ET;;;;;N;;;;; 20B9;INDIAN RUPEE SIGN;Sc;0;ET;;;;;N;;;;; +20BA;TURKISH LIRA SIGN;Sc;0;ET;;;;;N;;;;; 20D0;COMBINING LEFT HARPOON ABOVE;Mn;230;NSM;;;;;N;NON-SPACING LEFT HARPOON ABOVE;;;; 20D1;COMBINING RIGHT HARPOON ABOVE;Mn;230;NSM;;;;;N;NON-SPACING RIGHT HARPOON ABOVE;;;; 20D2;COMBINING LONG VERTICAL LINE OVERLAY;Mn;1;NSM;;;;;N;NON-SPACING LONG VERTICAL BAR OVERLAY;;;; @@ -18703,8 +18704,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1242F;CUNEIFORM NUMERIC SIGN THREE SHARU VARIANT FORM;Nl;0;L;;;;3;N;;;;; 12430;CUNEIFORM NUMERIC SIGN FOUR SHARU;Nl;0;L;;;;4;N;;;;; 12431;CUNEIFORM NUMERIC SIGN FIVE SHARU;Nl;0;L;;;;5;N;;;;; -12432;CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS DISH;Nl;0;L;;;;;N;;;;; -12433;CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS MIN;Nl;0;L;;;;;N;;;;; +12432;CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS DISH;Nl;0;L;;;;216000;N;;;;; +12433;CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS MIN;Nl;0;L;;;;432000;N;;;;; 12434;CUNEIFORM NUMERIC SIGN ONE BURU;Nl;0;L;;;;1;N;;;;; 12435;CUNEIFORM NUMERIC SIGN TWO BURU;Nl;0;L;;;;2;N;;;;; 12436;CUNEIFORM NUMERIC SIGN THREE BURU;Nl;0;L;;;;3;N;;;;; @@ -18739,8 +18740,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 12453;CUNEIFORM NUMERIC SIGN FOUR BAN2 VARIANT FORM;Nl;0;L;;;;4;N;;;;; 12454;CUNEIFORM NUMERIC SIGN FIVE BAN2;Nl;0;L;;;;5;N;;;;; 12455;CUNEIFORM NUMERIC SIGN FIVE BAN2 VARIANT FORM;Nl;0;L;;;;5;N;;;;; -12456;CUNEIFORM NUMERIC SIGN NIGIDAMIN;Nl;0;L;;;;;N;;;;; -12457;CUNEIFORM NUMERIC SIGN NIGIDAESH;Nl;0;L;;;;;N;;;;; +12456;CUNEIFORM NUMERIC SIGN NIGIDAMIN;Nl;0;L;;;;-1;N;;;;; +12457;CUNEIFORM 
NUMERIC SIGN NIGIDAESH;Nl;0;L;;;;-1;N;;;;; 12458;CUNEIFORM NUMERIC SIGN ONE ESHE3;Nl;0;L;;;;1;N;;;;; 12459;CUNEIFORM NUMERIC SIGN TWO ESHE3;Nl;0;L;;;;2;N;;;;; 1245A;CUNEIFORM NUMERIC SIGN ONE THIRD DISH;Nl;0;L;;;;1/3;N;;;;; diff --git a/gnu/usr.bin/perl/lib/unicore/auxiliary/GCBTest.txt b/gnu/usr.bin/perl/lib/unicore/auxiliary/GCBTest.txt index 33b859cbff5..90e15fed3ef 100644 --- a/gnu/usr.bin/perl/lib/unicore/auxiliary/GCBTest.txt +++ b/gnu/usr.bin/perl/lib/unicore/auxiliary/GCBTest.txt @@ -1,8 +1,8 @@ -# GraphemeBreakTest-6.1.0.txt -# Date: 2011-12-07, 17:54:39 GMT [MD] +# GraphemeBreakTest-6.2.0.txt +# Date: 2012-08-22, 12:41:15 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -15,7 +15,7 @@ # × wherever there is not. # <comment> the format can change, but currently it shows: # - the sample character name -# - (x) the Grapheme_Break property* for the sample character +# - (x) the Grapheme_Cluster_Break property value for the sample character # - [x] the rule that determines whether there is a break or not # # These samples may be extended or changed in the future. 
@@ -42,6 +42,8 @@ ÷ 0020 × 0308 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 0020 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 0020 × 0308 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0020 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] +÷ 0020 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] ÷ 0020 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ 0020 × 0308 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ 0020 ÷ D800 ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] @@ -68,6 +70,8 @@ ÷ 000D ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 000D ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 000D ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000D ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] ÷ 000D ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3] ÷ 000D ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ 000D ÷ D800 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <surrogate-D800> (Control) ÷ [0.3] @@ -94,6 +98,8 @@ ÷ 000A ÷ 0308 ÷ AC00 ÷ # ÷ 
[0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 000A ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 000A ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000A ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] ÷ 000A ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3] ÷ 000A ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ 000A ÷ D800 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <surrogate-D800> (Control) ÷ [0.3] @@ -120,6 +126,8 @@ ÷ 0001 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 0001 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 0001 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0001 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] ÷ 0001 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3] ÷ 0001 ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ 0001 ÷ D800 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <surrogate-D800> (Control) ÷ [0.3] @@ -146,6 +154,8 @@ ÷ 
0300 × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 0300 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 0300 × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0300 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] +÷ 0300 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] ÷ 0300 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ 0300 × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ 0300 ÷ D800 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] @@ -172,6 +182,8 @@ ÷ 0903 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 0903 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 0903 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0903 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] +÷ 0903 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] ÷ 0903 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ 0903 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING 
DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ 0903 ÷ D800 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] @@ -198,6 +210,8 @@ ÷ 1100 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 1100 × AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 1100 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1100 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] +÷ 1100 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] ÷ 1100 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ 1100 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ 1100 ÷ D800 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] @@ -224,6 +238,8 @@ ÷ 1160 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 1160 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 1160 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1160 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] +÷ 1160 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] ÷ 1160 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ 
1160 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ 1160 ÷ D800 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] @@ -250,6 +266,8 @@ ÷ 11A8 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 11A8 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 11A8 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 11A8 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] +÷ 11A8 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] ÷ 11A8 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ 11A8 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ 11A8 ÷ D800 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] @@ -276,6 +294,8 @@ ÷ AC00 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ AC00 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ AC00 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ AC00 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] +÷ AC00 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] ÷ AC00 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE 
GA (LV) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ AC00 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ AC00 ÷ D800 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] @@ -302,10 +322,40 @@ ÷ AC01 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ AC01 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ AC01 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ AC01 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] +÷ AC01 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] ÷ AC01 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ AC01 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ AC01 ÷ D800 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ AC01 × 0308 ÷ D800 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 1F1E6 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 1F1E6 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN 
(CR)> (CR) ÷ [0.3] +÷ 1F1E6 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 1F1E6 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 1F1E6 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1F1E6 × 0308 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1F1E6 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1F1E6 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1F1E6 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 11A8 ÷ # 
÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1F1E6 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1F1E6 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1F1E6 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] +÷ 1F1E6 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 1F1E6 ÷ D800 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ D800 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ 0378 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 0378 × 0308 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 0378 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] 
@@ -328,6 +378,8 @@ ÷ 0378 × 0308 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 0378 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 0378 × 0308 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0378 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] +÷ 0378 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] ÷ 0378 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ 0378 × 0308 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ 0378 ÷ D800 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] @@ -354,8 +406,23 @@ ÷ D800 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ D800 ÷ AC01 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ D800 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ D800 ÷ 1F1E6 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] +÷ D800 ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] ÷ D800 ÷ 0378 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3] ÷ D800 ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ D800 ÷ D800 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] 
<surrogate-D800> (Control) ÷ [0.3] ÷ D800 ÷ 0308 ÷ D800 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] -# Lines: 338 +÷ 0061 ÷ 1F1E6 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3] +÷ 1F1F7 × 1F1FA ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER R (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER U (Regional_Indicator) ÷ [0.3] +÷ 1F1F7 × 1F1FA × 1F1F8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER R (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER U (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER S (Regional_Indicator) ÷ [0.3] +÷ 1F1F7 × 1F1FA × 1F1F8 × 1F1EA ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER R (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER U (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER S (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER E (Regional_Indicator) ÷ [0.3] +÷ 1F1F7 × 1F1FA ÷ 200B ÷ 1F1F8 × 1F1EA ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER R (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER U (Regional_Indicator) ÷ [5.0] ZERO WIDTH SPACE (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER S (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER E (Regional_Indicator) ÷ [0.3] +÷ 1F1E6 × 1F1E7 × 1F1E8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER B (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER C (Regional_Indicator) ÷ [0.3] +÷ 1F1E6 × 200D ÷ 1F1E7 × 1F1E8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] ZERO WIDTH JOINER (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER B (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER C (Regional_Indicator) ÷ [0.3] +÷ 1F1E6 × 1F1E7 × 200D ÷ 1F1E8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × 
[8.1] REGIONAL INDICATOR SYMBOL LETTER B (Regional_Indicator) × [9.0] ZERO WIDTH JOINER (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (Regional_Indicator) ÷ [0.3] +÷ 0020 × 200D ÷ 0646 ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (Extend) ÷ [999.0] ARABIC LETTER NOON (Other) ÷ [0.3] +÷ 0646 × 200D ÷ 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (Other) × [9.0] ZERO WIDTH JOINER (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +# +# Lines: 402 +# +# EOF diff --git a/gnu/usr.bin/perl/lib/unicore/auxiliary/GraphemeBreakProperty.txt b/gnu/usr.bin/perl/lib/unicore/auxiliary/GraphemeBreakProperty.txt index d3f480da599..948faa9d5c8 100644 --- a/gnu/usr.bin/perl/lib/unicore/auxiliary/GraphemeBreakProperty.txt +++ b/gnu/usr.bin/perl/lib/unicore/auxiliary/GraphemeBreakProperty.txt @@ -1,8 +1,8 @@ -# GraphemeBreakProperty-6.1.0.txt -# Date: 2011-12-05, 16:44:15 GMT [MD] +# GraphemeBreakProperty-6.2.0.txt +# Date: 2012-08-13, 19:12:02 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -308,6 +308,12 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 # ================================================ +1F1E6..1F1FF ; Regional_Indicator # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z + +# Total code points: 26 + +# ================================================ + 0903 ; SpacingMark # Mc DEVANAGARI SIGN VISARGA 093B ; SpacingMark # Mc DEVANAGARI VOWEL SIGN OOE 093E..0940 ; SpacingMark # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II diff --git a/gnu/usr.bin/perl/lib/unicore/auxiliary/SentenceBreakProperty.txt b/gnu/usr.bin/perl/lib/unicore/auxiliary/SentenceBreakProperty.txt index a5eb0b71c0c..f29dc4e1993 100644 --- a/gnu/usr.bin/perl/lib/unicore/auxiliary/SentenceBreakProperty.txt +++ b/gnu/usr.bin/perl/lib/unicore/auxiliary/SentenceBreakProperty.txt @@ -1,8 +1,8 @@ -# SentenceBreakProperty-6.1.0.txt -# Date: 2011-11-27, 05:10:50 GMT [MD] +# SentenceBreakProperty-6.2.0.txt +# Date: 2012-05-23, 20:35:14 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ diff --git a/gnu/usr.bin/perl/lib/unicore/auxiliary/WordBreakProperty.txt b/gnu/usr.bin/perl/lib/unicore/auxiliary/WordBreakProperty.txt index 7f3225c6a8c..2caa16b46bc 100644 --- a/gnu/usr.bin/perl/lib/unicore/auxiliary/WordBreakProperty.txt +++ b/gnu/usr.bin/perl/lib/unicore/auxiliary/WordBreakProperty.txt @@ -1,8 +1,8 @@ -# WordBreakProperty-6.1.0.txt -# Date: 2011-11-27, 05:10:51 GMT [MD] +# WordBreakProperty-6.2.0.txt +# Date: 2012-08-13, 19:12:09 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -395,6 +395,12 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 # ================================================ +1F1E6..1F1FF ; Regional_Indicator # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z + +# Total code points: 26 + +# ================================================ + 00AD ; Format # Cf SOFT HYPHEN 0600..0604 ; Format # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 06DD ; Format # Cf ARABIC END OF AYAH diff --git a/gnu/usr.bin/perl/lib/unicore/extracted/DBidiClass.txt b/gnu/usr.bin/perl/lib/unicore/extracted/DBidiClass.txt index 270a87e847e..eac65b659e9 100644 --- a/gnu/usr.bin/perl/lib/unicore/extracted/DBidiClass.txt +++ b/gnu/usr.bin/perl/lib/unicore/extracted/DBidiClass.txt @@ -1,8 +1,8 @@ -# DerivedBidiClass-6.1.0.txt -# Date: 2011-12-11, 18:26:53 GMT [MD] +# DerivedBidiClass-6.2.0.txt +# Date: 2012-05-20, 00:42:30 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -807,8 +807,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] <private-use-F0000>..<private-use-FFFFD> 100000..10FFFD; L # Co [65534] <private-use-100000>..<private-use-10FFFD> -# The above property value applies to 858960 code points not listed here. -# Total code points: 1098531 +# The above property value applies to 858959 code points not listed here. 
+# Total code points: 1098530 # ================================================ @@ -971,7 +971,7 @@ FF0D ; ES # Pd FULLWIDTH HYPHEN-MINUS 0E3F ; ET # Sc THAI CURRENCY SYMBOL BAHT 17DB ; ET # Sc KHMER CURRENCY SYMBOL RIEL 2030..2034 ; ET # Po [5] PER MILLE SIGN..TRIPLE PRIME -20A0..20B9 ; ET # Sc [26] EURO-CURRENCY SIGN..INDIAN RUPEE SIGN +20A0..20BA ; ET # Sc [27] EURO-CURRENCY SIGN..TURKISH LIRA SIGN 212E ; ET # So ESTIMATED SYMBOL 2213 ; ET # Sm MINUS-OR-PLUS SIGN A838 ; ET # Sc NORTH INDIC RUPEE MARK @@ -985,7 +985,7 @@ FF05 ; ET # Po FULLWIDTH PERCENT SIGN FFE0..FFE1 ; ET # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN FFE5..FFE6 ; ET # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN -# Total code points: 65 +# Total code points: 66 # ================================================ diff --git a/gnu/usr.bin/perl/lib/unicore/extracted/DBinaryProperties.txt b/gnu/usr.bin/perl/lib/unicore/extracted/DBinaryProperties.txt index 6d23c068a92..815904296df 100644 --- a/gnu/usr.bin/perl/lib/unicore/extracted/DBinaryProperties.txt +++ b/gnu/usr.bin/perl/lib/unicore/extracted/DBinaryProperties.txt @@ -1,8 +1,8 @@ -# DerivedBinaryProperties-6.1.0.txt -# Date: 2011-07-25, 00:54:10 GMT [MD] +# DerivedBinaryProperties-6.2.0.txt +# Date: 2012-05-23, 20:34:43 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ diff --git a/gnu/usr.bin/perl/lib/unicore/extracted/DCombiningClass.txt b/gnu/usr.bin/perl/lib/unicore/extracted/DCombiningClass.txt index 33495d2c507..36aed6ae62c 100644 --- a/gnu/usr.bin/perl/lib/unicore/extracted/DCombiningClass.txt +++ b/gnu/usr.bin/perl/lib/unicore/extracted/DCombiningClass.txt @@ -1,8 +1,8 @@ -# DerivedCombiningClass-6.1.0.txt -# Date: 2011-12-05, 16:44:07 GMT [MD] +# DerivedCombiningClass-6.2.0.txt +# Date: 2012-08-13, 19:56:56 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -718,7 +718,7 @@ 208D ; 0 # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; 0 # Pe SUBSCRIPT RIGHT PARENTHESIS 2090..209C ; 0 # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T -20A0..20B9 ; 0 # Sc [26] EURO-CURRENCY SIGN..INDIAN RUPEE SIGN +20A0..20BA ; 0 # Sc [27] EURO-CURRENCY SIGN..TURKISH LIRA SIGN 20DD..20E0 ; 0 # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH 20E2..20E4 ; 0 # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE 2100..2101 ; 0 # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT @@ -1514,7 +1514,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] <private-use-F0000>..<private-use-FFFFD> 100000..10FFFD; 0 # Co [65534] <private-use-100000>..<private-use-10FFFD> -# The above property value applies to 866463 code points not listed here. +# The above property value applies to 866462 code points not listed here. 
# Total code points: 1113459 # ================================================ @@ -1893,7 +1893,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=CCC133 +# Canonical_Combining_Class=CCC132 0F74 ; 132 # Mn TIBETAN VOWEL SIGN U diff --git a/gnu/usr.bin/perl/lib/unicore/extracted/DDecompositionType.txt b/gnu/usr.bin/perl/lib/unicore/extracted/DDecompositionType.txt index 7a40724f0cd..cb66baf3be2 100644 --- a/gnu/usr.bin/perl/lib/unicore/extracted/DDecompositionType.txt +++ b/gnu/usr.bin/perl/lib/unicore/extracted/DDecompositionType.txt @@ -1,8 +1,8 @@ -# DerivedDecompositionType-6.1.0.txt -# Date: 2011-07-25, 00:54:13 GMT [MD] +# DerivedDecompositionType-6.2.0.txt +# Date: 2012-05-23, 20:34:46 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ diff --git a/gnu/usr.bin/perl/lib/unicore/extracted/DEastAsianWidth.txt b/gnu/usr.bin/perl/lib/unicore/extracted/DEastAsianWidth.txt index f55967a2451..5d76aa63e33 100644 --- a/gnu/usr.bin/perl/lib/unicore/extracted/DEastAsianWidth.txt +++ b/gnu/usr.bin/perl/lib/unicore/extracted/DEastAsianWidth.txt @@ -1,8 +1,8 @@ -# DerivedEastAsianWidth-6.1.0.txt -# Date: 2011-11-27, 05:10:22 GMT [MD] +# DerivedEastAsianWidth-6.2.0.txt +# Date: 2012-05-20, 00:42:33 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -519,9 +519,7 @@ 10FB ; N # Po GEORGIAN PARAGRAPH SEPARATOR 10FC ; N # Lm MODIFIER LETTER GEORGIAN NAR 10FD..10FF ; N # Lo [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN -1160..11A2 ; N # Lo [67] HANGUL JUNGSEONG FILLER..HANGUL JUNGSEONG SSANGARAEA -11A8..11F9 ; N # Lo [82] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG YEORINHIEUH -1200..1248 ; N # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA +1160..1248 ; N # Lo [233] HANGUL JUNGSEONG FILLER..ETHIOPIC SYLLABLE QWA 124A..124D ; N # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; N # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; N # Lo ETHIOPIC SYLLABLE QHWA @@ -779,7 +777,7 @@ 2090..209C ; N # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 20A0..20A8 ; N # Sc [9] EURO-CURRENCY SIGN..RUPEE SIGN 20AA..20AB ; N # Sc [2] NEW SHEQEL SIGN..DONG SIGN -20AD..20B9 ; N # Sc [13] KIP SIGN..INDIAN RUPEE SIGN +20AD..20BA ; N # Sc [14] KIP SIGN..TURKISH LIRA SIGN 20D0..20DC ; N # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20DD..20E0 ; N # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH 20E1 ; N # Mn COMBINING LEFT RIGHT ARROW ABOVE @@ -1189,6 +1187,8 @@ ABEB ; N # Po MEETEI MAYEK CHEIKHEI ABEC ; N # Mc MEETEI MAYEK LUM IYEK ABED ; N # Mn MEETEI MAYEK APUN IYEK ABF0..ABF9 ; N # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE +D7B0..D7C6 ; N # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; N # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH FB00..FB06 ; N # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; N # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH FB1D ; N # Lo HEBREW LETTER YOD WITH HIRIQ @@ -1466,8 +1466,8 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG 
E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 782918 code points not listed here. -# Total code points: 801811 +# The above property value applies to 782917 code points not listed here. +# Total code points: 801894 # ================================================ @@ -1697,8 +1697,6 @@ FFED..FFEE ; H # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE # East_Asian_Width=Wide 1100..115F ; W # Lo [96] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG FILLER -11A3..11A7 ; W # Lo [5] HANGUL JUNGSEONG A-EU..HANGUL JUNGSEONG O-YAE -11FA..11FF ; W # Lo [6] HANGUL JONGSEONG KIYEOK-NIEUN..HANGUL JONGSEONG SSANGNIEUN 2329 ; W # Ps LEFT-POINTING ANGLE BRACKET 232A ; W # Pe RIGHT-POINTING ANGLE BRACKET 2E80..2E99 ; W # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP @@ -1783,8 +1781,6 @@ A016..A48C ; W # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR A490..A4C6 ; W # So [55] YI RADICAL QOT..YI RADICAL KE A960..A97C ; W # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH AC00..D7A3 ; W # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH -D7B0..D7C6 ; W # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E -D7CB..D7FB ; W # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH F900..FA6D ; W # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA6E..FA6F ; W # Cn [2] <reserved-FA6E>..<reserved-FA6F> FA70..FAD9 ; W # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 @@ -1848,7 +1844,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 2FA1E..2FFFD ; W # Cn [1504] <reserved-2FA1E>..<reserved-2FFFD> 30000..3FFFD ; W # Cn [65534] <reserved-30000>..<reserved-3FFFD> -# Total code points: 173217 +# Total code points: 173134 # ================================================ diff --git a/gnu/usr.bin/perl/lib/unicore/extracted/DGeneralCategory.txt b/gnu/usr.bin/perl/lib/unicore/extracted/DGeneralCategory.txt index 12a346f7537..546a6771372 100644 --- 
a/gnu/usr.bin/perl/lib/unicore/extracted/DGeneralCategory.txt +++ b/gnu/usr.bin/perl/lib/unicore/extracted/DGeneralCategory.txt @@ -1,8 +1,8 @@ -# DerivedGeneralCategory-6.1.0.txt -# Date: 2011-11-27, 05:10:22 GMT [MD] +# DerivedGeneralCategory-6.2.0.txt +# Date: 2012-05-20, 00:42:34 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -275,7 +275,7 @@ 2072..2073 ; Cn # [2] <reserved-2072>..<reserved-2073> 208F ; Cn # <reserved-208F> 209D..209F ; Cn # [3] <reserved-209D>..<reserved-209F> -20BA..20CF ; Cn # [22] <reserved-20BA>..<reserved-20CF> +20BB..20CF ; Cn # [21] <reserved-20BB>..<reserved-20CF> 20F1..20FF ; Cn # [15] <reserved-20F1>..<reserved-20FF> 218A..218F ; Cn # [6] <reserved-218A>..<reserved-218F> 23F4..23FF ; Cn # [12] <reserved-23F4>..<reserved-23FF> @@ -554,7 +554,7 @@ E01F0..EFFFF ; Cn # [65040] <reserved-E01F0>..<noncharacter-EFFFF> FFFFE..FFFFF ; Cn # [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> 10FFFE..10FFFF; Cn # [2] <noncharacter-10FFFE>..<noncharacter-10FFFF> -# Total code points: 864415 +# Total code points: 864414 # ================================================ @@ -3230,7 +3230,7 @@ FFE9..FFEC ; Sm # [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW 0BF9 ; Sc # TAMIL RUPEE SIGN 0E3F ; Sc # THAI CURRENCY SYMBOL BAHT 17DB ; Sc # KHMER CURRENCY SYMBOL RIEL -20A0..20B9 ; Sc # [26] EURO-CURRENCY SIGN..INDIAN RUPEE SIGN +20A0..20BA ; Sc # [27] EURO-CURRENCY SIGN..TURKISH LIRA SIGN A838 ; Sc # NORTH INDIC RUPEE MARK FDFC ; Sc # RIAL SIGN FE69 ; Sc # SMALL DOLLAR SIGN @@ -3238,7 +3238,7 @@ FF04 ; Sc # FULLWIDTH DOLLAR SIGN FFE0..FFE1 ; Sc # [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN FFE5..FFE6 ; Sc # [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN -# Total code points: 48 +# Total code points: 49 # 
================================================ diff --git a/gnu/usr.bin/perl/lib/unicore/extracted/DJoinGroup.txt b/gnu/usr.bin/perl/lib/unicore/extracted/DJoinGroup.txt index bf3f10c8eba..c1efad76c16 100644 --- a/gnu/usr.bin/perl/lib/unicore/extracted/DJoinGroup.txt +++ b/gnu/usr.bin/perl/lib/unicore/extracted/DJoinGroup.txt @@ -1,8 +1,8 @@ -# DerivedJoiningGroup-6.1.0.txt -# Date: 2011-07-25, 00:54:14 GMT [MD] +# DerivedJoiningGroup-6.2.0.txt +# Date: 2012-05-23, 20:34:47 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ diff --git a/gnu/usr.bin/perl/lib/unicore/extracted/DJoinType.txt b/gnu/usr.bin/perl/lib/unicore/extracted/DJoinType.txt index f9d7c7af9ce..f8cfd1d41db 100644 --- a/gnu/usr.bin/perl/lib/unicore/extracted/DJoinType.txt +++ b/gnu/usr.bin/perl/lib/unicore/extracted/DJoinType.txt @@ -1,8 +1,8 @@ -# DerivedJoiningType-6.1.0.txt -# Date: 2011-11-27, 05:10:23 GMT [MD] +# DerivedJoiningType-6.2.0.txt +# Date: 2012-05-23, 20:34:48 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ diff --git a/gnu/usr.bin/perl/lib/unicore/extracted/DLineBreak.txt b/gnu/usr.bin/perl/lib/unicore/extracted/DLineBreak.txt index c2bae071d5c..fb1fe48f44a 100644 --- a/gnu/usr.bin/perl/lib/unicore/extracted/DLineBreak.txt +++ b/gnu/usr.bin/perl/lib/unicore/extracted/DLineBreak.txt @@ -1,8 +1,8 @@ -# DerivedLineBreak-6.1.0.txt -# Date: 2011-11-27, 05:10:24 GMT [MD] +# DerivedLineBreak-6.2.0.txt +# Date: 2012-08-13, 19:20:17 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -23,8 +23,8 @@ E000..F8FF ; XX # Co [6400] <private-use-E000>..<private-use-F8FF> F0000..FFFFD ; XX # Co [65534] <private-use-F0000>..<private-use-FFFFD> 100000..10FFFD; XX # Co [65534] <private-use-100000>..<private-use-10FFFD> -# The above property value applies to 780870 code points not listed here. -# Total code points: 918338 +# The above property value applies to 780869 code points not listed here. +# Total code points: 918337 # ================================================ @@ -347,7 +347,7 @@ FE13..FE14 ; IS # Po [2] PRESENTATION FORM FOR VERTICAL COLON..PRESENTATION 17DB ; PR # Sc KHMER CURRENCY SYMBOL RIEL 20A0..20A6 ; PR # Sc [7] EURO-CURRENCY SIGN..NAIRA SIGN 20A8..20B5 ; PR # Sc [14] RUPEE SIGN..CEDI SIGN -20B7..20B9 ; PR # Sc [3] SPESMILO SIGN..INDIAN RUPEE SIGN +20B7..20BA ; PR # Sc [4] SPESMILO SIGN..TURKISH LIRA SIGN 2116 ; PR # So NUMERO SIGN 2212..2213 ; PR # Sm [2] MINUS SIGN..MINUS-OR-PLUS SIGN FE69 ; PR # Sc SMALL DOLLAR SIGN @@ -355,7 +355,7 @@ FF04 ; PR # Sc FULLWIDTH DOLLAR SIGN FFE1 ; PR # Sc FULLWIDTH POUND SIGN FFE5..FFE6 ; PR # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN -# Total code points: 45 +# Total code points: 46 # ================================================ @@ -873,7 +873,8 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 2300..2307 ; AL # So [8] DIAMETER SIGN..WAVY LINE 2308..230B ; AL # Sm [4] LEFT CEILING..RIGHT FLOOR 230C..2311 ; AL # So [6] BOTTOM RIGHT CROP..SQUARE LOZENGE -2313..231F ; AL # So [13] SEGMENT..BOTTOM RIGHT CORNER +2313..2319 ; AL # So [7] SEGMENT..TURNED NOT SIGN +231C..231F ; AL # So [4] TOP LEFT CORNER..BOTTOM RIGHT CORNER 2320..2321 ; AL # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL 2322..2328 ; AL # So [7] FROWN..KEYBOARD 232B..237B ; AL # So [81] ERASE TO THE LEFT..NOT CHECK MARK @@ -882,7 +883,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI 
MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 239B..23B3 ; AL # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM 23B4..23DB ; AL # So [40] TOP SQUARE BRACKET..FUSE 23DC..23E1 ; AL # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET -23E2..23F3 ; AL # So [18] WHITE TRAPEZIUM..HOURGLASS WITH FLOWING SAND +23E2..23EF ; AL # So [14] WHITE TRAPEZIUM..BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR 2400..2426 ; AL # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO 2440..244A ; AL # So [11] OCR HOOK..OCR DOUBLE BACKSLASH 24FF ; AL # No NEGATIVE CIRCLED DIGIT ZERO @@ -902,26 +903,27 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 25E6..25EE ; AL # So [9] WHITE BULLET..UP-POINTING TRIANGLE WITH RIGHT HALF BLACK 25F0..25F7 ; AL # So [8] WHITE SQUARE WITH UPPER LEFT QUADRANT..WHITE CIRCLE WITH UPPER RIGHT QUADRANT 25F8..25FF ; AL # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE -2600..2604 ; AL # So [5] BLACK SUN WITH RAYS..COMET +2604 ; AL # So COMET 2607..2608 ; AL # So [2] LIGHTNING..THUNDERSTORM 260A..260D ; AL # So [4] ASCENDING NODE..OPPOSITION 2610..2613 ; AL # So [4] BALLOT BOX..SALTIRE -2618..261B ; AL # So [4] SHAMROCK..BLACK RIGHT POINTING INDEX -261D ; AL # So WHITE UP POINTING INDEX -261F..263F ; AL # So [33] WHITE DOWN POINTING INDEX..MERCURY +2619 ; AL # So REVERSED ROTATED FLORAL HEART BULLET +2620..2638 ; AL # So [25] SKULL AND CROSSBONES..WHEEL OF DHARMA +263C..263F ; AL # So [4] WHITE SUN WITH RAYS..MERCURY 2641 ; AL # So EARTH 2643..265F ; AL # So [29] JUPITER..BLACK CHESS PAWN 2662 ; AL # So WHITE DIAMOND SUIT 2666 ; AL # So BLACK DIAMOND SUIT 266B ; AL # So BEAMED EIGHTH NOTES 266E ; AL # So MUSIC NATURAL SIGN -2670..269D ; AL # So [46] WEST SYRIAC CROSS..OUTLINED WHITE STAR -26A0..26BD ; AL # So [30] WARNING SIGN..SOCCER BALL -26C0..26C3 ; AL # So [4] WHITE DRAUGHTS MAN..BLACK DRAUGHTS KING +2670..267E ; AL # So [15] WEST SYRIAC CROSS..PERMANENT PAPER SIGN +2680..269D ; AL # So [30] DIE FACE-1..OUTLINED 
WHITE STAR +26A0..26BC ; AL # So [29] WARNING SIGN..SESQUIQUADRATE 26CE ; AL # So OPHIUCHUS 26E2 ; AL # So ASTRONOMICAL SYMBOL FOR URANUS 26E4..26E7 ; AL # So [4] PENTAGRAM..INVERTED PENTAGRAM -2701..2756 ; AL # So [86] UPPER BLADE SCISSORS..BLACK DIAMOND MINUS WHITE X +2705..2707 ; AL # So [3] WHITE HEAVY CHECK MARK..TAPE DRIVE +270E..2756 ; AL # So [73] LOWER RIGHT PENCIL..BLACK DIAMOND MINUS WHITE X 2758..275A ; AL # So [3] LIGHT VERTICAL BAR..HEAVY VERTICAL BAR 275F..2761 ; AL # So [3] HEAVY LOW SINGLE COMMA QUOTATION MARK ORNAMENT..CURVED STEM PARAGRAPH SIGN ORNAMENT 2764..2767 ; AL # So [4] HEAVY BLACK HEART..ROTATED FLORAL HEART BULLET @@ -1220,40 +1222,49 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1 ; AL # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -1F000..1F02B ; AL # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK -1F030..1F093 ; AL # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 -1F0A0..1F0AE ; AL # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES -1F0B1..1F0BE ; AL # So [14] PLAYING CARD ACE OF HEARTS..PLAYING CARD KING OF HEARTS -1F0C1..1F0CF ; AL # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER -1F0D1..1F0DF ; AL # So [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER 1F12E ; AL # So CIRCLED WZ 1F16A..1F16B ; AL # So [2] RAISED MC SIGN..RAISED MD SIGN -1F1E6..1F1FF ; AL # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z -1F300..1F320 ; AL # So [33] CYCLONE..SHOOTING STAR -1F330..1F335 ; AL # So [6] CHESTNUT..CACTUS -1F337..1F37C ; AL # So [70] TULIP..BABY BOTTLE -1F380..1F393 ; AL # So [20] RIBBON..GRADUATION CAP -1F3A0..1F3C4 ; AL # So [37] CAROUSEL 
HORSE..SURFER -1F3C6..1F3CA ; AL # So [5] TROPHY..SWIMMER -1F3E0..1F3F0 ; AL # So [17] HOUSE BUILDING..EUROPEAN CASTLE -1F400..1F43E ; AL # So [63] RAT..PAW PRINTS -1F440 ; AL # So EYES -1F442..1F4F7 ; AL # So [182] EAR..CAMERA -1F4F9..1F4FC ; AL # So [4] VIDEO CAMERA..VIDEOCASSETTE -1F500..1F53D ; AL # So [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE +1F3B5..1F3B6 ; AL # So [2] MUSICAL NOTE..MULTIPLE MUSICAL NOTES +1F3BC ; AL # So MUSICAL SCORE +1F4A0 ; AL # So DIAMOND SHAPE WITH A DOT INSIDE +1F4A2 ; AL # So ANGER SYMBOL +1F4A4 ; AL # So SLEEPING SYMBOL +1F4AF ; AL # So HUNDRED POINTS SYMBOL +1F4B1..1F4B2 ; AL # So [2] CURRENCY EXCHANGE..HEAVY DOLLAR SIGN +1F500..1F506 ; AL # So [7] TWISTED RIGHTWARDS ARROWS..HIGH BRIGHTNESS SYMBOL +1F517..1F524 ; AL # So [14] LINK SYMBOL..INPUT SYMBOL FOR LATIN LETTERS +1F532..1F53D ; AL # So [12] BLACK SQUARE BUTTON..DOWN-POINTING SMALL RED TRIANGLE 1F540..1F543 ; AL # So [4] CIRCLED CROSS POMMEE..NOTCHED LEFT SEMICIRCLE WITH THREE DOTS -1F550..1F567 ; AL # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY -1F5FB..1F640 ; AL # So [70] MOUNT FUJI..WEARY CAT FACE -1F645..1F64F ; AL # So [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS -1F680..1F6C5 ; AL # So [70] ROCKET..LEFT LUGGAGE 1F700..1F773 ; AL # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE -# Total code points: 16251 +# Total code points: 15355 # ================================================ # Line_Break=Ideographic +231A..231B ; ID # So [2] WATCH..HOURGLASS +23F0..23F3 ; ID # So [4] ALARM CLOCK..HOURGLASS WITH FLOWING SAND +2600..2603 ; ID # So [4] BLACK SUN WITH RAYS..SNOWMAN +2614..2615 ; ID # So [2] UMBRELLA WITH RAIN DROPS..HOT BEVERAGE +2618 ; ID # So SHAMROCK +261A..261F ; ID # So [6] BLACK LEFT POINTING INDEX..WHITE DOWN POINTING INDEX +2639..263B ; ID # So [3] WHITE FROWNING FACE..BLACK SMILING FACE +2668 ; ID # So HOT SPRINGS +267F ; ID # So WHEELCHAIR SYMBOL +26BD..26C8 ; ID # So [12] SOCCER 
BALL..THUNDER CLOUD AND RAIN +26CD ; ID # So DISABLED CAR +26CF..26D1 ; ID # So [3] PICK..HELMET WITH WHITE CROSS +26D3..26D4 ; ID # So [2] CHAINS..NO ENTRY +26D8..26D9 ; ID # So [2] BLACK LEFT LANE MERGE..WHITE LEFT LANE MERGE +26DC ; ID # So LEFT CLOSED ENTRY +26DF..26E1 ; ID # So [3] BLACK TRUCK..RESTRICTED LEFT ENTRY-2 +26EA ; ID # So CHURCH +26F1..26F5 ; ID # So [5] UMBRELLA ON GROUND..SAILBOAT +26F7..26FA ; ID # So [4] SKIER..TENT +26FD..26FF ; ID # So [3] FUEL PUMP..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE +2701..2704 ; ID # So [4] UPPER BLADE SCISSORS..WHITE SCISSORS +2708..270D ; ID # So [6] AIRPLANE..WRITING HAND 2E80..2E99 ; ID # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP 2E9B..2EF3 ; ID # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE 2F00..2FD5 ; ID # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE @@ -1359,10 +1370,40 @@ FFE2 ; ID # Sm FULLWIDTH NOT SIGN FFE3 ; ID # Sk FULLWIDTH MACRON FFE4 ; ID # So FULLWIDTH BROKEN BAR 1B000..1B001 ; ID # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE +1F000..1F02B ; ID # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK +1F030..1F093 ; ID # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 +1F0A0..1F0AE ; ID # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES +1F0B1..1F0BE ; ID # So [14] PLAYING CARD ACE OF HEARTS..PLAYING CARD KING OF HEARTS +1F0C1..1F0CF ; ID # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER +1F0D1..1F0DF ; ID # So [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER 1F200..1F202 ; ID # So [3] SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA 1F210..1F23A ; ID # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6 1F240..1F248 ; ID # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 1F250..1F251 ; ID # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT +1F300..1F320 ; ID # So [33] CYCLONE..SHOOTING STAR +1F330..1F335 ; ID # So [6] 
CHESTNUT..CACTUS +1F337..1F37C ; ID # So [70] TULIP..BABY BOTTLE +1F380..1F393 ; ID # So [20] RIBBON..GRADUATION CAP +1F3A0..1F3B4 ; ID # So [21] CAROUSEL HORSE..FLOWER PLAYING CARDS +1F3B7..1F3BB ; ID # So [5] SAXOPHONE..VIOLIN +1F3BD..1F3C4 ; ID # So [8] RUNNING SHIRT WITH SASH..SURFER +1F3C6..1F3CA ; ID # So [5] TROPHY..SWIMMER +1F3E0..1F3F0 ; ID # So [17] HOUSE BUILDING..EUROPEAN CASTLE +1F400..1F43E ; ID # So [63] RAT..PAW PRINTS +1F440 ; ID # So EYES +1F442..1F49F ; ID # So [94] EAR..HEART DECORATION +1F4A1 ; ID # So ELECTRIC LIGHT BULB +1F4A3 ; ID # So BOMB +1F4A5..1F4AE ; ID # So [10] COLLISION SYMBOL..WHITE FLOWER +1F4B0 ; ID # So MONEY BAG +1F4B3..1F4F7 ; ID # So [69] CREDIT CARD..CAMERA +1F4F9..1F4FC ; ID # So [4] VIDEO CAMERA..VIDEOCASSETTE +1F507..1F516 ; ID # So [16] SPEAKER WITH CANCELLATION STROKE..BOOKMARK +1F525..1F531 ; ID # So [13] FIRE..TRIDENT EMBLEM +1F550..1F567 ; ID # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY +1F5FB..1F640 ; ID # So [70] MOUNT FUJI..WEARY CAT FACE +1F645..1F64F ; ID # So [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS +1F680..1F6C5 ; ID # So [70] ROCKET..LEFT LUGGAGE 20000..2A6D6 ; ID # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A6D7..2A6FF ; ID # Cn [41] <reserved-2A6D7>..<reserved-2A6FF> 2A700..2B734 ; ID # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 @@ -1373,7 +1414,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 2FA1E..2FFFD ; ID # Cn [1504] <reserved-2FA1E>..<reserved-2FFFD> 30000..3FFFD ; ID # Cn [65534] <reserved-30000>..<reserved-3FFFD> -# Total code points: 161793 +# Total code points: 162700 # ================================================ @@ -2057,22 +2098,26 @@ AADE..AADF ; SA # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI 2605..2606 ; AI # So [2] BLACK STAR..WHITE STAR 2609 ; AI # So SUN 260E..260F ; AI # So [2] BLACK TELEPHONE..WHITE TELEPHONE -2614..2617 ; AI # So [4] UMBRELLA WITH RAIN DROPS..BLACK SHOGI PIECE -261C ; AI # So WHITE 
LEFT POINTING INDEX -261E ; AI # So WHITE RIGHT POINTING INDEX +2616..2617 ; AI # So [2] WHITE SHOGI PIECE..BLACK SHOGI PIECE 2640 ; AI # So FEMALE SIGN 2642 ; AI # So MALE SIGN 2660..2661 ; AI # So [2] BLACK SPADE SUIT..WHITE HEART SUIT 2663..2665 ; AI # So [3] BLACK CLUB SUIT..BLACK HEART SUIT -2667..266A ; AI # So [4] WHITE CLUB SUIT..EIGHTH NOTE +2667 ; AI # So WHITE CLUB SUIT +2669..266A ; AI # So [2] QUARTER NOTE..EIGHTH NOTE 266C..266D ; AI # So [2] BEAMED SIXTEENTH NOTES..MUSIC FLAT SIGN 266F ; AI # Sm MUSIC SHARP SIGN 269E..269F ; AI # So [2] THREE LINES CONVERGING RIGHT..THREE LINES CONVERGING LEFT -26BE..26BF ; AI # So [2] BASEBALL..SQUARED KEY -26C4..26CD ; AI # So [10] SNOWMAN WITHOUT SNOW..DISABLED CAR -26CF..26E1 ; AI # So [19] PICK..RESTRICTED LEFT ENTRY-2 +26C9..26CC ; AI # So [4] TURNED WHITE SHOGI PIECE..CROSSING LANES +26D2 ; AI # So CIRCLED CROSSING LANES +26D5..26D7 ; AI # So [3] ALTERNATE ONE-WAY LEFT WAY TRAFFIC..WHITE TWO-WAY LEFT WAY TRAFFIC +26DA..26DB ; AI # So [2] DRIVE SLOW SIGN..HEAVY WHITE DOWN-POINTING TRIANGLE +26DD..26DE ; AI # So [2] SQUARED SALTIRE..FALLING DIAGONAL IN WHITE CIRCLE IN BLACK SQUARE 26E3 ; AI # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE -26E8..26FF ; AI # So [24] BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE +26E8..26E9 ; AI # So [2] BLACK CROSS ON SHIELD..SHINTO SHRINE +26EB..26F0 ; AI # So [6] CASTLE..MOUNTAIN +26F6 ; AI # So SQUARE FOUR CORNERS +26FB..26FC ; AI # So [2] JAPANESE BANK SYMBOL..HEADSTONE GRAVEYARD SYMBOL 2757 ; AI # So HEAVY EXCLAMATION MARK SYMBOL 2776..2793 ; AI # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN 2B55..2B59 ; AI # So [5] HEAVY LARGE CIRCLE..HEAVY CIRCLED SALTIRE @@ -2083,7 +2128,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1F130..1F169 ; AI # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F19A ; AI # So [43] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VS -# Total code 
points: 724 +# Total code points: 687 # ================================================ @@ -3025,4 +3070,12 @@ FF70 ; CJ # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK # Total code points: 51 +# ================================================ + +# Line_Break=Regional_Indicator + +1F1E6..1F1FF ; RI # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z + +# Total code points: 26 + # EOF diff --git a/gnu/usr.bin/perl/lib/unicore/extracted/DNumType.txt b/gnu/usr.bin/perl/lib/unicore/extracted/DNumType.txt index 92866603e78..23d2186a992 100644 --- a/gnu/usr.bin/perl/lib/unicore/extracted/DNumType.txt +++ b/gnu/usr.bin/perl/lib/unicore/extracted/DNumType.txt @@ -1,8 +1,8 @@ -# DerivedNumericType-6.1.0.txt -# Date: 2011-08-23, 00:47:14 GMT [MD] +# DerivedNumericType-6.2.0.txt +# Date: 2012-08-13, 19:20:20 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -131,9 +131,7 @@ F9FD ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 10B78..10B7F ; Numeric # No [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND 10E69..10E7E ; Numeric # No [22] RUMI NUMBER TEN..RUMI FRACTION TWO THIRDS 1105B..11065 ; Numeric # No [11] BRAHMI NUMBER TEN..BRAHMI NUMBER ONE THOUSAND -12400..12431 ; Numeric # Nl [50] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN FIVE SHARU -12434..12455 ; Numeric # Nl [34] CUNEIFORM NUMERIC SIGN ONE BURU..CUNEIFORM NUMERIC SIGN FIVE BAN2 VARIANT FORM -12458..12462 ; Numeric # Nl [11] CUNEIFORM NUMERIC SIGN ONE ESHE3..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER +12400..12462 ; Numeric # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 1D360..1D371 ; Numeric # No [18] COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TENS DIGIT NINE 20001 ; Numeric # Lo CJK UNIFIED 
IDEOGRAPH-20001 20064 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-20064 @@ -152,7 +150,7 @@ F9FD ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 2626D ; Numeric # Lo CJK UNIFIED IDEOGRAPH-2626D 2F890 ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-2F890 -# Total code points: 637 +# Total code points: 641 # ================================================ diff --git a/gnu/usr.bin/perl/lib/unicore/extracted/DNumValues.txt b/gnu/usr.bin/perl/lib/unicore/extracted/DNumValues.txt index 02d408eb4b6..517bede77b9 100644 --- a/gnu/usr.bin/perl/lib/unicore/extracted/DNumValues.txt +++ b/gnu/usr.bin/perl/lib/unicore/extracted/DNumValues.txt @@ -1,33 +1,41 @@ -# DerivedNumericValues-6.1.0.txt -# Date: 2011-08-19, 17:58:36 GMT [MD] +# DerivedNumericValues-6.2.0.txt +# Date: 2012-08-13, 19:20:22 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # ================================================ # Derived Property: Numeric_Value -# The values are based on field 8 of UnicodeData.txt, plus the fields -# kAccountingNumeric, kOtherNumeric, kPrimaryNumeric in the Unicode Han Database (Unihan). -# The derivations for these values are as follows. -# Numeric_Value = the value of kAccountingNumeric, kOtherNumeric, or kPrimaryNumeric, if they exist; otherwise -# Numeric_Value = the value of field 8, if it exists; otherwise -# Numeric_Value = NaN +# Field 1: +# The values are based on field 8 of UnicodeData.txt, plus the fields +# kAccountingNumeric, kOtherNumeric, kPrimaryNumeric in the Unicode Han Database (Unihan). +# The derivations for these values are as follows. 
+# Numeric_Value = the value of kAccountingNumeric, kOtherNumeric, or kPrimaryNumeric, if they exist; otherwise +# Numeric_Value = the value of field 8, if it exists; otherwise +# Numeric_Value = NaN +# Field 2: +# This field is empty; it used to be a copy of the numeric type. # -# WARNING: Certain values, such as 0.16666667, are repeating fractions +# Field 3: +# This field was added to this extracted data as of Unicode 5.1.0, +# expressing the same numeric value either as a whole integer +# where possible, or as a rational fraction such as "1/6". +# +# WARNING: Certain values, such as 0.16666667, are repeating fractions. # Although they are only printed with a limited number of decimal places # in this file, they should be expressed to the limits of the precision # available when used. # -# The third field is empty; it used to be a copy of the numeric type. -# -# A fourth field was added to this extracted data as of -# Unicode 5.1.0, expressing the same numeric value either as -# a whole integer where possible or as a rational fraction, e.g. "1/6". 
-# -# @missing: 0000..10FFFF; ; NaN +# @missing: 0000..10FFFF; NaN; ; NaN + +# ================================================ + +12456..12457 ; -1.0 ; ; -1 # Nl [2] CUNEIFORM NUMERIC SIGN NIGIDAMIN..CUNEIFORM NUMERIC SIGN NIGIDAESH + +# Total code points: 2 # ================================================ @@ -1776,6 +1784,18 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD # ================================================ +12432 ; 216000.0 ; ; 216000 # Nl CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS DISH + +# Total code points: 1 + +# ================================================ + +12433 ; 432000.0 ; ; 432000 # Nl CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS MIN + +# Total code points: 1 + +# ================================================ + 4EBF ; 100000000.0 ; ; 100000000 # Lo CJK UNIFIED IDEOGRAPH-4EBF 5104 ; 100000000.0 ; ; 100000000 # Lo CJK UNIFIED IDEOGRAPH-5104 diff --git a/gnu/usr.bin/perl/lib/unicore/mktables b/gnu/usr.bin/perl/lib/unicore/mktables index 458d4ec26d3..808760d002c 100644 --- a/gnu/usr.bin/perl/lib/unicore/mktables +++ b/gnu/usr.bin/perl/lib/unicore/mktables @@ -40,7 +40,7 @@ my $debugging_build = $Config{"ccflags"} =~ /-DDEBUGGING/; # # mktables -- create the runtime Perl Unicode files (lib/unicore/.../*.pl), # from the Unicode database files (lib/unicore/.../*.txt), It also generates -# a pod file and a .t file +# a pod file and .t files, depending on option parameters. # # The structure of this file is: # First these introductory comments; then @@ -52,10 +52,10 @@ my $debugging_build = $Config{"ccflags"} =~ /-DDEBUGGING/; # the small actual loop to process the input files and finish up; then # a __DATA__ section, for the .t tests # -# This program works on all releases of Unicode through at least 6.0. The -# outputs have been scrutinized most intently for release 5.1. The others -# have been checked for somewhat more than just sanity. It can handle all -# existing Unicode character properties in those releases. 
+# This program works on all releases of Unicode so far. The outputs have been +# scrutinized most intently for release 5.1. The others have been checked for +# somewhat more than just sanity. It can handle all non-provisional Unicode +# character properties in those releases. # # This program is mostly about Unicode character (or code point) properties. # A property describes some attribute or quality of a code point, like if it @@ -65,8 +65,8 @@ my $debugging_build = $Config{"ccflags"} =~ /-DDEBUGGING/; # into some corresponding value. In the case of it being lowercase or not, # the mapping is either to 'Y' or 'N' (or various synonyms thereof). Each # property maps each Unicode code point to a single value, called a "property -# value". (Hence each Unicode property is a true mathematical function with -# exactly one value per code point.) +# value". (Some more recently defined properties map a code point to a set +# of values.) # # When using a property in a regular expression, what is desired isn't the # mapping of the code point to its property's value, but the reverse (or the @@ -119,7 +119,7 @@ my $map_directory = 'To'; # Where map files go. # are for mappings that don't fit into the normal scheme of things. Mappings # that require a hash entry to communicate with utf8.c are one example; # another example is mappings for charnames.pm to use which indicate a name -# that is algorithmically determinable from its code point (and vice-versa). +# that is algorithmically determinable from its code point (and the reverse). # These are used to significantly compact these tables, instead of listing # each one of the tens of thousands individually. # @@ -131,8 +131,8 @@ my $map_directory = 'To'; # Where map files go. # # Actually, there are two types of range lists, "Range_Map" is the one # associated with map tables, and "Range_List" with match tables.
-# Again, this is so that methods can be defined on one and not the other so as -# to prevent operating on them in incorrect ways. +# Again, this is so that methods can be defined on one and not the others so +# as to prevent operating on them in incorrect ways. # # Eventually, most tables are written out to files to be read by utf8_heavy.pl # in the perl core. All tables could in theory be written, but some are @@ -154,24 +154,29 @@ my $map_directory = 'To'; # Where map files go. # takes every code point and maps it to Y or N (but having ranges cuts the # number of entries in that table way down), and two match tables, one # which has a list of all the code points that map to Y, and one for all the -# code points that map to N. (For each of these, a third table is also +# code points that map to N. (For each binary property, a third table is also # generated for the pseudo Perl property. It contains the identical code -# points as the Y table, but can be written, not in the compound form, but in -# a "single" form like \p{IsUppercase}.) Many properties are binary, but some -# properties have several possible values, some have many, and properties like -# Name have a different value for every named code point. Those will not, -# unless the controlling lists are changed, have their match tables written -# out. But all the ones which can be used in regular expression \p{} and \P{} -# constructs will. Prior to 5.14, generally a property would have either its -# map table or its match tables written but not both. Again, what gets -# written is controlled by lists which can easily be changed. Starting in -# 5.14, advantage was taken of this, and all the map tables needed to -# reconstruct the Unicode db are now written out, while suppressing the -# Unicode .txt files that contain the data. Our tables are much more compact -# than the .txt files, so a significant space savings was achieved. 
- -# Properties have a 'Type', like binary, or string, or enum depending on how -# many match tables there are and the content of the maps. This 'Type' is +# points as the Y table, but can be written in regular expressions, not in the +# compound form, but in a "single" form like \p{IsUppercase}.) Many +# properties are binary, but some properties have several possible values, +# some have many, and properties like Name have a different value for every +# named code point. Those will not, unless the controlling lists are changed, +# have their match tables written out. But all the ones which can be used in +# regular expression \p{} and \P{} constructs will. Prior to 5.14, generally +# a property would have either its map table or its match tables written but +# not both. Again, what gets written is controlled by lists which can easily +# be changed. Starting in 5.14, advantage was taken of this, and all the map +# tables needed to reconstruct the Unicode db are now written out, while +# suppressing the Unicode .txt files that contain the data. Our tables are +# much more compact than the .txt files, so a significant space savings was +# achieved. Also, tables are not written out that are trivially derivable +# from tables that do get written. So, there typically is no file containing +# the code points not matched by a binary property (the table for \P{} versus +# lowercase \p{}), since you just need to invert the True table to get the +# False table. + +# Properties have a 'Type', like 'binary', or 'string', or 'enum' depending on +# how many match tables there are and the content of the maps. This 'Type' is # different than a range 'Type', so don't get confused by the two concepts # having the same name. # @@ -181,21 +186,22 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/'; # As stated earlier, this program will work on any release of Unicode so far. 
# Most obvious problems in earlier data have NOT been corrected except when -# necessary to make Perl or this program work reasonably. For example, no -# folding information was given in early releases, so this program substitutes -# lower case instead, just so that a regular expression with the /i option -# will do something that actually gives the right results in many cases. -# There are also a couple other corrections for version 1.1.5, commented at -# the point they are made. As an example of corrections that weren't made -# (but could be) is this statement from DerivedAge.txt: "The supplementary -# private use code points and the non-character code points were assigned in -# version 2.0, but not specifically listed in the UCD until versions 3.0 and -# 3.1 respectively." (To be precise it was 3.0.1 not 3.0.0) More information -# on Unicode version glitches is further down in these introductory comments. +# necessary to make Perl or this program work reasonably, and to keep out +# potential security issues. For example, no folding information was given in +# early releases, so this program substitutes lower case instead, just so that +# a regular expression with the /i option will do something that actually +# gives the right results in many cases. There are also a couple other +# corrections for version 1.1.5, commented at the point they are made. As an +# example of corrections that weren't made (but could be) is this statement +# from DerivedAge.txt: "The supplementary private use code points and the +# non-character code points were assigned in version 2.0, but not specifically +# listed in the UCD until versions 3.0 and 3.1 respectively." (To be precise +# it was 3.0.1 not 3.0.0) More information on Unicode version glitches is +# further down in these introductory comments. # -# This program works on all non-provisional properties as of 6.0, though the -# files for some are suppressed from apparent lack of demand for them. 
You -# can change which are output by changing lists in this program. +# This program works on all non-provisional properties as of the current +# Unicode release, though the files for some are suppressed for various +# reasons. You can change which are output by changing lists in this program. # # The old version of mktables emphasized the term "Fuzzy" to mean Unicode's # loose matchings rules (from Unicode TR18): @@ -207,6 +213,7 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/'; # recognized, and that loose matching of property names be used, # whereby the case distinctions, whitespace, hyphens, and underbar # are ignored. +# # The program still allows Fuzzy to override its determination of if loose # matching should be used, but it isn't currently used, as it is no longer # needed; the calculations it makes are good enough. @@ -227,12 +234,13 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/'; # values. That is, they list code points and say what the mapping # is under the given property. Some files give the mappings for # just one property; and some for many. This program goes through -# each file and populates the properties from them. Some properties -# are listed in more than one file, and Unicode has set up a -# precedence as to which has priority if there is a conflict. Thus -# the order of processing matters, and this program handles the -# conflict possibility by processing the overriding input files -# last, so that if necessary they replace earlier values. +# each file and populates the properties and their map tables from +# them. Some properties are listed in more than one file, and +# Unicode has set up a precedence as to which has priority if there +# is a conflict. Thus the order of processing matters, and this +# program handles the conflict possibility by processing the +# overriding input files last, so that if necessary they replace +# earlier values. 
# After this is all done, the program creates the property mappings not # furnished by Unicode, but derivable from what it does give. # The tables of code points that match each property value in each @@ -315,6 +323,11 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/'; # can't just take the intersection of two map tables, for example, as that # is nonsensical. # +# What about 'fate' and 'status'? The concept of a table's fate was created +# late when it became clear that something more was needed. The difference +# between this and 'status' is unclear, and could be improved if someone +# wanted to spend the effort. +# # DEBUGGING # # This program is written so it will run under miniperl. Occasionally changes @@ -327,12 +340,12 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/'; # # local $to_trace = 1 if main::DEBUG; # -# can be added to enable tracing in its lexical scope or until you insert -# another line: +# can be added to enable tracing in its lexical scope (plus dynamic) or until +# you insert another line: # # local $to_trace = 0 if main::DEBUG; # -# then use a line like "trace $a, @b, %c, ...; +# To actually trace, use a line like "trace $a, @b, %c, ...;" # # Some of the more complex subroutines already have trace statements in them. # Permanent trace statements should be like: @@ -345,7 +358,8 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/'; # my $debug_skip = 0; # # to 1, and every file whose object is in @input_file_objects and doesn't have -# a, 'non_skip => 1,' in its constructor will be skipped. +# a, 'non_skip => 1,' in its constructor will be skipped. However, skipping +# Jamo.txt or UnicodeData.txt will likely cause fatal errors. # # To compare the output tables, it may be useful to specify the -annotate # flag. This causes the tables to expand so there is one entry for each @@ -430,7 +444,7 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/'; # ones.
The program should warn you if its name will clash with others on # restrictive file systems, like DOS. If so, figure out a better name, and # add lines to the README.perl file giving that. If the file is a character -# property, it should be in the format that Unicode has by default +# property, it should be in the format that Unicode has implicitly # standardized for such files for the more recently introduced ones. # If so, the Input_file constructor for @input_file_objects can just be the # file name and release it first appeared in. If not, then it should be @@ -463,9 +477,24 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/'; # # Here are some observations about some of the issues in early versions: # -# The number of code points in \p{alpha} halved in 2.1.9. It turns out that -# the reason is that the CJK block starting at 4E00 was removed from PropList, -# and was not put back in until 3.1.0 +# Prior to version 3.0, there were 3 character decompositions. These are not +# handled by Unicode::Normalize, nor will it compile when presented a version +# that has them. However, you can trivially get it to compile by simply +# ignoring those decompositions, by changing the croak to a carp. At the time +# of this writing, the line (in cpan/Unicode-Normalize/mkheader) reads +# +# croak("Weird Canonical Decomposition of U+$h"); +# +# Simply change to a carp. It will compile, but will not know about any three +# character decomposition. + +# The number of code points in \p{alpha=True} halved in 2.1.9. It turns out +# that the reason is that the CJK block starting at 4E00 was removed from +# PropList, and was not put back in until 3.1.0. The Perl extension (the +# single property name \p{alpha}) has the correct values. But the compound +# form is simply not generated until 3.1, as it can be argued that prior to +# this release, this was not an official property. The comments for +# filter_old_style_proplist() give more details. 
# # Unicode introduced the synonym Space for White_Space in 4.1. Perl has # always had a \p{Space}. In release 3.2 only, they are not synonymous. The @@ -474,11 +503,11 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/'; # reclassified it correctly. # # Another change between 3.2 and 4.0 is the CCC property value ATBL. In 3.2 -# this was erroneously a synonym for 202. In 4.0, ATB became 202, and ATBL -# was left with no code points, as all the ones that mapped to 202 stayed -# mapped to 202. Thus if your program used the numeric name for the class, -# it would not have been affected, but if it used the mnemonic, it would have -# been. +# this was erroneously a synonym for 202 (it should be 200). In 4.0, ATB +# became 202, and ATBL was left with no code points, as all the ones that +# mapped to 202 stayed mapped to 202. Thus if your program used the numeric +# name for the class, it would not have been affected, but if it used the +# mnemonic, it would have been. # # \p{Script=Hrkt} (Katakana_Or_Hiragana) came in 4.0.1. Before that code # points which eventually came to have this script property value, instead @@ -490,6 +519,12 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/'; # tries to do the best it can for earlier releases. It is done in # process_PropertyAliases() # +# In version 2.1.2, the entry in UnicodeData.txt: +# 0275;LATIN SMALL LETTER BARRED O;Ll;0;L;;;;;N;;;;019F; +# should instead be +# 0275;LATIN SMALL LETTER BARRED O;Ll;0;L;;;;;N;;;019F;;019F +# Without this change, there are casing problems for this character. +# ############################################################################## my $UNDEF = ':UNDEF:'; # String to print out for undefined values in tracing @@ -506,6 +541,10 @@ my $MAX_LINE_WIDTH = 78; # before normal completion. my $debug_skip = 0; + +# Normally these are suppressed. +my $write_Unicode_deprecated_tables = 0; + # Set to 1 to enable tracing. 
our $to_trace = 0; @@ -608,6 +647,7 @@ sub uniques { $0 = File::Spec->canonpath($0); my $make_test_script = 0; # ? Should we output a test script +my $make_norm_test_script = 0; # ? Should we output a normalization test script my $write_unchanged_files = 0; # ? Should we update the output files even if # we don't think they have changed my $use_directory = ""; # ? Should we chdir somewhere. @@ -668,6 +708,10 @@ while (@ARGV) { { $make_test_script = 1; } + elsif ($arg eq '-makenormtest') + { + $make_norm_test_script = 1; + } elsif ($arg eq '-makelist') { $make_list = 1; } @@ -770,6 +814,8 @@ push @tables_that_may_be_empty, 'Script_Extensions=Katakana_Or_Hiragana' if $v_version ge v6.0.0; push @tables_that_may_be_empty, 'Grapheme_Cluster_Break=Prepend' if $v_version ge v6.1.0; +push @tables_that_may_be_empty, 'Canonical_Combining_Class=CCC133' + if $v_version ge v6.2.0; # The lists below are hashes, so the key is the item in the list, and the # value is the reason why it is in the list. This makes generation of @@ -950,6 +996,13 @@ my %why_obsolete; # Documentation only } } +if ($write_Unicode_deprecated_tables) { + foreach my $property (keys %why_suppressed) { + delete $why_suppressed{$property} if $property =~ + / ^ Other | Grapheme /x; + } +} + if ($v_version ge 4.0.0) { $why_stabilized{'Hyphen'} = 'Use the Line_Break property instead; see www.unicode.org/reports/tr14'; if ($v_version ge 6.0.0) { @@ -968,7 +1021,7 @@ if ($v_version ge v4.1.0) { $why_suppressed{'Script=Katakana_Or_Hiragana'} = 'Obsolete. 
All code points previously matched by this have been moved to "Script=Common".'; } if ($v_version ge v6.0.0) { - $why_suppressed{'Script=Katakana_Or_Hiragana'} .= ' Consider instead using "Script_Extensions=Katakana" or "Script_Extensions=Hiragana (or both)"'; + $why_suppressed{'Script=Katakana_Or_Hiragana'} .= ' Consider instead using "Script_Extensions=Katakana" or "Script_Extensions=Hiragana" (or both)'; $why_suppressed{'Script_Extensions=Katakana_Or_Hiragana'} = 'All code points that would be matched by this are matched by either "Script_Extensions=Katakana" or "Script_Extensions=Hiragana"'; } @@ -1025,7 +1078,7 @@ END # The input files don't list every code point. Those not listed are to be # defaulted to some value. Below are hard-coded what those values are for # non-binary properties as of 5.1. Starting in 5.0, there are -# machine-parsable comment lines in the files the give the defaults; so this +# machine-parsable comment lines in the files that give the defaults; so this # list shouldn't have to be extended. The claim is that all missing entries # for binary properties will default to 'N'. Unicode tried to change that in # 5.2, but the beta period produced enough protest that they backed off. @@ -1075,7 +1128,17 @@ my %default_mapping = ( Word_Break => 'Other', ); -# Below are files that Unicode furnishes, but this program ignores, and why +# Below are files that Unicode furnishes, but this program ignores, and why. +# NormalizationCorrections.txt requires some more explanation. It documents +# the cumulative fixes to erroneous normalizations in earlier Unicode +# versions. Its main purpose is so that someone running on an earlier version +# can use this file to override what got published in that earlier release. +# It would be easy for mktables to read and handle this file. But all the +# corrections in it should already be in the other files for the release it +# is. 
To get it to actually mean something useful, someone would have to be +# using an earlier Unicode release, and copy it to the files for that release +# and recompile. So far there has been no demand to do that, so this hasn't +# been implemented. my %ignored_files = ( 'CJKRadicals.txt' => 'Maps the kRSUnicode property values to corresponding code points', 'Index.txt' => 'Alphabetical index of Unicode characters', @@ -1086,6 +1149,8 @@ my %ignored_files = ( 'ReadMe.txt' => 'Documentation', 'StandardizedVariants.txt' => 'Certain glyph variations for character display are standardized. This lists the non-Unihan ones; the Unihan ones are also not used by Perl, and are in a separate Unicode data base L<http://www.unicode.org/ivd>', 'EmojiSources.txt' => 'Maps certain Unicode code points to their legacy Japanese cell-phone values', + 'USourceData.txt' => 'Documentation of status and cross reference of proposals for encoding by Unicode of Unihan characters', + 'USourceData.pdf' => 'Documentation of status and cross reference of proposals for encoding by Unicode of Unihan characters', 'auxiliary/WordBreakTest.html' => 'Documentation of validation tests', 'auxiliary/SentenceBreakTest.html' => 'Documentation of validation tests', 'auxiliary/GraphemeBreakTest.html' => 'Documentation of validation tests', @@ -1228,13 +1293,16 @@ my $ORDINARY = 0; # The normal fate. my $MAP_PROXIED = 1; # The map table for the property isn't written out, # but there is a file written that can be used to # reconstruct this table -my $SUPPRESSED = 3; # The file for this table is not written out. -my $INTERNAL_ONLY = 4; # The file for this table is written out, but it is +my $INTERNAL_ONLY = 2; # The file for this table is written out, but it is # for Perl's internal use only -my $PLACEHOLDER = 5; # A property that is defined as a placeholder in a - # Unicode version that doesn't have it, but we need it - # to be defined, if empty, to have things work.
- # Implies no pod entry generated +my $SUPPRESSED = 3; # The file for this table is not written out, and as a + # result, we don't bother to do many computations on + # it. +my $PLACEHOLDER = 4; # Like $SUPPRESSED, but we go through all the + # computations anyway, as the values are needed for + # things to work. This happens when we have Perl + # extensions that depend on Unicode tables that + # wouldn't normally be in a given Unicode version. # The format of the values of the tables: my $EMPTY_FORMAT = ""; @@ -1345,6 +1413,9 @@ my %loose_names_ending_in_code_point; # Same as above, but has blanks, dashes # anonymous hash. my @code_points_ending_in_code_point; +# To hold Unicode's normalization test suite +my @normalization_tests; + # Boolean: does this Unicode version have the hangul syllables, and are we # writing out a table for them? my $has_hangul_syllables = 0; @@ -1471,20 +1542,16 @@ sub populate_char_info ($) { # point of the range. my $end; if (! $viacode[$i]) { - if ($gc-> table('Surrogate')->contains($i)) { - $viacode[$i] = 'Surrogate'; - $annotate_char_type[$i] = $SURROGATE_TYPE; - $printable[$i] = 0; - $end = $gc->table('Surrogate')->containing_range($i)->end; - } - elsif ($gc-> table('Private_use')->contains($i)) { + my $nonchar; + if ($gc-> table('Private_use')->contains($i)) { $viacode[$i] = 'Private Use'; $annotate_char_type[$i] = $PRIVATE_USE_TYPE; $printable[$i] = 0; $end = $gc->table('Private_Use')->containing_range($i)->end; } - elsif (Property::property_ref('Noncharacter_Code_Point')-> table('Y')-> - contains($i)) + elsif ((defined ($nonchar = + Property::property_ref('Noncharacter_Code_Point')) + && $nonchar->table('Y')->contains($i))) { $viacode[$i] = 'Noncharacter'; $annotate_char_type[$i] = $NONCHARACTER_TYPE; @@ -1493,24 +1560,40 @@ sub populate_char_info ($) { containing_range($i)->end; } elsif ($gc-> table('Control')->contains($i)) { - $viacode[$i] = 'Control'; + $viacode[$i] = property_ref('Name_Alias')->value_of($i) || 'Control'; 
$annotate_char_type[$i] = $CONTROL_TYPE; $printable[$i] = 0; - $end = 0x81 if $i == 0x80; # Hard-code this one known case } elsif ($gc-> table('Unassigned')->contains($i)) { - $viacode[$i] = 'Unassigned, block=' . $block-> value_of($i); $annotate_char_type[$i] = $UNASSIGNED_TYPE; $printable[$i] = 0; - - # Because we name the unassigned by the blocks they are in, it - # can't go past the end of that block, and it also can't go past - # the unassigned range it is in. The special table makes sure - # that the non-characters, which are unassigned, are separated - # out. - $end = min($block->containing_range($i)->end, - $unassigned_sans_noncharacters-> containing_range($i)-> - end); + if ($v_version lt v2.0.0) { # No blocks in earliest releases + $viacode[$i] = 'Unassigned'; + $end = $gc-> table('Unassigned')->containing_range($i)->end; + } + else { + $viacode[$i] = 'Unassigned, block=' . $block-> value_of($i); + + # Because we name the unassigned by the blocks they are in, it + # can't go past the end of that block, and it also can't go + # past the unassigned range it is in. The special table makes + # sure that the non-characters, which are unassigned, are + # separated out. + $end = min($block->containing_range($i)->end, + $unassigned_sans_noncharacters-> + containing_range($i)->end); + } + } + elsif ($v_version lt v2.0.0) { # No surrogates in earliest releases + $viacode[$i] = $gc->value_of($i); + $annotate_char_type[$i] = $UNKNOWN_TYPE; + $printable[$i] = 0; + } + elsif ($gc-> table('Surrogate')->contains($i)) { + $viacode[$i] = 'Surrogate'; + $annotate_char_type[$i] = $SURROGATE_TYPE; + $printable[$i] = 0; + $end = $gc->table('Surrogate')->containing_range($i)->end; } else { Carp::my_carp_bug("Can't figure out how to annotate " @@ -1959,10 +2042,10 @@ package Input_file; # basically be a while(next_line()) {...} loop. 
# # You can also set up handlers to -# 1) call before the first line is read for pre processing +# 1) call before the first line is read, for pre processing # 2) call to adjust each line of the input before the main handler gets them # 3) call upon EOF before the main handler exits its loop -# 4) call at the end for post processing +# 4) call at the end, for post processing # # $_ is used to store the input line, and is to be filtered by the # each_line_handler()s. So, if the format of the line is not in the desired @@ -2013,7 +2096,7 @@ sub trace { return main::trace(@_); } my %property; # name of property this file is for. defaults to none, meaning not # applicable, or is otherwise determinable, for example, from each line. - main::set_access('property', \%property, qw{ c }); + main::set_access('property', \%property, qw{ c r }); my %optional; # If this is true, the file is optional. If not present, no warning is @@ -2174,6 +2257,7 @@ sub trace { return main::trace(@_); } fallback => 0, qw("") => "_operator_stringify", "." => \&main::_operator_dot, + ".=" => \&main::_operator_dot_equal, ; sub _operator_stringify { @@ -2840,10 +2924,6 @@ sub trace { return main::trace(@_); } Carp::carp_extra_args(\%args) if main::DEBUG && %args; - if (! $type{$addr}) { - $standard_form{$addr} = main::standardize($value); - } - return $self; } @@ -2851,6 +2931,7 @@ sub trace { return main::trace(@_); } fallback => 0, qw("") => "_operator_stringify", "." => \&main::_operator_dot, + ".=" => \&main::_operator_dot_equal, ; sub _operator_stringify { @@ -2872,8 +2953,11 @@ sub trace { return main::trace(@_); } } sub standard_form { - # The standard form is the value itself if the standard form is - # undefined (that is if the value is special) + # Calculate the standard form only if needed, and cache the result. + # The standard form is the value itself if the type is special. 
+ # This represents a considerable CPU and memory saving - at the time + # of writing there are 368676 non-special objects, but the standard + # form is only requested for 22047 of them - ie about 6%. my $self = shift; Carp::carp_extra_args(\@_) if main::DEBUG && @_; @@ -2881,7 +2965,10 @@ sub trace { return main::trace(@_); } my $addr = do { no overloading; pack 'J', $self; }; return $standard_form{$addr} if defined $standard_form{$addr}; - return $value{$addr}; + + my $value = $value{$addr}; + return $value if $type{$addr}; + return $standard_form{$addr} = main::standardize($value); } sub dump { @@ -2932,6 +3019,10 @@ sub trace { return main::trace(@_); } our $addr; + # Max is initialized to a negative value that isn't adjacent to 0, for + # simpler tests + my $max_init = -2; + main::setup_package(); my %ranges; @@ -2987,9 +3078,7 @@ sub trace { return main::trace(@_); } Carp::carp_extra_args(\%args) if main::DEBUG && %args; - # Max is initialized to a negative value that isn't adjacent to 0, - # for simpler tests - $max{$addr} = -2; + $max{$addr} = $max_init; $_search_ranges_cache{$addr} = 0; $ranges{$addr} = []; @@ -3001,6 +3090,7 @@ sub trace { return main::trace(@_); } fallback => 0, qw("") => "_operator_stringify", "." => \&main::_operator_dot, + ".=" => \&main::_operator_dot_equal, ; sub _operator_stringify { @@ -3073,7 +3163,7 @@ sub trace { return main::trace(@_); } no overloading; $message .= $owner_name_of{pack 'J', $self}; } - Carp::my_carp_bug($message .= "Undefined argument to _union. No union done."); + Carp::my_carp_bug($message . "Undefined argument to _union. No union done."); return; } @@ -3444,6 +3534,9 @@ sub trace { return main::trace(@_); } Carp::my_carp_bug("$owner_name_of{$addr}End of range (" . sprintf("%04X", $end) . ") must not be before start (" . sprintf("%04X", $start) . "). No action taken."); return; } + if ($end > $MAX_UNICODE_CODEPOINT && $operation eq '+') { + Carp::my_carp("$owner_name_of{$addr}Warning: Range '" . 
sprintf("%04X..%04X", $start, $end) . ") is above the Unicode maximum of " . sprintf("%04X", $MAX_UNICODE_CODEPOINT) . ". Adding it anyway"); + } #local $to_trace = 1 if main::DEBUG; if ($operation eq '-') { @@ -3471,7 +3564,7 @@ sub trace { return main::trace(@_); } # structured so this is common. if ($start > $max) { - trace "$owner_name_of{$addr} $operation", sprintf("%04X", $start) . '..' . sprintf("%04X", $end) . " ($value) type=$type" if main::DEBUG && $to_trace; + trace "$owner_name_of{$addr} $operation", sprintf("%04X..%04X (%s) type=%d; prev max=%04X", $start, $end, $value, $type, $max) if main::DEBUG && $to_trace; return if $operation eq '-'; # Deleting a non-existing range is a # no-op @@ -4070,7 +4163,12 @@ sub trace { return main::trace(@_); } # otherwise recalculate it. This is done too rarely to worry about # performance. if ($operation eq '-' && @return) { - $max{$addr} = $r->[-1]->end; + if (@$r) { + $max{$addr} = $r->[-1]->end; + } + else { # Now empty + $max{$addr} = $max_init; + } } return @return; } @@ -4174,11 +4272,41 @@ sub trace { return main::trace(@_); } return $self->_union($other) }, + '+=' => sub { my $self = shift; + my $other = shift; + my $reversed = shift; + + if ($reversed) { + Carp::my_carp_bug("Bad news. Can't cope with '" + . ref($other) + . ' += ' + . ref($self) + . "'. undef returned."); + return; + } + + return $self->_union($other) + }, '&' => sub { my $self = shift; my $other = shift; return $self->_intersect($other, 0); }, + '&=' => sub { my $self = shift; + my $other = shift; + my $reversed = shift; + + if ($reversed) { + Carp::my_carp_bug("Bad news. Can't cope with '" + . ref($other) + . ' &= ' + . ref($self) + . "'. undef returned."); + return; + } + + return $self->_intersect($other, 0); + }, '~' => "_invert", '-' => "_subtract", ; @@ -4223,10 +4351,12 @@ sub trace { return main::trace(@_); } Carp::carp_extra_args(\@_) if main::DEBUG && @_; if ($reversed) { - Carp::my_carp_bug("Can't cope with a " - . __PACKAGE__ - . 
" being the second parameter in a '-'. Subtraction ignored."); - return $self; + Carp::my_carp_bug("Bad news. Can't cope with '" + . ref($other) + . ' - ' + . ref($self) + . "'. undef returned."); + return; } my $new = Range_List->new(Initialize => $self); @@ -4860,6 +4990,7 @@ END use overload fallback => 0, "." => \&main::_operator_dot, + ".=" => \&main::_operator_dot_equal, '!=' => \&main::_operator_not_equal, '==' => \&main::_operator_equal, ; @@ -6178,7 +6309,7 @@ END my $comment = ""; my $status = $self->status; - if ($status) { + if ($status && $status ne $PLACEHOLDER) { my $warn = uc $status_past_participles{$status}; $comment .= <<END; @@ -6797,6 +6928,16 @@ sub trace { return main::trace(@_); } '+=' => sub { my $self = shift; my $other = shift; + my $reversed = shift; + + if ($reversed) { + Carp::my_carp_bug("Bad news. Can't cope with '" + . ref($other) + . ' += ' + . ref($self) + . "'. undef returned."); + return; + } return if $self->carp_if_locked; @@ -6814,14 +6955,33 @@ sub trace { return main::trace(@_); } } return $self; }, + '&=' => sub { + my $self = shift; + my $other = shift; + my $reversed = shift; + + if ($reversed) { + Carp::my_carp_bug("Bad news. Can't cope with '" + . ref($other) + . ' &= ' + . ref($self) + . "'. undef returned."); + return; + } + + return if $self->carp_if_locked; + $self->_set_range_list($self->_range_list & $other); + return $self; + }, '-' => sub { my $self = shift; my $other = shift; my $reversed = shift; - if ($reversed) { - Carp::my_carp_bug("Can't cope with a " - . __PACKAGE__ - . " being the first parameter in a '-'. Subtraction ignored."); + Carp::my_carp_bug("Bad news. Can't cope with '" + . ref($other) + . ' - ' + . ref($self) + . "'. 
undef returned."); return; } @@ -7263,7 +7423,9 @@ END my $flag = $property->status || $table->status || $table_alias_object->status; - $flags{$flag} = $status_past_participles{$flag} if $flag; + if ($flag && $flag ne $PLACEHOLDER) { + $flags{$flag} = $status_past_participles{$flag}; + } $loose_count++; @@ -7661,6 +7823,7 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace } fallback => 0, qw("") => "_operator_stringify", "." => \&main::_operator_dot, + ".=" => \&main::_operator_dot_equal, '==' => \&main::_operator_equal, '!=' => \&main::_operator_not_equal, '=' => sub { return shift }, @@ -7680,16 +7843,16 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace } Carp::carp_extra_args(\@_) if main::DEBUG && @_; if (ref $other) { - Carp::my_carp_bug("Can't cope with a " + Carp::my_carp_bug("Bad news. Can't cope with a " . ref($other) . " argument to '-='. Subtraction ignored."); return $self; } elsif ($reversed) { # Shouldn't happen in a -=, but just in case - Carp::my_carp_bug("Can't cope with a " - . __PACKAGE__ - . " being the first parameter in a '-='. Subtraction ignored."); - return $self; + Carp::my_carp_bug("Bad news. Can't cope with subtracting a " + . ref $self + . " from a non-object. undef returned."); + return; } else { no overloading; @@ -8752,6 +8915,24 @@ sub _operator_dot { : "$self$other"; } +sub _operator_dot_equal { + # Overloaded '.=' method that is common to all packages. + + my $self = shift; + my $other = shift; + my $reversed = shift; + Carp::carp_extra_args(\@_) if main::DEBUG && @_; + + $other = "" unless defined $other; + + if ($reversed) { + return $other .= "$self"; + } + else { + return "$self" . "$other"; + } +} + sub _operator_equal { # Generic overloaded '==' routine. 
To be equal, they must be the exact # same object @@ -8806,6 +8987,11 @@ sub process_PropertyAliases($) { } } + + my $scf = property_ref("Simple_Case_Folding"); + $scf->add_alias("scf"); + $scf->add_alias("sfc"); + return; } @@ -8816,11 +9002,20 @@ sub finish_property_setup { Carp::carp_extra_args(\@_) if main::DEBUG && @_; # This entry was missing from this file in earlier Unicode versions - if (-e 'Jamo.txt') { - my $jsn = property_ref('JSN'); - if (! defined $jsn) { - $jsn = Property->new('JSN', Full_Name => 'Jamo_Short_Name'); - } + if (-e 'Jamo.txt' && ! defined property_ref('JSN')) { + Property->new('JSN', Full_Name => 'Jamo_Short_Name'); + } + + # These two properties must be defined in all releases so we can generate + # the tables from them to make regex \X work, but suppress their output so + # aren't application visible prior to releases where they should be + if (! defined property_ref('GCB')) { + Property->new('GCB', Full_Name => 'Grapheme_Cluster_Break', + Fate => $PLACEHOLDER); + } + if (! defined property_ref('hst')) { + Property->new('hst', Full_Name => 'Hangul_Syllable_Type', + Fate => $PLACEHOLDER); } # These are used so much, that we set globals for them. @@ -9001,7 +9196,7 @@ na ; Name na1 ; Unicode_1_Name nt ; Numeric_Type nv ; Numeric_Value -sfc ; Simple_Case_Folding +scf ; Simple_Case_Folding slc ; Simple_Lowercase_Mapping stc ; Simple_Titlecase_Mapping suc ; Simple_Uppercase_Mapping @@ -9022,7 +9217,6 @@ END # This first set is in the original old-style proplist. 
push @return, split /\n/, <<'END'; -Alpha ; Alphabetic Bidi_C ; Bidi_Control Dash ; Dash Dia ; Diacritic @@ -9093,6 +9287,7 @@ END } if (-e 'DCoreProperties.txt') { push @return, split /\n/, <<'END'; +Alpha ; Alphabetic IDS ; ID_Start XIDC ; XID_Continue XIDS ; XID_Start @@ -9143,6 +9338,34 @@ sub process_PropValueAliases { $file->insert_lines(get_old_property_value_aliases()); } + if ($v_version lt 4.0.0) { + $file->insert_lines(split /\n/, <<'END' +hst; L ; Leading_Jamo +hst; LV ; LV_Syllable +hst; LVT ; LVT_Syllable +hst; NA ; Not_Applicable +hst; T ; Trailing_Jamo +hst; V ; Vowel_Jamo +END + ); + } + if ($v_version lt 4.1.0) { + $file->insert_lines(split /\n/, <<'END' +GCB; CN ; Control +GCB; CR ; CR +GCB; EX ; Extend +GCB; L ; L +GCB; LF ; LF +GCB; LV ; LV +GCB; LVT ; LVT +GCB; T ; T +GCB; V ; V +GCB; XX ; Other +END + ); + } + + # Add any explicit cjk values $file->insert_lines(@cjk_property_values); @@ -9267,6 +9490,9 @@ bc ; ON ; Other_Neutral bc ; R ; Right_To_Left bc ; WS ; White_Space +Bidi_M; N; No; F; False +Bidi_M; Y; Yes; T; True + # The standard combining classes are very much different in v1, so only use # ones that look right (not checked thoroughly) ccc; 0; NR ; Not_Reordered @@ -9553,6 +9779,33 @@ END return @return; } +sub process_NormalizationsTest { + + # Each line looks like: + # source code point; NFC; NFD; NFKC; NFKD + # e.g. + # 1E0A;1E0A;0044 0307;1E0A;0044 0307; + + my $file= shift; + Carp::carp_extra_args(\@_) if main::DEBUG && @_; + + # Process each line of the file ... 
+ while ($file->next_line) { + + next if /^@/; + + my ($c1, $c2, $c3, $c4, $c5) = split /\s*;\s*/; + + foreach my $var (\$c1, \$c2, \$c3, \$c4, \$c5) { + $$var = pack "U0U*", map { hex } split " ", $$var; + $$var =~ s/(\\)/$1$1/g; + } + + push @normalization_tests, + "Test_N(q$c1, q$c2, q$c3, q$c4, q$c5);\n"; + } # End of looping through the file +} + sub output_perl_charnames_line ($$) { # Output the entries in Perl_charnames specially, using 5 digits instead @@ -10324,6 +10577,7 @@ END $file->carp_bad_line("'$fields[$NUMERIC]' should be a whole or rational number. Processing as if it were") if $fields[$NUMERIC] !~ qr{ ^ -? \d+ ( / \d+ )? $ }x; if ($fields[$PERL_DECIMAL_DIGIT] ne "") { $file->carp_bad_line("$fields[$PERL_DECIMAL_DIGIT] should equal $fields[$NUMERIC]. Processing anyway") if $fields[$PERL_DECIMAL_DIGIT] != $fields[$NUMERIC]; + $file->carp_bad_line("$fields[$PERL_DECIMAL_DIGIT] should be empty since the general category ($fields[$CATEGORY]) isn't 'Nd'. Processing as Decimal") if $fields[$CATEGORY] ne "Nd"; $fields[$NUMERIC_TYPE_OTHER_DIGIT] = 'Decimal'; } elsif ($fields[$NUMERIC_TYPE_OTHER_DIGIT] ne "") { @@ -10579,7 +10833,7 @@ END Carp::carp_extra_args(\@_) if main::DEBUG && @_; # Flush the buffers. - foreach my $i (1 .. $last_field) { + foreach my $i (0 .. $last_field) { $file->insert_adjusted_lines("$start[$i]..$previous_cp; $field_names[$i]; $previous_fields[$i]"); } @@ -10597,8 +10851,9 @@ END # into it the Hangul syllable mappings. This is to avoid having # to publish a subroutine in it to compute them. (which would # essentially be this code.) This uses the algorithm published by - # Unicode. - if (property_ref('Decomposition_Mapping')->to_output_map) { + # Unicode. 
(No hangul syllables in version 1) + if ($v_version ge v2.0.0 + && property_ref('Decomposition_Mapping')->to_output_map) { for (my $S = $SBase; $S < $SBase + $SCount; $S++) { use integer; my $SIndex = $S - $SBase; @@ -10638,11 +10893,15 @@ END # the syntax is changed as well as the types to their later # terminology. Otherwise normalize.pm would be very unhappy # 5) Many ccc classes are different. These are left intact. - # 6) U+FF10 - U+FF19 are missing their numeric values in all three + # 6) U+FF10..U+FF19 are missing their numeric values in all three # fields. These are unchanged because it doesn't really cause # problems for Perl. # 7) A number of code points, such as controls, don't have their - # Unicode Version 1 Names in this file. These are unchanged. + # Unicode Version 1 Names in this file. These are added. + # 8) A number of Symbols were marked as Lm. This changes those in + # the Latin1 range, so that regexes work. + # 9) The odd characters U+03DB .. U+03E1 weren't encoded but are + # referred to by their lc equivalents. Not fixed. my @corrected_lines = split /\n/, <<'END'; 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;; @@ -10671,7 +10930,12 @@ END $file->insert_lines(@copy); } + elsif ($code_point =~ /^00/ && $fields[$CATEGORY] eq 'Lm') { + # There are no Lm characters in Latin1; these should be 'Sk', but + # there isn't that in V1. + $fields[$CATEGORY] = 'So'; + } if ($fields[$NUMERIC] eq '-') { $fields[$NUMERIC] = '-1'; # This is what 2.0 made it. @@ -10701,7 +10965,7 @@ END # If is like '<+circled> 0052 <-circled>', convert to # '<circled> 0052' $fields[$PERL_DECOMPOSITION] =~ - s/ < \+ ( .*? ) > \s* (.*?) \s* <-\1> /<$1> $2/x; + s/ < \+ ( .*? ) > \s* (.*?) \s* <-\1> /<$1> $2/xg; # Convert '<join> HHHH HHHH <join>' to '<medial> HHHH HHHH', etc. 
$fields[$PERL_DECOMPOSITION] =~ @@ -10725,6 +10989,9 @@ END # One entry has weird braces $fields[$PERL_DECOMPOSITION] =~ s/[{}]//g; + + # One entry at U+2116 has an extra <sup> + $fields[$PERL_DECOMPOSITION] =~ s/( < .*? > .* ) < .*? > \ * /$1/x; } $_ = join ';', $code_point, @fields; @@ -10732,6 +10999,108 @@ END return; } + sub filter_bad_Nd_ucd { + # Early versions specified a value in the decimal digit field even + # though the code point wasn't a decimal digit. Clear the field in + # that situation, so that the main code doesn't think it is a decimal + # digit. + + my ($code_point, @fields) = split /\s*;\s*/, $_, -1; + if ($fields[$PERL_DECIMAL_DIGIT] ne "" && $fields[$CATEGORY] ne 'Nd') { + $fields[$PERL_DECIMAL_DIGIT] = ""; + $_ = join ';', $code_point, @fields; + } + return; + } + + my @U1_control_names = split /\n/, <<'END'; +NULL +START OF HEADING +START OF TEXT +END OF TEXT +END OF TRANSMISSION +ENQUIRY +ACKNOWLEDGE +BELL +BACKSPACE +HORIZONTAL TABULATION +LINE FEED +VERTICAL TABULATION +FORM FEED +CARRIAGE RETURN +SHIFT OUT +SHIFT IN +DATA LINK ESCAPE +DEVICE CONTROL ONE +DEVICE CONTROL TWO +DEVICE CONTROL THREE +DEVICE CONTROL FOUR +NEGATIVE ACKNOWLEDGE +SYNCHRONOUS IDLE +END OF TRANSMISSION BLOCK +CANCEL +END OF MEDIUM +SUBSTITUTE +ESCAPE +FILE SEPARATOR +GROUP SEPARATOR +RECORD SEPARATOR +UNIT SEPARATOR +DELETE +BREAK PERMITTED HERE +NO BREAK HERE +INDEX +NEXT LINE +START OF SELECTED AREA +END OF SELECTED AREA +CHARACTER TABULATION SET +CHARACTER TABULATION WITH JUSTIFICATION +LINE TABULATION SET +PARTIAL LINE DOWN +PARTIAL LINE UP +REVERSE LINE FEED +SINGLE SHIFT TWO +SINGLE SHIFT THREE +DEVICE CONTROL STRING +PRIVATE USE ONE +PRIVATE USE TWO +SET TRANSMIT STATE +CANCEL CHARACTER +MESSAGE WAITING +START OF GUARDED AREA +END OF GUARDED AREA +START OF STRING +SINGLE CHARACTER INTRODUCER +CONTROL SEQUENCE INTRODUCER +STRING TERMINATOR +OPERATING SYSTEM COMMAND +PRIVACY MESSAGE +APPLICATION PROGRAM COMMAND +END + + sub filter_early_U1_names { + # Very 
early versions did not have the Unicode_1_name field specified. + # They differed in which ones were present; make sure a U1 name + # exists, so that Unicode::UCD::charinfo will work + + my ($code_point, @fields) = split /\s*;\s*/, $_, -1; + + + # @U1_control names above are entirely positional, so we pull them out + # in the exact order required, with gaps for the ones that don't have + # names. + if ($code_point =~ /^00[01]/ + || $code_point eq '007F' + || $code_point =~ /^008[2-9A-F]/ + || $code_point =~ /^009[0-8A-F]/) + { + my $u1_name = shift @U1_control_names; + $fields[$UNICODE_1_NAME] = $u1_name unless $fields[$UNICODE_1_NAME]; + $_ = join ';', $code_point, @fields; + } + return; + } + sub filter_v2_1_5_ucd { # A dozen entries in this 2.1.5 file had the mirrored and numeric # columns swapped; These all had mirrored be 'N'. So if the numeric @@ -10748,8 +11117,9 @@ END sub filter_v6_ucd { - # Unicode 6.0 co-opted the name BELL for U+1F514, but we haven't - # accepted that yet to allow for some deprecation cycles. + # Unicode 6.0 co-opted the name BELL for U+1F514, but until 5.17, + # it wasn't accepted, to allow for some deprecation cycles. This + # function is not called after 5.16 return if $_ !~ /^(?:0007|1F514|070F);/; @@ -10761,7 +11131,7 @@ END # http://www.unicode.org/versions/corrigendum8.html $fields[$BIDI] = "AL"; } - elsif ($^V lt v5.17.0) { # For 5.18 will convert to use Unicode's name + elsif ($^V lt v5.18.0) { # For 5.18 will convert to use Unicode's name $fields[$CHARNAME] = ""; } @@ -10883,6 +11253,7 @@ sub filter_arabic_shaping_line { my $lc; # Table for lowercase mapping my $tc; my $uc; + my %special_casing_code_points; sub setup_special_casing { # SpecialCasing.txt contains the non-simple case change mappings. The @@ -10949,15 +11320,26 @@ END my $simple_name = 's' . 
$full_name; my $simple = property_ref($simple_name); $simple->initialize($full_table) if $simple->to_output_map(); - - unless ($simple->to_output_map()) { - $full_table->set_proxy_for($simple_name); - } } return; } + sub filter_2_1_8_special_casing_line { + + # This version had duplicate entries in this file. Delete all but the + # first one + my @fields = split /\s*;\s*/, $_, -1; # -1 => retain trailing null + # fields + if (exists $special_casing_code_points{$fields[0]}) { + $_ = ""; + return; + } + + $special_casing_code_points{$fields[0]} = 1; + filter_special_casing_line(@_); + } + sub filter_special_casing_line { # Change the format of $_ from SpecialCasing.txt into something that # the generic handler understands. Each input line contains three @@ -11105,7 +11487,7 @@ sub filter_old_style_case_folding { # Create the map for simple only if are going to output it, for otherwise # it takes no part in anything we do. my $to_output_simple; - my $non_final_folds; + my $all_folds; sub setup_case_folding($) { # Read in the case foldings in CaseFolding.txt. This handles both @@ -11118,10 +11500,10 @@ sub filter_old_style_case_folding { property_ref('Case_Folding')->set_proxy_for('Simple_Case_Folding'); } - $non_final_folds = $perl->add_match_table("_Perl_Non_Final_Folds", + $all_folds = $perl->add_match_table("_Perl_Any_Folds", Perl_Extension => 1, Fate => $INTERNAL_ONLY, - Description => "Code points that particpate in a multi-char fold and are not the final character of said fold", + Description => "Code points that particpate in some fold", ); # If we ever wanted to show that these tables were combined, a new @@ -11161,7 +11543,7 @@ END return; } - if ($type eq 'T') { # Skip Turkic case folding, is locale dependent + if ($type =~ / ^ [IT] $/x) { # Skip Turkic case folding, is locale dependent $_ = ""; return; } @@ -11174,14 +11556,21 @@ END # so that _swash_inversion_hash() is able to construct closures # without having to worry about F mappings. 
if ($type eq 'C' || $type eq 'F' || $type eq 'I' || $type eq 'S') { + my $from = hex $range; # Assumes range is single + $all_folds->add_range($from, $from); $_ = "$range; Case_Folding; " . "$CMD_DELIM$REPLACE_CMD=$MULTIPLE_BEFORE$CMD_DELIM$map"; + if ($type eq 'F') { my @string = split " ", $map; for my $i (0 .. @string - 1 -1) { - $non_final_folds->add_range(hex $string[$i], hex $string[$i]); + my $decimal = hex $string[$i]; + $all_folds->add_range($decimal, $decimal); } } + else { + $all_folds->add_range(hex $map, hex $map); + } } else { $_ = ""; @@ -11468,26 +11857,26 @@ sub filter_blocks_lines { # PropList.txt has been in Unicode since version 2.0. Until 3.1, it # was in a completely different syntax. Ken Whistler of Unicode says # that it was something he used as an aid for his own purposes, but - # was never an official part of the standard. However, comments in - # DAge.txt indicate that non-character code points were available in - # the UCD as of 3.1. It is unclear to me (khw) how they could be - # there except through this file (but on the other hand, they first - # appeared there in 3.0.1), so maybe it was part of the UCD, and maybe - # not. But the claim is that it was published as an aid to others who - # might want some more information than was given in the official UCD - # of the time. Many of the properties in it were incorporated into - # the later PropList.txt, but some were not. This program uses this - # early file to generate property tables that are otherwise not - # accessible in the early UCD's, and most were probably not really - # official at that time, so one could argue that it should be ignored, - # and you can easily modify things to skip this. And there are bugs - # in this file in various versions. (For example, the 2.1.9 version - # removes from Alphabetic the CJK range starting at 4E00, and they - # weren't added back in until 3.1.0.) 
Many of this file's properties - # were later sanctioned, so this code generates tables for those - # properties that aren't otherwise in the UCD of the time but - # eventually did become official, and throws away the rest. Here is a - # list of all the ones that are thrown away: + # was never an official part of the standard. Many of the properties + # in it were incorporated into the later PropList.txt, but some were + # not. This program uses this early file to generate property tables + # that are otherwise not accessible in the early UCD's. It does this + # for the ones that eventually became official, and don't appear to be + # too different in their contents from the later official version, and + # throws away the rest. It could be argued that the ones it generates + # were probably not really official at that time, so should be + # ignored. You can easily modify things to skip all of them by + # changing this function to just set $_ to "", and return; and to skip + # certain of them by simply removing their declarations from + # get_old_property_aliases(). + # + # Here is a list of all the ones that are thrown away: + # Alphabetic The definitions for this are very + # defective, so better to not mislead + # people into thinking it works. + # Instead the Perl extension of the + # same name is constructed from first + # principles. # Bidi=* duplicates UnicodeData.txt # Combining never made into official property; # is \P{ccc=0} @@ -11520,7 +11909,7 @@ sub filter_blocks_lines { # Space different definition than eventual # one.
# Titlecase duplicates UnicodeData.txt: gc=lt - # Unassigned Code Value duplicates UnicodeData.txt: gc=cc + # Unassigned Code Value duplicates UnicodeData.txt: gc=cn # Zero-width never made into official property; # subset of gc=cf # Most of the properties have the same names in this file as in later @@ -11657,18 +12046,148 @@ sub filter_script_extensions_line { return; } +sub generate_hst { + + # Populates the Hangul Syllable Type property from first principles + + my $file= shift; + Carp::carp_extra_args(\@_) if main::DEBUG && @_; + + # These few ranges are hard-coded in. + $file->insert_lines(split /\n/, <<'END' +1100..1159 ; L +115F ; L +1160..11A2 ; V +11A8..11F9 ; T +END +); + + # The Hangul syllables in version 1 are completely different than what came + # after, so just ignore them there. + if ($v_version lt v2.0.0) { + my $property = property_ref($file->property); + push @tables_that_may_be_empty, $property->table('LV')->complete_name; + push @tables_that_may_be_empty, $property->table('LVT')->complete_name; + return; + } + + # The algorithmically derived syllables are almost all LVT ones, so + # initialize the whole range with that. + $file->insert_lines(sprintf "%04X..%04X; LVT\n", + $SBase, $SBase + $SCount -1); + + # Those ones that aren't LVT are LV, and they occur at intervals of + # $TCount code points, starting with the first code point, at $SBase. 
+ for (my $i = $SBase; $i < $SBase + $SCount; $i += $TCount) { + $file->insert_lines(sprintf "%04X..%04X; LV\n", $i, $i); + } + + return; +} + +sub generate_GCB { + + # Populates the Grapheme Cluster Break property from first principles + + my $file= shift; + Carp::carp_extra_args(\@_) if main::DEBUG && @_; + + # All these definitions are from + # http://www.unicode.org/reports/tr29/tr29-3.html with confirmation + # from http://www.unicode.org/reports/tr29/tr29-4.html + + foreach my $range ($gc->ranges) { + + # Extend includes gc=Me and gc=Mn, while Control includes gc=Cc + # and gc=Cf + if ($range->value =~ / ^ M [en] $ /x) { + $file->insert_lines(sprintf "%04X..%04X; Extend", + $range->start, $range->end); + } + elsif ($range->value =~ / ^ C [cf] $ /x) { + $file->insert_lines(sprintf "%04X..%04X; Control", + $range->start, $range->end); + } + } + $file->insert_lines("2028; Control"); # Line Separator + $file->insert_lines("2029; Control"); # Paragraph Separator + + $file->insert_lines("000D; CR"); + $file->insert_lines("000A; LF"); + + # Also from http://www.unicode.org/reports/tr29/tr29-3.html. + foreach my $code_point ( qw{ + 40000 + 09BE 09D7 0B3E 0B57 0BBE 0BD7 0CC2 0CD5 0CD6 + 0D3E 0D57 0DCF 0DDF FF9E FF9F 1D165 1D16E 1D16F + } + ) { + my $category = $gc->value_of(hex $code_point); + next if ! defined $category || $category eq 'Cn'; # But not if + # unassigned in this + # release + $file->insert_lines("$code_point; Extend"); + } + + my $hst = property_ref('Hangul_Syllable_Type'); + if ($hst->count > 0) { + foreach my $range ($hst->ranges) { + $file->insert_lines(sprintf "%04X..%04X; %s", + $range->start, $range->end, $range->value); + } + } + else { + generate_hst($file); + } + + return; +} + sub setup_early_name_alias { my $file= shift; Carp::carp_extra_args(\@_) if main::DEBUG && @_; + # This has the effect of pretending that the Name_Alias property was + # available in all Unicode releases. 
Strictly speaking, this property + # should not be available in early releases, but doing this allows + # charnames.pm to work on older releases without change. Prior to v5.16 + # it had these names hard-coded inside it. Unicode 6.1 came along and + # created these names, and so they were removed from charnames. + my $aliases = property_ref('Name_Alias'); - $aliases = Property->new('Name_Alias') if ! defined $aliases; + if (! defined $aliases) { + $aliases = Property->new('Name_Alias', Default_Map => ""); + } + $file->insert_lines(get_old_name_aliases()); return; } sub get_old_name_aliases () { + + # The Unicode_1_Name field contains most of these names. One would + # expect, given the field's name, that its values would be fixed across + # versions, giving the true Unicode version 1 name for the character. + # Sadly, this is not the case. Actually Version 1.1.5 had no names for + # any of the controls; Version 2.0 introduced names for the C0 controls, + # and 3.0 introduced C1 names. 3.0.1 removed the name INDEX; and 3.2 + # changed some names: it + # changed to parenthesized versions like "NEXT LINE" to + # "NEXT LINE (NEL)"; + # changed PARTIAL LINE DOWN to PARTIAL LINE FORWARD + # changed PARTIAL LINE UP to PARTIAL LINE BACKWARD; + # changed e.g. FILE SEPARATOR to INFORMATION SEPARATOR FOUR + # This list contains all the names that were defined so that + # charnames::vianame(), etc. understand them all EVEN if this version of + # Unicode didn't specify them (this could be construed as a bug). + # mktables elsewhere gives preference to the Unicode_1_Name field over + # these names, so that viacode() will return the correct value for that + # version of Unicode, except when that version doesn't define a name, + # viacode() will return one anyway (this also could be construed as a + # bug). But these potential "bugs" allow for the smooth working of code + # on earlier Unicode releases.
+ my @return = split /\n/, <<'END'; 0000;NULL;control 0000;NUL;abbreviation @@ -11684,7 +12203,6 @@ sub get_old_name_aliases () { 0005;ENQ;abbreviation 0006;ACKNOWLEDGE;control 0006;ACK;abbreviation -0007;ALERT;control 0007;BELL;control 0007;BEL;abbreviation 0008;BACKSPACE;control @@ -11877,6 +12395,19 @@ END } } + # ALERT did not come along until 6.0, at which point it became preferred + # over BELL, and was never in the Unicode_1_Name field. For the same + # reasons, that the other names are made known to all releases by this + # function, we make ALERT known too. By inserting it + # last in early releases, BELL is preferred over it; and vice-versa in 6.0 + my $alert = '0007; ALERT; control'; + if ($v_version lt v6.0.0) { + push @return, $alert; + } + else { + unshift @return, $alert; + } + return @return; } @@ -11915,16 +12446,53 @@ sub filter_early_version_name_alias_line { sub finish_Unicode() { # This routine should be called after all the Unicode files have been read # in. It: - # 1) Adds the mappings for code points missing from the files which have + # 1) Creates properties that are missing from the version of Unicode being + # compiled, and which, for whatever reason, are needed for the Perl + # core to function properly. These are minimally populated as + # necessary. + # 2) Adds the mappings for code points missing from the files which have # defaults specified for them. - # 2) At this this point all mappings are known, so it computes the type of + # 3) At this point all mappings are known, so it computes the type of # each property whose type hasn't been determined yet. - # 3) Calculates all the regular expression match tables based on the + # 4) Calculates all the regular expression match tables based on the # mappings. - # 3) Calculates and adds the tables which are defined by Unicode, but + # 5) Calculates and adds the tables which are defined by Unicode, but # which aren't derived by them, and certain derived tables that Perl # uses.
+ # Folding information was introduced later into Unicode data. To get + # Perl's case ignore (/i) to work at all in releases that don't have + # folding, use the best available alternative, which is lower casing. + my $fold = property_ref('Case_Folding'); + if ($fold->is_empty) { + $fold->initialize(property_ref('Lowercase_Mapping')); + $fold->add_note(join_lines(<<END +WARNING: This table uses lower case as a substitute for missing fold +information +END + )); + } + + # Multiple-character mapping was introduced later into Unicode data, so it + # is by default the simple version. If to output the simple versions and + # not present, just use the regular (which in these Unicode versions is + # the simple as well). + foreach my $map (qw { Uppercase_Mapping + Lowercase_Mapping + Titlecase_Mapping + Case_Folding + } ) + { + my $simple = property_ref("Simple_$map"); + next if ! $simple->is_empty; + if ($simple->to_output_map) { + $simple->initialize(property_ref($map)); + } + else { + property_ref($map)->set_proxy_for($simple->name); + } + } + # For each property, fill in any missing mappings, and calculate the re # match tables. If a property has more than one missing mapping, the # default is a reference to a data structure, and requires data from other @@ -12141,41 +12709,6 @@ END my $Cs = $gc->table('Cs'); - - # Folding information was introduced later into Unicode data. To get - # Perl's case ignore (/i) to work at all in releases that don't have - # folding, use the best available alternative, which is lower casing. - my $fold = property_ref('Simple_Case_Folding'); - if ($fold->is_empty) { - $fold->initialize(property_ref('Simple_Lowercase_Mapping')); - $fold->add_note(join_lines(<<END -WARNING: This table uses lower case as a substitute for missing fold -information -END - )); - } - - # Multiple-character mapping was introduced later into Unicode data. 
If - # missing, use the single-characters maps as best available alternative - foreach my $map (qw { Uppercase_Mapping - Lowercase_Mapping - Titlecase_Mapping - Case_Folding - } ) - { - my $full = property_ref($map); - if ($full->is_empty) { - my $simple = property_ref('Simple_' . $map); - $full->initialize($simple); - $full->add_comment($simple->comment) if ($simple->comment); - $full->add_note(join_lines(<<END -WARNING: This table uses simple mapping (single-character only) as a -substitute for missing multiple-character information -END - )); - } - } - # Create digit and case fold tables with the original file names for # backwards compatibility with applications that read them directly. my $Digit = Property->new("Legacy_Perl_Decimal_Digit", @@ -12243,6 +12776,29 @@ END return; } +sub pre_3_dot_1_Nl () { + + # Return a range list for gc=nl for Unicode versions prior to 3.1, which + # is when Unicode's became fully usable. These code points were + # determined by inspection and experimentation. gc=nl is important for + # certain Perl-extension properties that should be available in all + # releases. + + my $Nl = Range_List->new(); + if (defined (my $official = $gc->table('Nl'))) { + $Nl += $official; + } + else { + $Nl->add_range(0x2160, 0x2182); + $Nl->add_range(0x3007, 0x3007); + $Nl->add_range(0x3021, 0x3029); + } + $Nl->add_range(0xFE20, 0xFE23); + $Nl->add_range(0x16EE, 0x16F0) if $v_version ge v3.0.0; # 3.0 was when + # these were added + return $Nl; +} + sub compile_perl() { # Create perl-defined tables. Almost all are part of the pseudo-property # named 'perl' internally to this program. Many of these are recommended @@ -12305,24 +12861,50 @@ sub compile_perl() { # Very early releases didn't have blocks, so initialize ASCII ourselves if # necessary if ($ASCII->is_empty) { - $ASCII->initialize([ 0..127 ]); + $ASCII->add_range(0, 127); } # Get the best available case definitions. 
Early Unicode versions didn't # have Uppercase and Lowercase defined, so use the general category - # instead for them. + # instead for them, modified by hard-coding in the code points each is + # missing. my $Lower = $perl->add_match_table('Lower'); my $Unicode_Lower = property_ref('Lowercase'); if (defined $Unicode_Lower && ! $Unicode_Lower->is_empty) { $Lower->set_equivalent_to($Unicode_Lower->table('Y'), Related => 1); - $Unicode_Lower->table('Y')->set_caseless_equivalent(property_ref('Cased')->table('Y')); - $Unicode_Lower->table('N')->set_caseless_equivalent(property_ref('Cased')->table('N')); - $Lower->set_caseless_equivalent(property_ref('Cased')->table('Y')); } else { - $Lower->set_equivalent_to($gc->table('Lowercase_Letter'), - Related => 1); + $Lower += $gc->table('Lowercase_Letter'); + + # There are quite a few code points in Lower, that aren't in gc=lc, + # and not all are in all releases. + foreach my $code_point ( 0x00AA, + 0x00BA, + 0x02B0 .. 0x02B8, + 0x02C0 .. 0x02C1, + 0x02E0 .. 0x02E4, + 0x0345, + 0x037A, + 0x1D2C .. 0x1D6A, + 0x1D78, + 0x1D9B .. 0x1DBF, + 0x2071, + 0x207F, + 0x2090 .. 0x209C, + 0x2170 .. 0x217F, + 0x24D0 .. 0x24E9, + 0x2C7C .. 0x2C7D, + 0xA770, + 0xA7F8 .. 0xA7F9, + ) { + # Don't include the code point unless it is assigned in this + # release + my $category = $gc->value_of(hex $code_point); + next if ! defined $category || $category eq 'Cn'; + + $Lower += $code_point; + } } $Lower->add_alias('XPosixLower'); my $Posix_Lower = $perl->add_match_table("PosixLower", @@ -12334,13 +12916,14 @@ sub compile_perl() { my $Unicode_Upper = property_ref('Uppercase'); if (defined $Unicode_Upper && ! 
$Unicode_Upper->is_empty) { $Upper->set_equivalent_to($Unicode_Upper->table('Y'), Related => 1); - $Unicode_Upper->table('Y')->set_caseless_equivalent(property_ref('Cased')->table('Y')); - $Unicode_Upper->table('N')->set_caseless_equivalent(property_ref('Cased')->table('N')); - $Upper->set_caseless_equivalent(property_ref('Cased')->table('Y')); } else { - $Upper->set_equivalent_to($gc->table('Uppercase_Letter'), - Related => 1); + + # Unlike Lower, there are only two ranges in Upper that aren't in + # gc=Lu, and all code points were assigned in all releases. + $Upper += $gc->table('Uppercase_Letter'); + $Upper->add_range(0x2160, 0x216F); # Uppercase Roman numerals + $Upper->add_range(0x24B6, 0x24CF); # Circled Latin upper case letters } $Upper->add_alias('XPosixUpper'); my $Posix_Upper = $perl->add_match_table("PosixUpper", @@ -12358,21 +12941,56 @@ sub compile_perl() { # identical code points, but their caseless equivalents are not the same, # one being 'Cased' and the other being 'LC', and so now must be kept as # separate entities. - $Title += $lt if defined $lt; + if (defined $lt) { + $Title += $lt; + } + else { + push @tables_that_may_be_empty, $Title->complete_name; + } - # If this Unicode version doesn't have Cased, set up our own. From - # Unicode 5.1: Definition D120: A character C is defined to be cased if - # and only if C has the Lowercase or Uppercase property or has a - # General_Category value of Titlecase_Letter. 
my $Unicode_Cased = property_ref('Cased'); - unless (defined $Unicode_Cased) { + if (defined $Unicode_Cased) { + my $yes = $Unicode_Cased->table('Y'); + my $no = $Unicode_Cased->table('N'); + $Title->set_caseless_equivalent($yes); + if (defined $Unicode_Upper) { + $Unicode_Upper->table('Y')->set_caseless_equivalent($yes); + $Unicode_Upper->table('N')->set_caseless_equivalent($no); + } + $Upper->set_caseless_equivalent($yes); + if (defined $Unicode_Lower) { + $Unicode_Lower->table('Y')->set_caseless_equivalent($yes); + $Unicode_Lower->table('N')->set_caseless_equivalent($no); + } + $Lower->set_caseless_equivalent($yes); + } + else { + # If this Unicode version doesn't have Cased, set up the Perl + # extension from first principles. From Unicode 5.1: Definition D120: + # A character C is defined to be cased if and only if C has the + # Lowercase or Uppercase property or has a General_Category value of + # Titlecase_Letter. my $cased = $perl->add_match_table('Cased', Initialize => $Lower + $Upper + $Title, Description => 'Uppercase or Lowercase or Titlecase', ); - $Unicode_Cased = $cased; + # $notcased is purely for the caseless equivalents below + my $notcased = $perl->add_match_table('_Not_Cased', + Initialize => ~ $cased, + Fate => $INTERNAL_ONLY, + Description => 'All not-cased code points'); + $Title->set_caseless_equivalent($cased); + if (defined $Unicode_Upper) { + $Unicode_Upper->table('Y')->set_caseless_equivalent($cased); + $Unicode_Upper->table('N')->set_caseless_equivalent($notcased); + } + $Upper->set_caseless_equivalent($cased); + if (defined $Unicode_Lower) { + $Unicode_Lower->table('Y')->set_caseless_equivalent($cased); + $Unicode_Lower->table('N')->set_caseless_equivalent($notcased); + } + $Lower->set_caseless_equivalent($cased); } - $Title->set_caseless_equivalent($Unicode_Cased->table('Y')); # Similarly, set up our own Case_Ignorable property if this Unicode # version doesn't have it. 
From Unicode 5.1: Definition D121: A character @@ -12437,16 +13055,83 @@ sub compile_perl() { } else { - # For early releases, we don't get it exactly right. The below - # includes more than it should, which in 5.2 terms is: L + Nl + - # Other_Alphabetic. Other_Alphabetic contains many characters from - # Mn and Mc. It's better to match more than we should, than less than - # we should. + # The Alphabetic property doesn't exist for early releases, so + # generate it. The actual definition, in 5.2 terms is: + # + # gc=L + gc=Nl + Other_Alphabetic + # + # Other_Alphabetic is also not defined in these early releases, but it + # contains one gc=So range plus most of gc=Mn and gc=Mc, so we add + # those last two as well, then subtract the relatively few of them that + # shouldn't have been added. (The gc=So range is the circled capital + # Latin characters. Early releases mistakenly didn't also include the + # lower-case versions of these characters, and so we don't either, to + # maintain consistency with those releases that first had this + # property. 
$Alpha->initialize($gc->table('Letter') - + $gc->table('Mn') - + $gc->table('Mc')); - $Alpha += $gc->table('Nl') if defined $gc->table('Nl'); + + pre_3_dot_1_Nl() + + $gc->table('Mn') + + $gc->table('Mc') + ); + $Alpha->add_range(0x24D0, 0x24E9); # gc=So + foreach my $range ( [ 0x0300, 0x0344 ], + [ 0x0346, 0x034E ], + [ 0x0360, 0x0362 ], + [ 0x0483, 0x0486 ], + [ 0x0591, 0x05AF ], + [ 0x06DF, 0x06E0 ], + [ 0x06EA, 0x06EC ], + [ 0x0740, 0x074A ], + 0x093C, + 0x094D, + [ 0x0951, 0x0954 ], + 0x09BC, + 0x09CD, + 0x0A3C, + 0x0A4D, + 0x0ABC, + 0x0ACD, + 0x0B3C, + 0x0B4D, + 0x0BCD, + 0x0C4D, + 0x0CCD, + 0x0D4D, + 0x0DCA, + [ 0x0E47, 0x0E4C ], + 0x0E4E, + [ 0x0EC8, 0x0ECC ], + [ 0x0F18, 0x0F19 ], + 0x0F35, + 0x0F37, + 0x0F39, + [ 0x0F3E, 0x0F3F ], + [ 0x0F82, 0x0F84 ], + [ 0x0F86, 0x0F87 ], + 0x0FC6, + 0x1037, + 0x1039, + [ 0x17C9, 0x17D3 ], + [ 0x20D0, 0x20DC ], + 0x20E1, + [ 0x302A, 0x302F ], + [ 0x3099, 0x309A ], + [ 0xFE20, 0xFE23 ], + [ 0x1D165, 0x1D169 ], + [ 0x1D16D, 0x1D172 ], + [ 0x1D17B, 0x1D182 ], + [ 0x1D185, 0x1D18B ], + [ 0x1D1AA, 0x1D1AD ], + ) { + if (ref $range) { + $Alpha->delete_range($range->[0], $range->[1]); + } + else { + $Alpha->delete_range($range, $range); + } + } $Alpha->add_description('Alphabetic'); + $Alpha->add_alias('Alphabetic'); } $Alpha->add_alias('XPosixAlpha'); my $Posix_Alpha = $perl->add_match_table("PosixAlpha", @@ -12473,7 +13158,19 @@ sub compile_perl() { ); $Word->add_alias('XPosixWord'); my $Pc = $gc->table('Connector_Punctuation'); # 'Pc' Not in release 1 - $Word += $Pc if defined $Pc; + if (defined $Pc) { + $Word += $Pc; + } + else { + $Word += ord('_'); # Make sure this is a $Word + } + my $JC = property_ref('Join_Control'); # Wasn't in release 1 + if (defined $JC) { + $Word += $JC->table('Y'); + } + else { + $Word += 0x200C + 0x200D; + } # This is a Perl extension, so the name doesn't begin with Posix. 
my $PerlWord = $perl->add_match_table('PerlWord', @@ -12512,23 +13209,24 @@ sub compile_perl() { # No Posix equivalent for vertical space my $Space = $perl->add_match_table('Space', - Description => '\s including beyond ASCII plus vertical tab', + Description => '\s including beyond ASCII and vertical tab', Initialize => $Blank + $VertSpace, ); $Space->add_alias('XPosixSpace'); - $perl->add_match_table("PosixSpace", + my $posix_space = $perl->add_match_table("PosixSpace", Description => "\\t, \\n, \\cK, \\f, \\r, and ' '. (\\cK is vertical tab)", Initialize => $Space & $ASCII, ); - # Perl's traditional space doesn't include Vertical Tab + # Perl's traditional space doesn't include Vertical Tab prior to v5.18 my $XPerlSpace = $perl->add_match_table('XPerlSpace', Description => '\s, including beyond ASCII', - Initialize => $Space - 0x000B, + #Initialize => $Space - 0x000B, + Initialize => $Space, ); $XPerlSpace->add_alias('SpacePerl'); # A pre-existing synonym my $PerlSpace = $perl->add_match_table('PerlSpace', - Description => '\s, restricted to ASCII = [ \f\n\r\t]', + Description => '\s, restricted to ASCII = [ \f\n\r\t] plus vertical tab', Initialize => $XPerlSpace & $ASCII, ); @@ -12618,6 +13316,8 @@ sub compile_perl() { } else { $PosixXDigit->initialize($Xdigit & $ASCII); + $PosixXDigit->add_alias('AHex'); + $PosixXDigit->add_alias('Ascii_Hex_Digit'); } $PosixXDigit->add_description('[0-9A-Fa-f]'); @@ -12641,7 +13341,8 @@ sub compile_perl() { } else { - # This list came from 3.2 Soft_Dotted. 
+ # This list came from 3.2 Soft_Dotted; all of these code points are in + # all releases $CanonDCIJ->initialize([ 0x0069, 0x006A, 0x012F, @@ -12654,98 +13355,250 @@ sub compile_perl() { $CanonDCIJ = $CanonDCIJ & $Assigned; } - # These are used in Unicode's definition of \X - my $begin = $perl->add_match_table('_X_Begin', Perl_Extension => 1, - Fate => $INTERNAL_ONLY); - my $extend = $perl->add_match_table('_X_Extend', Perl_Extension => 1, + # For backward compatibility, Perl has its own definition for IDStart. + # It is regular XID_Start plus the underscore, but all characters must be + # Word characters as well + my $XID_Start = property_ref('XID_Start'); + my $perl_xids = $perl->add_match_table('_Perl_IDStart', + Perl_Extension => 1, + Fate => $INTERNAL_ONLY, + Initialize => ord('_') + ); + if (defined $XID_Start + || defined ($XID_Start = property_ref('ID_Start'))) + { + $perl_xids += $XID_Start->table('Y'); + } + else { + # For Unicode versions that don't have the property, construct our own + # from first principles. The actual definition is: + # Letters + # + letter numbers (Nl) + # - Pattern_Syntax + # - Pattern_White_Space + # + stability extensions + # - NKFC modifications + # + # What we do in the code below is to include the identical code points + # that are in the first release that had Unicode's version of this + # property, essentially extrapolating backwards. There were no + # stability extensions until v4.1, so none are included; likewise in + # no Unicode version so far do subtracting PatSyn and PatWS make any + # difference, so those also are ignored. + $perl_xids += $gc->table('Letter') + pre_3_dot_1_Nl(); + + # We do subtract the NFKC modifications that are in the first version + # that had this property. We don't bother to test if they are in the + # version in question, because if they aren't, the operation is a + # no-op. 
The NKFC modifications are discussed in + # http://www.unicode.org/reports/tr31/#NFKC_Modifications + foreach my $range ( 0x037A, + 0x0E33, + 0x0EB3, + [ 0xFC5E, 0xFC63 ], + [ 0xFDFA, 0xFE70 ], + [ 0xFE72, 0xFE76 ], + 0xFE78, + 0xFE7A, + 0xFE7C, + 0xFE7E, + [ 0xFF9E, 0xFF9F ], + ) { + if (ref $range) { + $perl_xids->delete_range($range->[0], $range->[1]); + } + else { + $perl_xids->delete_range($range, $range); + } + } + } + + $perl_xids &= $Word; + + my $perl_xidc = $perl->add_match_table('_Perl_IDCont', + Perl_Extension => 1, Fate => $INTERNAL_ONLY); + my $XIDC = property_ref('XID_Continue'); + if (defined $XIDC + || defined ($XIDC = property_ref('ID_Continue'))) + { + $perl_xidc += $XIDC->table('Y'); + } + else { + # Similarly, we construct our own XIDC if necessary for early Unicode + # versions. The definition is: + # everything in XIDS + # + Gc=Mn + # + Gc=Mc + # + Gc=Nd + # + Gc=Pc + # - Pattern_Syntax + # - Pattern_White_Space + # + stability extensions + # - NFKC modifications + # + # The same thing applies to this as with XIDS for the PatSyn, PatWS, + # and stability extensions. There is a somewhat different set of NFKC + # mods to remove (and add in this case). The ones below make this + # have identical code points as in the first release that defined it. 
+ $perl_xidc += $perl_xids + + $gc->table('L') + + $gc->table('Mn') + + $gc->table('Mc') + + $gc->table('Nd') + + 0x00B7 + ; + if (defined (my $pc = $gc->table('Pc'))) { + $perl_xidc += $pc; + } + else { # 1.1.5 didn't have Pc, but these should have been in it + $perl_xidc += 0xFF3F; + $perl_xidc->add_range(0x203F, 0x2040); + $perl_xidc->add_range(0xFE33, 0xFE34); + $perl_xidc->add_range(0xFE4D, 0xFE4F); + } + + # Subtract the NFKC mods + foreach my $range ( 0x037A, + [ 0xFC5E, 0xFC63 ], + [ 0xFDFA, 0xFE1F ], + 0xFE70, + [ 0xFE72, 0xFE76 ], + 0xFE78, + 0xFE7A, + 0xFE7C, + 0xFE7E, + ) { + if (ref $range) { + $perl_xidc->delete_range($range->[0], $range->[1]); + } + else { + $perl_xidc->delete_range($range, $range); + } + } + } - # For backward compatibility, Perl has its own definition for IDStart - # First, we include the underscore, and then the regular XID_Start also - # have to be Words - $perl->add_match_table('_Perl_IDStart', - Perl_Extension => 1, - Fate => $INTERNAL_ONLY, - Initialize => - ord('_') - + (property_ref('XID_Start')->table('Y') & $Word) - ); + $perl_xidc &= $Word; - my $gcb = property_ref('Grapheme_Cluster_Break'); + my $charname_begin = $perl->add_match_table('_Perl_Charname_Begin', + Perl_Extension => 1, + Fate => $INTERNAL_ONLY, + Initialize => $gc->table('Letter') & $Alpha & $perl_xids, + ); - # The 'extended' grapheme cluster came in 5.1. The non-extended - # definition differs too much from the traditional Perl one to use. - if (defined $gcb && defined $gcb->table('SpacingMark')) { - - # Note that assumes HST is defined; it came in an earlier release than - # GCB. In the line below, two negatives means: yes hangul - $begin += ~ property_ref('Hangul_Syllable_Type') - ->table('Not_Applicable') - + ~ ($gcb->table('Control') - + $gcb->table('CR') - + $gcb->table('LF')); - $begin->add_comment('For use in \X; matches: Hangul_Syllable | ! 
Control'); - - $extend += $gcb->table('Extend') + $gcb->table('SpacingMark'); - $extend->add_comment('For use in \X; matches: Extend | SpacingMark'); - } - else { # Old definition, used on early releases. - $extend += $gc->table('Mark') - + 0x200C # ZWNJ - + 0x200D; # ZWJ - $begin += ~ $extend; - - # Here we may have a release that has the regular grapheme cluster - # defined, or a release that doesn't have anything defined. - # We set things up so the Perl core degrades gracefully, possibly with - # placeholders that match nothing. - - if (! defined $gcb) { - $gcb = Property->new('GCB', Status => $PLACEHOLDER); - } - my $hst = property_ref('HST'); - if (!defined $hst) { - $hst = Property->new('HST', Status => $PLACEHOLDER); - $hst->add_match_table('Not_Applicable', - Initialize => $Any, - Matches_All => 1); - } - - # On some releases, here we may not have the needed tables for the - # perl core, in some releases we may. - foreach my $name (qw{ L LV LVT T V prepend }) { - my $table = $gcb->table($name); - if (! defined $table) { - $table = $gcb->add_match_table($name); - push @tables_that_may_be_empty, $table->complete_name; - } + my $charname_continue = $perl->add_match_table('_Perl_Charname_Continue', + Perl_Extension => 1, + Fate => $INTERNAL_ONLY, + Initialize => $perl_xidc + + 0x0020 # SPACE + + 0x0028 # ( + + 0x0029 # ) + + 0x002D # - + + 0x00A0 # NBSP + ); - # The HST property predates the GCB one, and has identical tables - # for some of them, so use it if we can. - if ($table->is_empty - && defined $hst - && defined $hst->table($name)) - { - $table += $hst->table($name); - } + # These two tables are for matching \X, which is based on the 'extended' + # grapheme cluster, which came in 5.1; create empty ones if not already + # present. 
The straight 'grapheme cluster' (non-extended) is used prior + # to 5.1, and differs from the extended (see + # http://www.unicode.org/reports/tr29/) only by these two tables, so we + # get the older definition automatically when they are empty. + my $gcb = property_ref('Grapheme_Cluster_Break'); + my $perl_prepend = $perl->add_match_table('_X_GCB_Prepend', + Perl_Extension => 1, + Fate => $INTERNAL_ONLY); + if (defined (my $gcb_prepend = $gcb->table('Prepend'))) { + $perl_prepend->set_equivalent_to($gcb_prepend, Related => 1); + } + else { + push @tables_that_may_be_empty, $perl_prepend->complete_name; + } + + # All the tables with _X_ in their names are used in defining \X handling, + # and are based on the Unicode GCB property. Basically, \X matches: + # CR LF + # | Prepend* Begin Extend* + # | . + # Begin is: ( Special_Begin | ! Control ) + # Begin is also: ( Regular_Begin | Special_Begin ) + # where Regular_Begin is defined as ( ! Control - Special_Begin ) + # Special_Begin is: ( Regional-Indicator+ | Hangul-syllable ) + # Extend is: ( Grapheme_Extend | Spacing_Mark ) + # Control is: [ GCB_Control | CR | LF ] + # Hangul-syllable is: ( T+ | ( L* ( L | ( LVT | ( V | LV ) V* ) T* ) )) + + foreach my $gcb_name (qw{ L V T LV LVT }) { + + # The perl internal extension's name is the gcb table name prepended + # with an '_X_' + my $perl_table = $perl->add_match_table('_X_GCB_' . $gcb_name, + Perl_Extension => 1, + Fate => $INTERNAL_ONLY, + Initialize => $gcb->table($gcb_name), + ); + # Version 1 had mostly different Hangul syllables that were removed + # from later versions, so some of the tables may not apply. + if ($v_version lt v2.0) { + push @tables_that_may_be_empty, $perl_table->complete_name; } } - # More GCB. If we found some hangul syllables, populate a combined - # table. + # More GCB. 
Populate a combined hangul syllables table my $lv_lvt_v = $perl->add_match_table('_X_LV_LVT_V', Perl_Extension => 1, Fate => $INTERNAL_ONLY); - my $LV = $gcb->table('LV'); - if ($LV->is_empty) { - push @tables_that_may_be_empty, $lv_lvt_v->complete_name; - } else { - $lv_lvt_v += $LV + $gcb->table('LVT') + $gcb->table('V'); - $lv_lvt_v->add_comment('For use in \X; matches: HST=LV | HST=LVT | HST=V'); + $lv_lvt_v += $gcb->table('LV') + $gcb->table('LVT') + $gcb->table('V'); + $lv_lvt_v->add_comment('For use in \X; matches: gcb=LV | gcb=LVT | gcb=V'); + + my $ri = $perl->add_match_table('_X_RI', Perl_Extension => 1, + Fate => $INTERNAL_ONLY); + if ($v_version ge v6.2) { + $ri += $gcb->table('RI'); + } + else { + push @tables_that_may_be_empty, $ri->full_name; + } + + my $specials_begin = $perl->add_match_table('_X_Special_Begin_Start', + Perl_Extension => 1, + Fate => $INTERNAL_ONLY, + Initialize => $lv_lvt_v + + $gcb->table('L') + + $gcb->table('T') + + $ri + ); + $specials_begin->add_comment(join_lines( <<END +For use in \\X; matches first (perhaps only) character of potential +multi-character sequences that can begin an extended grapheme cluster. They +need special handling because of their complicated nature. +END + )); + my $regular_begin = $perl->add_match_table('_X_Regular_Begin', + Perl_Extension => 1, + Fate => $INTERNAL_ONLY, + Initialize => ~ $gcb->table('Control') + - $specials_begin + - $gcb->table('CR') + - $gcb->table('LF') + ); + $regular_begin->add_comment(join_lines( <<END +For use in \\X; matches first character of anything that can begin an extended +grapheme cluster, except those that require special handling. 
+END + )); + + my $extend = $perl->add_match_table('_X_Extend', Perl_Extension => 1, + Fate => $INTERNAL_ONLY, + Initialize => $gcb->table('Extend') + ); + if (defined (my $sm = $gcb->table('SpacingMark'))) { + $extend += $sm; } + $extend->add_comment('For use in \X; matches: Extend | SpacingMark'); - # Was previously constructed to contain both Name and Unicode_1_Name - my @composition = ('Name', 'Unicode_1_Name'); + # End of GCB \X processing + + my @composition = ('Name', 'Unicode_1_Name', 'Name_Alias'); if (@named_sequences) { push @composition, 'Named_Sequence'; @@ -12757,56 +13610,54 @@ sub compile_perl() { my $alias_sentence = ""; my %abbreviations; my $alias = property_ref('Name_Alias'); - if (defined $alias) { - push @composition, 'Name_Alias'; - $perl_charname->set_proxy_for('Name_Alias'); - - # Add each entry in Name_Alias to Perl_Charnames. Where these go with - # respect to any existing entry depends on the entry type. - # Corrections go before said entry, as they should be returned in - # preference over the existing entry. (A correction to a correction - # should be later in the Name_Alias table, so it will correctly - # precede the erroneous correction in Perl_Charnames.) - # - # Abbreviations go after everything else, so they are saved - # temporarily in a hash for later. - # - # Controls are currently added afterwards. This is because Perl has - # previously used the Unicode1 name, and so should still use that. - # (Most of them will be the same anyway, in which case we don't add a - # duplicate) - - $alias->reset_each_range; - while (my ($range) = $alias->each_range) { - next if $range->value eq ""; - my $code_point = $range->start; - if ($code_point != $range->end) { - Carp::my_carp_bug("Bad News. Expecting only one code point in the range $range. 
Just to keep going, using only the first code point;"); - } - my ($value, $type) = split ': ', $range->value; - my $replace_type; - if ($type eq 'correction') { - $replace_type = $MULTIPLE_BEFORE; - } - elsif ($type eq 'abbreviation') { - - # Save for later - $abbreviations{$value} = $code_point; - next; - } - elsif ($type eq 'control') { - $replace_type = $MULTIPLE_AFTER; - } - else { - $replace_type = $MULTIPLE_AFTER; - } + $perl_charname->set_proxy_for('Name_Alias'); + + # Add each entry in Name_Alias to Perl_Charnames. Where these go with + # respect to any existing entry depends on the entry type. Corrections go + # before said entry, as they should be returned in preference over the + # existing entry. (A correction to a correction should be later in the + # Name_Alias table, so it will correctly precede the erroneous correction + # in Perl_Charnames.) + # + # Abbreviations go after everything else, so they are saved temporarily in + # a hash for later. + # + # Everything else is added added afterwards, which preserves the input + # ordering - # Actually add; before or after current entry(ies) as determined - # above. + foreach my $range ($alias->ranges) { + next if $range->value eq ""; + my $code_point = $range->start; + if ($code_point != $range->end) { + Carp::my_carp_bug("Bad News. Expecting only one code point in the range $range. Just to keep going, using only the first code point;"); + } + my ($value, $type) = split ': ', $range->value; + my $replace_type; + if ($type eq 'correction') { + $replace_type = $MULTIPLE_BEFORE; + } + elsif ($type eq 'abbreviation') { - $perl_charname->add_duplicate($code_point, $value, Replace => $replace_type); + # Save for later + $abbreviations{$value} = $code_point; + next; } + else { + $replace_type = $MULTIPLE_AFTER; + } + + # Actually add; before or after current entry(ies) as determined + # above. 
+ + $perl_charname->add_duplicate($code_point, $value, Replace => $replace_type); } + $alias_sentence = <<END; +The Name_Alias property adds duplicate code point entries that are +alternatives to the original name. If an addition is a corrected +name, it will be physically first in the table. The original (less correct, +but still valid) name will be next; then any alternatives, in no particular +order; and finally any abbreviations, again in no particular order. +END # Now add the Unicode_1 names for the controls. The Unicode_1 names had # precedence before 6.1, so should be first in the file; the other names @@ -12831,23 +13682,27 @@ sub compile_perl() { # We only add in the controls. next if $gc->value_of($code_point) ne 'Cc'; + # We reject this Unicode1 name for later Perls, as it is used for + # another code point + next if $unicode_1_value eq 'BELL' && $^V ge v5.17.0; + # This won't add an exact duplicate. $perl_charname->add_duplicate($code_point, $unicode_1_value, Replace => $before_or_after); } + # But in this version only, the ALERT has precedence over BELL, the + # Unicode_1_Name that would otherwise have precedence. + if ($v_version eq v6.0.0) { + $perl_charname->add_duplicate(7, 'ALERT', Replace => $MULTIPLE_BEFORE); + } + # Now that have everything added, add in abbreviations after - # everything else. - foreach my $value (keys %abbreviations) { + # everything else. Sort so results don't change between runs of this + # program + foreach my $value (sort keys %abbreviations) { $perl_charname->add_duplicate($abbreviations{$value}, $value, Replace => $MULTIPLE_AFTER); - $alias_sentence = <<END; -The Name_Alias property adds duplicate code point entries that are -alternatives to the original name. If an addition is a corrected -name, it will be physically first in the table. The original (less correct, -but still valid) name will be next; then any alternatives, in no particular -order; and finally any abbreviations, again in no particular order. 
-END } my $comment; @@ -13001,8 +13856,10 @@ END # This separates out the non-characters from the other unassigneds, so # can give different annotations for each. $unassigned_sans_noncharacters = Range_List->new( - Initialize => $gc->table('Unassigned') - & property_ref('Noncharacter_Code_Point')->table('N')); + Initialize => $gc->table('Unassigned')); + if (defined (my $nonchars = property_ref('Noncharacter_Code_Point'))) { + $unassigned_sans_noncharacters &= $nonchars->table('N'); + } for (my $i = 0; $i <= $MAX_UNICODE_CODEPOINT; $i++ ) { $i = populate_char_info($i); # Note sets $i so may cause skips @@ -13237,8 +14094,8 @@ sub add_perl_synonyms() { && ($actual->property != $block || $prefix eq 'In_')) { print simple_fold(join_lines(<<END -There is already an alias named $proposed_name (from " . $pre_existing . "), -so not creating this alias for " . $actual +There is already an alias named $proposed_name (from $pre_existing), +so not creating this alias for $actual END ), "", 4); } @@ -13662,6 +14519,11 @@ sub make_re_pod_entries($) { my $status_info = $input_table->status_info; my $caseless_equivalent = $input_table->caseless_equivalent; + # Don't mention a placeholder equivalent as it isn't to be listed in the + # pod + $caseless_equivalent = 0 if $caseless_equivalent != 0 + && $caseless_equivalent->fate > $ORDINARY; + my $entry_for_first_table; # The entry for the first table output. # Almost certainly, it is the parent. @@ -13675,7 +14537,10 @@ sub make_re_pod_entries($) { # First, gather all the info that applies to this table as a whole. - push @zero_match_tables, $table if $count == 0; + push @zero_match_tables, $table if $count == 0 + # Don't mention special tables + # as being zero length + && $table->fate == $ORDINARY; my $table_property = $table->property; @@ -14480,7 +15345,7 @@ the left brace completely changes the meaning of the construct, from "match" (for C<\\p{}>) to "doesn't match" (for C<\\P{}>). 
Casing in this document is for improved legibility. -Also, white space, hyphens, and underscores are also normally ignored +Also, white space, hyphens, and underscores are normally ignored everywhere between the {braces}, and hence can be freely added or removed even if the C</x> modifier hasn't been specified on the regular expression. But $a_bold_stricter at the beginning of an entry in the table below @@ -14955,8 +15820,8 @@ package charnames; my \$run_on_code_point_re = qr/$run_on_code_point_re/; my \$code_point_re = qr/$code_point_re/; - # In the following hash, the keys are the bases of names which includes - # the code point in the name, like CJK UNIFIED IDEOGRAPH-4E01. The values + # In the following hash, the keys are the bases of names which include + # the code point in the name, like CJK UNIFIED IDEOGRAPH-4E01. The value # of each key is another hash which is used to get the low and high ends # for each range of code points that apply to the name. my %names_ending_in_code_point = ( @@ -15192,37 +16057,42 @@ sub make_UCD () { # Make a list of all combinations of properties/values that are suppressed. my @suppressed; - foreach my $property_name (keys %why_suppressed) { + if (! 
$debug_skip) { # This tends to fail in this debug mode + foreach my $property_name (keys %why_suppressed) { - # Just the value - my $value_name = $1 if $property_name =~ s/ = ( .* ) //x; + # Just the value + my $value_name = $1 if $property_name =~ s/ = ( .* ) //x; - # The hash may contain properties not in this release of Unicode - next unless defined (my $property = property_ref($property_name)); + # The hash may contain properties not in this release of Unicode + next unless defined (my $property = property_ref($property_name)); - # Find all combinations - foreach my $prop_alias ($property->aliases) { - my $prop_alias_name = standardize($prop_alias->name); + # Find all combinations + foreach my $prop_alias ($property->aliases) { + my $prop_alias_name = standardize($prop_alias->name); - # If no =value, there's just one combination possibe for this - if (! $value_name) { + # If no =value, there's just one combination possibe for this + if (! $value_name) { - # The property may be suppressed, but there may be a proxy for - # it, so it shouldn't be listed as suppressed - next if $prop_alias->ucd; - push @suppressed, $prop_alias_name; - } - else { # Otherwise - foreach my $value_alias ($property->table($value_name)->aliases) - { - next if $value_alias->ucd; + # The property may be suppressed, but there may be a proxy + # for it, so it shouldn't be listed as suppressed + next if $prop_alias->ucd; + push @suppressed, $prop_alias_name; + } + else { # Otherwise + foreach my $value_alias + ($property->table($value_name)->aliases) + { + next if $value_alias->ucd; - push @suppressed, "$prop_alias_name=" - . standardize($value_alias->name); + push @suppressed, "$prop_alias_name=" + . standardize($value_alias->name); + } } } } } + @suppressed = sort @suppressed; # So doesn't change between runs of this + # program # Convert the structure below (designed for Name.pm) to a form that UCD # wants, so it doesn't have to modify it at all; i.e. 
so that it includes @@ -15373,17 +16243,19 @@ sub write_all_tables() { # For each property ... # (sort so that if there is an immutable file name, it has precedence, so - # some other property can't come in and take over its file name. If b's - # file name is defined, will return 1, meaning to take it first; don't - # care if both defined, as they had better be different anyway. And the - # property named 'Perl' needs to be first (it doesn't have any immutable - # file name) because empty properties are defined in terms of it's table - # named 'Any'.) + # some other property can't come in and take over its file name. (We + # don't care if both defined, as they had better be different anyway.) + # The property named 'Perl' needs to be first (it doesn't have any + # immutable file name) because empty properties are defined in terms of + # it's table named 'Any'.) We also sort by the property's name. This is + # just for repeatability of the outputs between runs of this program, but + # does not affect correctness. PROPERTY: - foreach my $property (sort { return -1 if $a == $perl; - return 1 if $b == $perl; - return defined $b->file - } property_ref('*')) + foreach my $property ($perl, + sort { return -1 if defined $a->file; + return 1 if defined $b->file; + return $a->name cmp $b->name; + } grep { $_ != $perl } property_ref('*')) { my $type = $property->type; @@ -15549,17 +16421,18 @@ sub write_all_tables() { } } } - elsif ($count == $MAX_UNICODE_CODEPOINTS) { - if ($table == $property || $table->leader == $table) { + elsif ($count == $MAX_UNICODE_CODEPOINTS + && ($table == $property || $table->leader == $table) + && $table->property->status ne $PLACEHOLDER) + { Carp::my_carp("$table unexpectedly matches all Unicode code points. Proceeding anyway."); - } } - if ($table->fate == $SUPPRESSED) { + if ($table->fate >= $SUPPRESSED) { if (! 
$is_property) { my @children = $table->children; foreach my $child (@children) { - if ($child->fate != $SUPPRESSED) { + if ($child->fate < $SUPPRESSED) { Carp::my_carp_bug("'$table' is suppressed and has a child '$child' which isn't"); } } @@ -15825,6 +16698,7 @@ sub write_all_tables() { make_UCD; make_property_test_script() if $make_test_script; + make_normalization_test_script() if $make_norm_test_script; return; } @@ -16110,8 +16984,10 @@ sub make_property_test_script() { # or multiple lines. main::write doesn't count the lines. my @output; - foreach my $property (property_ref('*')) { - foreach my $table ($property->tables) { + # Sort these so get results in same order on different runs of this + # program + foreach my $property (sort { $a->name cmp $b->name } property_ref('*')) { + foreach my $table (sort { $a->name cmp $b->name } $property->tables) { # Find code points that match, and don't match this table. my $valid = $table->get_valid_code_point; @@ -16275,6 +17151,82 @@ sub make_property_test_script() { return; } +sub make_normalization_test_script() { + print "Making normalization test script\n" if $verbosity >= $PROGRESS; + + my $n_path = 'TestNorm.pl'; + + unshift @normalization_tests, <<'END'; +use utf8; +use Test::More; + +sub ord_string { # Convert packed ords to printable string + use charnames (); + return "'" . join("", map { '\N{' . charnames::viacode($_) . '}' } + unpack "U*", shift) . "'"; + #return "'" . join(" ", map { sprintf "%04X", $_ } unpack "U*", shift) . 
"'"; +} + +sub Test_N { + my ($source, $nfc, $nfd, $nfkc, $nfkd) = @_; + my $display_source = ord_string($source); + my $display_nfc = ord_string($nfc); + my $display_nfd = ord_string($nfd); + my $display_nfkc = ord_string($nfkc); + my $display_nfkd = ord_string($nfkd); + + use Unicode::Normalize; + # NFC + # nfc == toNFC(source) == toNFC(nfc) == toNFC(nfd) + # nfkc == toNFC(nfkc) == toNFC(nfkd) + # + # NFD + # nfd == toNFD(source) == toNFD(nfc) == toNFD(nfd) + # nfkd == toNFD(nfkc) == toNFD(nfkd) + # + # NFKC + # nfkc == toNFKC(source) == toNFKC(nfc) == toNFKC(nfd) == + # toNFKC(nfkc) == toNFKC(nfkd) + # + # NFKD + # nfkd == toNFKD(source) == toNFKD(nfc) == toNFKD(nfd) == + # toNFKD(nfkc) == toNFKD(nfkd) + + is(NFC($source), $nfc, "NFC($display_source) eq $display_nfc"); + is(NFC($nfc), $nfc, "NFC($display_nfc) eq $display_nfc"); + is(NFC($nfd), $nfc, "NFC($display_nfd) eq $display_nfc"); + is(NFC($nfkc), $nfkc, "NFC($display_nfkc) eq $display_nfkc"); + is(NFC($nfkd), $nfkc, "NFC($display_nfkd) eq $display_nfkc"); + + is(NFD($source), $nfd, "NFD($display_source) eq $display_nfd"); + is(NFD($nfc), $nfd, "NFD($display_nfc) eq $display_nfd"); + is(NFD($nfd), $nfd, "NFD($display_nfd) eq $display_nfd"); + is(NFD($nfkc), $nfkd, "NFD($display_nfkc) eq $display_nfkd"); + is(NFD($nfkd), $nfkd, "NFD($display_nfkd) eq $display_nfkd"); + + is(NFKC($source), $nfkc, "NFKC($display_source) eq $display_nfkc"); + is(NFKC($nfc), $nfkc, "NFKC($display_nfc) eq $display_nfkc"); + is(NFKC($nfd), $nfkc, "NFKC($display_nfd) eq $display_nfkc"); + is(NFKC($nfkc), $nfkc, "NFKC($display_nfkc) eq $display_nfkc"); + is(NFKC($nfkd), $nfkc, "NFKC($display_nfkd) eq $display_nfkc"); + + is(NFKD($source), $nfkd, "NFKD($display_source) eq $display_nfkd"); + is(NFKD($nfc), $nfkd, "NFKD($display_nfc) eq $display_nfkd"); + is(NFKD($nfd), $nfkd, "NFKD($display_nfd) eq $display_nfkd"); + is(NFKD($nfkc), $nfkd, "NFKD($display_nfkc) eq $display_nfkd"); + is(NFKD($nfkd), $nfkd, "NFKD($display_nfkd) eq 
$display_nfkd"); +} +END + + &write($n_path, + 1, # Is utf8; + [ + @normalization_tests, + 'done_testing();' + ]); + return; +} + # This is a list of the input files and how to handle them. The files are # processed in their order in this list. Some reordering is possible if # desired, but the v0 files should be first, and the extracted before the @@ -16364,10 +17316,25 @@ my @input_file_objects = ( # And for 5.14 Perls with 6.0, # have to also make changes - : ($v_version ge v6.0.0) + : ($v_version ge v6.0.0 + && $^V lt v5.17.0) ? \&filter_v6_ucd : undef), + # Early versions did not have the + # proper Unicode_1 names for the + # controls + (($v_version lt v3.0.0) + ? \&filter_early_U1_names + : undef), + + # Early versions did not correctly + # use the later method for giving + # decimal digit values + (($v_version le v3.2.0) + ? \&filter_bad_Nd_ucd + : undef), + # And the main filter \&filter_UnicodeData_line, ], @@ -16398,7 +17365,9 @@ my @input_file_objects = ( Each_Line_Handler => \&filter_unihan_line, ), Input_file->new('SpecialCasing.txt', v2.1.8, - Each_Line_Handler => \&filter_special_casing_line, + Each_Line_Handler => ($v_version eq 2.1.8) + ? \&filter_2_1_8_special_casing_line + : \&filter_special_casing_line, Pre_Handler => \&setup_special_casing, Has_Missings_Defaults => $IGNORED, ), @@ -16424,9 +17393,17 @@ my @input_file_objects = ( ), Input_file->new('BidiMirroring.txt', v3.0.1, Property => 'Bidi_Mirroring_Glyph', + Has_Missings_Defaults => ($v_version lt v6.2.0) + ? $NO_DEFAULTS + # Is <none> which doesn't mean + # anything to us, we will use the + # null string + : $IGNORED, + ), - Input_file->new("NormalizationTest.txt", v3.0.1, - Skip => 'Validation Tests', + Input_file->new("NormTest.txt", v3.0.0, + Handler => \&process_NormalizationsTest, + Skip => ($make_norm_test_script) ? 0 : 'Validation Tests', ), Input_file->new('CaseFolding.txt', v3.0.1, Pre_Handler => \&setup_case_folding, @@ -16454,16 +17431,23 @@ my @input_file_objects = ( ? 
\&filter_old_style_normalization_lines : undef), ), - Input_file->new('HangulSyllableType.txt', v4.0.0, + Input_file->new('HangulSyllableType.txt', v0, Has_Missings_Defaults => $NOT_IGNORED, - Property => 'Hangul_Syllable_Type'), + Property => 'Hangul_Syllable_Type', + Pre_Handler => ($v_version lt v4.0.0) + ? \&generate_hst + : undef, + ), Input_file->new("$AUXILIARY/WordBreakProperty.txt", v4.1.0, Property => 'Word_Break', Has_Missings_Defaults => $NOT_IGNORED, ), - Input_file->new("$AUXILIARY/GraphemeBreakProperty.txt", v4.1.0, + Input_file->new("$AUXILIARY/GraphemeBreakProperty.txt", v0, Property => 'Grapheme_Cluster_Break', Has_Missings_Defaults => $NOT_IGNORED, + Pre_Handler => ($v_version lt v4.1.0) + ? \&generate_GCB + : undef, ), Input_file->new("$AUXILIARY/GCBTest.txt", v4.1.0, Handler => \&process_GCB_test, @@ -16484,7 +17468,7 @@ my @input_file_objects = ( Input_file->new('NamedSequences.txt', v4.1.0, Handler => \&process_NamedSequences ), - Input_file->new('NameAliases.txt', v5.0.0, + Input_file->new('NameAliases.txt', v0, Property => 'Name_Alias', Pre_Handler => ($v_version le v6.0.0) ? \&setup_early_name_alias @@ -16700,7 +17684,7 @@ END # Create the list of input files from the objects we have defined, plus # version -my @input_files = 'version'; +my @input_files = qw(version Makefile); foreach my $object (@input_file_objects) { my $file = $object->file; next if ! defined $file; # Not all objects have files @@ -16731,6 +17715,10 @@ foreach my $in (@input_files) { } } +# We use 'Makefile' just to see if it has changed since the last time we +# rebuilt. Now discard it. +@input_files = grep { $_ ne 'Makefile' } @input_files; + my $rebuild = $write_unchanged_files # Rebuild: if unconditional rebuild || ! 
scalar @mktables_list_output_files # or if no outputs known || $old_start_time < $most_recent; # or out-of-date diff --git a/gnu/usr.bin/perl/lib/unicore/version b/gnu/usr.bin/perl/lib/unicore/version index dfda3e0b4f0..6abaeb2f907 100644 --- a/gnu/usr.bin/perl/lib/unicore/version +++ b/gnu/usr.bin/perl/lib/unicore/version @@ -1 +1 @@ -6.1.0 +6.2.0 diff --git a/gnu/usr.bin/perl/lib/utf8.pm b/gnu/usr.bin/perl/lib/utf8.pm index b59eabfbd7c..1d6992ccf03 100644 --- a/gnu/usr.bin/perl/lib/utf8.pm +++ b/gnu/usr.bin/perl/lib/utf8.pm @@ -2,7 +2,7 @@ package utf8; $utf8::hint_bits = 0x00800000; -our $VERSION = '1.09'; +our $VERSION = '1.10'; sub import { $^H |= $utf8::hint_bits; @@ -170,14 +170,14 @@ L<Encode>. =item * $flag = utf8::is_utf8(STRING) -(Since Perl 5.8.1) Test whether STRING is in UTF-8 internally. +(Since Perl 5.8.1) Test whether STRING is encoded internally in UTF-8. Functionally the same as Encode::is_utf8(). =item * $flag = utf8::valid(STRING) [INTERNAL] Test whether STRING is in a consistent state regarding -UTF-8. Will return true is well-formed UTF-8 and has the UTF-8 flag -on B<or> if string is held as bytes (both these states are 'consistent'). +UTF-8. Will return true if it is well-formed UTF-8 and has the UTF-8 flag +on B<or> if STRING is held as bytes (both these states are 'consistent'). Main reason for this routine is to allow Perl's testsuite to check that operations have left strings in a consistent state. You most probably want to use utf8::is_utf8() instead. diff --git a/gnu/usr.bin/perl/lib/utf8_heavy.pl b/gnu/usr.bin/perl/lib/utf8_heavy.pl index 23e06f9a9ac..fc422835abc 100644 --- a/gnu/usr.bin/perl/lib/utf8_heavy.pl +++ b/gnu/usr.bin/perl/lib/utf8_heavy.pl @@ -1,6 +1,8 @@ package utf8; use strict; use warnings; +use re "/aa"; # So we won't even try to look at above Latin1, potentially + # resulting in a recursive call sub DEBUG () { 0 } $|=1 if DEBUG; @@ -244,7 +246,7 @@ sub _loose_name ($) { # minus # Remove underscores between digits. 
- $part =~ s/( ?<= [0-9] ) _ (?= [0-9] ) //xg; + $part =~ s/(?<= [0-9] ) _ (?= [0-9] ) //xg; # No leading zeros (but don't make a single '0' # into a null string) diff --git a/gnu/usr.bin/perl/lib/version.pm b/gnu/usr.bin/perl/lib/version.pm index 3fcc5aadc6b..27774bd9c28 100644 --- a/gnu/usr.bin/perl/lib/version.pm +++ b/gnu/usr.bin/perl/lib/version.pm @@ -6,7 +6,7 @@ use strict; use vars qw(@ISA $VERSION $CLASS $STRICT $LAX *declare *qv); -$VERSION = 0.99; +$VERSION = 0.9902; $CLASS = 'version'; diff --git a/gnu/usr.bin/perl/lib/version/t/01base.t b/gnu/usr.bin/perl/lib/version/t/01base.t index f5784cd568c..9aa8052a303 100644 --- a/gnu/usr.bin/perl/lib/version/t/01base.t +++ b/gnu/usr.bin/perl/lib/version/t/01base.t @@ -9,7 +9,7 @@ use Test::More qw/no_plan/; BEGIN { (my $coretests = $0) =~ s'[^/]+\.t'coretests.pm'; require $coretests; - use_ok('version', 0.97); + use_ok('version', 0.9902); } diag "Tests with base class" unless $ENV{PERL_CORE}; @@ -32,3 +32,15 @@ my $v = eval { return IO::Handle->VERSION; }; ok defined($v), 'Fix for RT #47980'; + +{ # https://rt.cpan.org/Ticket/Display.html?id=81085 + eval { version::new() }; + like $@, qr'Usage: version::new\(class, version\)', + 'No bus err when called as function'; + eval { $x = 1; print version::new }; + like $@, qr'Usage: version::new\(class, version\)', + 'No implicit object creation when called as function'; + eval { $x = "version"; print version::new }; + like $@, qr'Usage: version::new\(class, version\)', + 'No implicit object creation when called as function'; +} diff --git a/gnu/usr.bin/perl/lib/version/t/02derived.t b/gnu/usr.bin/perl/lib/version/t/02derived.t index 3ea847eb140..c7afe0f9af8 100644 --- a/gnu/usr.bin/perl/lib/version/t/02derived.t +++ b/gnu/usr.bin/perl/lib/version/t/02derived.t @@ -10,7 +10,7 @@ use File::Temp qw/tempfile/; BEGIN { (my $coretests = $0) =~ s'[^/]+\.t'coretests.pm'; require $coretests; - use_ok("version", 0.97); + use_ok("version", 0.9902); # If we made it this far, we 
are ok. } diff --git a/gnu/usr.bin/perl/lib/version/t/03require.t b/gnu/usr.bin/perl/lib/version/t/03require.t index 90d33ebabeb..66c6bd3a85b 100644 --- a/gnu/usr.bin/perl/lib/version/t/03require.t +++ b/gnu/usr.bin/perl/lib/version/t/03require.t @@ -14,7 +14,7 @@ BEGIN { # Don't want to use, because we need to make sure that the import doesn't # fire just yet (some code does this to avoid importing qv() and delare()). require_ok("version"); -is $version::VERSION, 0.99, "Make sure we have the correct class"; +is $version::VERSION, 0.9902, "Make sure we have the correct class"; ok(!"main"->can("qv"), "We don't have the imported qv()"); ok(!"main"->can("declare"), "We don't have the imported declare()"); diff --git a/gnu/usr.bin/perl/lib/version/t/05sigdie.t b/gnu/usr.bin/perl/lib/version/t/05sigdie.t index 2a333392e5e..188f185587a 100644 --- a/gnu/usr.bin/perl/lib/version/t/05sigdie.t +++ b/gnu/usr.bin/perl/lib/version/t/05sigdie.t @@ -15,7 +15,7 @@ BEGIN { BEGIN { - use version 0.97; + use version 0.9902; } pass "Didn't get caught by the wrong DIE handler, which is a good thing"; diff --git a/gnu/usr.bin/perl/lib/version/t/06noop.t b/gnu/usr.bin/perl/lib/version/t/06noop.t index 63f707300b1..9d113ed6e41 100644 --- a/gnu/usr.bin/perl/lib/version/t/06noop.t +++ b/gnu/usr.bin/perl/lib/version/t/06noop.t @@ -7,7 +7,7 @@ use Test::More qw/no_plan/; BEGIN { - use_ok('version', 0.97); + use_ok('version', 0.9902); } my $v1 = version->new('1.2'); diff --git a/gnu/usr.bin/perl/lib/version/t/07locale.t b/gnu/usr.bin/perl/lib/version/t/07locale.t index 506b1bf50f8..3b67f3d77af 100644 --- a/gnu/usr.bin/perl/lib/version/t/07locale.t +++ b/gnu/usr.bin/perl/lib/version/t/07locale.t @@ -8,13 +8,15 @@ use File::Basename; use File::Temp qw/tempfile/; use POSIX qw/locale_h/; use Test::More tests => 7; +use Config; BEGIN { - use_ok('version', 0.97); + use_ok('version', 0.9902); } SKIP: { skip 'No locale testing for Perl < 5.6.0', 6 if $] < 5.006; + skip 'No locale testing without 
d_setlocale', 6 if(!$Config{d_setlocale}); # test locale handling my $warning; local $SIG{__WARN__} = sub { $warning = $_[0] }; diff --git a/gnu/usr.bin/perl/lib/version/t/coretests.pm b/gnu/usr.bin/perl/lib/version/t/coretests.pm index 3f785e47263..15a1f1ff8c9 100644 --- a/gnu/usr.bin/perl/lib/version/t/coretests.pm +++ b/gnu/usr.bin/perl/lib/version/t/coretests.pm @@ -505,6 +505,7 @@ EOF } { + local $Data::Dumper::Sortkeys= 1; # http://rt.cpan.org/Public/Bug/Display.html?id=30004 my $v1 = $CLASS->$method("v0.1_1"); (my $alpha1 = Dumper($v1)) =~ s/.+'alpha' => ([^,]+),.+/$1/ms; @@ -538,9 +539,10 @@ EOF { # https://rt.cpan.org/Ticket/Display.html?id=72365 # https://rt.perl.org/rt3/Ticket/Display.html?id=102586 + # https://rt.cpan.org/Ticket/Display.html?id=78328 eval 'my $v = $CLASS->$method("version")'; like $@, qr/Invalid version format/, - 'The string "version" is not a version'; + "The string 'version' is not a version for $method"; eval 'my $v = $CLASS->$method("ver510n")'; like $@, qr/Invalid version format/, 'All strings starting with "v" are not versions'; @@ -593,6 +595,14 @@ SKIP: { eval { _112478->VERSION(9e99) }; unlike $@, qr/panic/, '->VERSION(9e99) does not panic'; } + + { # https://rt.cpan.org/Ticket/Display.html?id=79259 + my $v = $CLASS->new("0.52_0"); + ok $v->is_alpha, 'Just checking'; + is $v->numify, '0.520', 'Correctly nummified'; + } + } 1; + diff --git a/gnu/usr.bin/perl/lib/warnings.pm b/gnu/usr.bin/perl/lib/warnings.pm index 3b2d87dc82d..7d988cbd992 100644 --- a/gnu/usr.bin/perl/lib/warnings.pm +++ b/gnu/usr.bin/perl/lib/warnings.pm @@ -5,7 +5,7 @@ package warnings; -our $VERSION = '1.13'; +our $VERSION = '1.18'; # Verify that we're called correctly so that warnings will work. # see also strict.pm. 
@@ -225,119 +225,138 @@ our %Offsets = ( 'non_unicode' => 96, 'nonchar' => 98, 'surrogate' => 100, + + # Warnings Categories added in Perl 5.017 + + 'experimental' => 102, + 'experimental::lexical_subs'=> 104, + 'experimental::lexical_topic'=> 106, + 'experimental::regex_sets'=> 108, + 'experimental::smartmatch'=> 110, ); our %Bits = ( - 'all' => "\x55\x55\x55\x55\x55\x55\x55\x55\x55\x55\x55\x55\x15", # [0..50] - 'ambiguous' => "\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00", # [29] - 'bareword' => "\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00", # [30] - 'closed' => "\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [6] - 'closure' => "\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [1] - 'debugging' => "\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00", # [22] - 'deprecated' => "\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [2] - 'digit' => "\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00", # [31] - 'exec' => "\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [7] - 'exiting' => "\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [3] - 'glob' => "\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [4] - 'illegalproto' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00", # [47] - 'imprecision' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00", # [46] - 'inplace' => "\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00", # [23] - 'internal' => "\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00", # [24] - 'io' => "\x00\x54\x55\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [5..11] - 'layer' => "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [8] - 'malloc' => "\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00", # [25] - 'misc' => "\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [12] - 'newline' => "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [9] - 'non_unicode' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", # [48] - 'nonchar' => 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04", # [49] - 'numeric' => "\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [13] - 'once' => "\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [14] - 'overflow' => "\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [15] - 'pack' => "\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00", # [16] - 'parenthesis' => "\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00", # [32] - 'pipe' => "\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [10] - 'portable' => "\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00", # [17] - 'precedence' => "\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00", # [33] - 'printf' => "\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00", # [34] - 'prototype' => "\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00", # [35] - 'qw' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00", # [36] - 'recursion' => "\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00", # [18] - 'redefine' => "\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00", # [19] - 'regexp' => "\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00", # [20] - 'reserved' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00", # [37] - 'semicolon' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00", # [38] - 'severe' => "\x00\x00\x00\x00\x00\x54\x05\x00\x00\x00\x00\x00\x00", # [21..25] - 'signal' => "\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00", # [26] - 'substr' => "\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00", # [27] - 'surrogate' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10", # [50] - 'syntax' => "\x00\x00\x00\x00\x00\x00\x00\x55\x55\x15\x00\x40\x00", # [28..38,47] - 'taint' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00", # [39] - 'threads' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00", # [40] - 'uninitialized' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00", # [41] - 'unopened' => 
"\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [11] - 'unpack' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00", # [42] - 'untie' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00", # [43] - 'utf8' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x15", # [44,48..50] - 'void' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00", # [45] + 'all' => "\x55\x55\x55\x55\x55\x55\x55\x55\x55\x55\x55\x55\x55\x55", # [0..55] + 'ambiguous' => "\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00", # [29] + 'bareword' => "\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00", # [30] + 'closed' => "\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [6] + 'closure' => "\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [1] + 'debugging' => "\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00", # [22] + 'deprecated' => "\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [2] + 'digit' => "\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00", # [31] + 'exec' => "\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [7] + 'exiting' => "\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [3] + 'experimental' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x55", # [51..55] + 'experimental::lexical_subs'=> "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", # [52] + 'experimental::lexical_topic'=> "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04", # [53] + 'experimental::regex_sets'=> "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10", # [54] + 'experimental::smartmatch'=> "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40", # [55] + 'glob' => "\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [4] + 'illegalproto' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00", # [47] + 'imprecision' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00", # [46] + 'inplace' => 
"\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00", # [23] + 'internal' => "\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00", # [24] + 'io' => "\x00\x54\x55\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [5..11] + 'layer' => "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [8] + 'malloc' => "\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00", # [25] + 'misc' => "\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [12] + 'newline' => "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [9] + 'non_unicode' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00", # [48] + 'nonchar' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00", # [49] + 'numeric' => "\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [13] + 'once' => "\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [14] + 'overflow' => "\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [15] + 'pack' => "\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [16] + 'parenthesis' => "\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00", # [32] + 'pipe' => "\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [10] + 'portable' => "\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [17] + 'precedence' => "\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00", # [33] + 'printf' => "\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00", # [34] + 'prototype' => "\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00", # [35] + 'qw' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00", # [36] + 'recursion' => "\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [18] + 'redefine' => "\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [19] + 'regexp' => "\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00", # [20] + 'reserved' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00", # [37] + 'semicolon' => 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00", # [38] + 'severe' => "\x00\x00\x00\x00\x00\x54\x05\x00\x00\x00\x00\x00\x00\x00", # [21..25] + 'signal' => "\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00", # [26] + 'substr' => "\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00", # [27] + 'surrogate' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00", # [50] + 'syntax' => "\x00\x00\x00\x00\x00\x00\x00\x55\x55\x15\x00\x40\x00\x00", # [28..38,47] + 'taint' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00", # [39] + 'threads' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00", # [40] + 'uninitialized' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00", # [41] + 'unopened' => "\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [11] + 'unpack' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00", # [42] + 'untie' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00", # [43] + 'utf8' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x15\x00", # [44,48..50] + 'void' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00", # [45] ); our %DeadBits = ( - 'all' => "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\x2a", # [0..50] - 'ambiguous' => "\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00", # [29] - 'bareword' => "\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00", # [30] - 'closed' => "\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [6] - 'closure' => "\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [1] - 'debugging' => "\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00", # [22] - 'deprecated' => "\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [2] - 'digit' => "\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00", # [31] - 'exec' => "\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [7] - 'exiting' => "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [3] - 'glob' => 
"\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [4] - 'illegalproto' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00", # [47] - 'imprecision' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00", # [46] - 'inplace' => "\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00", # [23] - 'internal' => "\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00", # [24] - 'io' => "\x00\xa8\xaa\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [5..11] - 'layer' => "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [8] - 'malloc' => "\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00", # [25] - 'misc' => "\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [12] - 'newline' => "\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [9] - 'non_unicode' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02", # [48] - 'nonchar' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08", # [49] - 'numeric' => "\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [13] - 'once' => "\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [14] - 'overflow' => "\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [15] - 'pack' => "\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00", # [16] - 'parenthesis' => "\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00", # [32] - 'pipe' => "\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [10] - 'portable' => "\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00", # [17] - 'precedence' => "\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00", # [33] - 'printf' => "\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00", # [34] - 'prototype' => "\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00", # [35] - 'qw' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00", # [36] - 'recursion' => "\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00", # [18] - 'redefine' => "\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00", # [19] - 'regexp' => 
"\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00", # [20] - 'reserved' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00", # [37] - 'semicolon' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00", # [38] - 'severe' => "\x00\x00\x00\x00\x00\xa8\x0a\x00\x00\x00\x00\x00\x00", # [21..25] - 'signal' => "\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00", # [26] - 'substr' => "\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00", # [27] - 'surrogate' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20", # [50] - 'syntax' => "\x00\x00\x00\x00\x00\x00\x00\xaa\xaa\x2a\x00\x80\x00", # [28..38,47] - 'taint' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00", # [39] - 'threads' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00", # [40] - 'uninitialized' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00", # [41] - 'unopened' => "\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [11] - 'unpack' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00", # [42] - 'untie' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00", # [43] - 'utf8' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x2a", # [44,48..50] - 'void' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00", # [45] + 'all' => "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa", # [0..55] + 'ambiguous' => "\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00", # [29] + 'bareword' => "\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00", # [30] + 'closed' => "\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [6] + 'closure' => "\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [1] + 'debugging' => "\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00", # [22] + 'deprecated' => "\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [2] + 'digit' => "\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00", # [31] + 'exec' => "\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [7] + 'exiting' => 
"\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [3] + 'experimental' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\xaa", # [51..55] + 'experimental::lexical_subs'=> "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02", # [52] + 'experimental::lexical_topic'=> "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08", # [53] + 'experimental::regex_sets'=> "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20", # [54] + 'experimental::smartmatch'=> "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80", # [55] + 'glob' => "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [4] + 'illegalproto' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00", # [47] + 'imprecision' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00", # [46] + 'inplace' => "\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00", # [23] + 'internal' => "\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00", # [24] + 'io' => "\x00\xa8\xaa\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [5..11] + 'layer' => "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [8] + 'malloc' => "\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00", # [25] + 'misc' => "\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [12] + 'newline' => "\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [9] + 'non_unicode' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00", # [48] + 'nonchar' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00", # [49] + 'numeric' => "\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [13] + 'once' => "\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [14] + 'overflow' => "\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [15] + 'pack' => "\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [16] + 'parenthesis' => "\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00", # [32] + 'pipe' => 
"\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [10] + 'portable' => "\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [17] + 'precedence' => "\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00", # [33] + 'printf' => "\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00", # [34] + 'prototype' => "\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00", # [35] + 'qw' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00", # [36] + 'recursion' => "\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [18] + 'redefine' => "\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [19] + 'regexp' => "\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00", # [20] + 'reserved' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00", # [37] + 'semicolon' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00", # [38] + 'severe' => "\x00\x00\x00\x00\x00\xa8\x0a\x00\x00\x00\x00\x00\x00\x00", # [21..25] + 'signal' => "\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00", # [26] + 'substr' => "\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00", # [27] + 'surrogate' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00", # [50] + 'syntax' => "\x00\x00\x00\x00\x00\x00\x00\xaa\xaa\x2a\x00\x80\x00\x00", # [28..38,47] + 'taint' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00", # [39] + 'threads' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00", # [40] + 'uninitialized' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00", # [41] + 'unopened' => "\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [11] + 'unpack' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00", # [42] + 'untie' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00", # [43] + 'utf8' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x2a\x00", # [44,48..50] + 'void' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00", # [45] ); -$NONE = 
"\0\0\0\0\0\0\0\0\0\0\0\0\0"; -$LAST_BIT = 102 ; -$BYTES = 13 ; +$NONE = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; +$DEFAULT = "\x10\x01\x00\x00\x00\x50\x04\x00\x00\x00\x00\x00\x00\x55", # [2,52..55,4,22,23,25] +$LAST_BIT = 112 ; +$BYTES = 14 ; $All = "" ; vec($All, $Offsets{'all'}, 2) = 3 ; @@ -387,7 +406,7 @@ sub import { shift; - my $mask = ${^WARNING_BITS} // ($^W ? $Bits{all} : $NONE) ; + my $mask = ${^WARNING_BITS} // ($^W ? $Bits{all} : $DEFAULT) ; if (vec($mask, $Offsets{'all'}, 1)) { $mask |= $Bits{'all'} ; @@ -403,7 +422,7 @@ sub unimport shift; my $catmask ; - my $mask = ${^WARNING_BITS} // ($^W ? $Bits{all} : $NONE) ; + my $mask = ${^WARNING_BITS} // ($^W ? $Bits{all} : $DEFAULT) ; if (vec($mask, $Offsets{'all'}, 1)) { $mask |= $Bits{'all'} ; @@ -482,8 +501,11 @@ sub __chk $i = _error_loc(); # see where Carp will allocate the error } - # Defaulting this to 0 reduces complexity in code paths below. - my $callers_bitmask = (caller($i))[9] || 0 ; + # Default to 0 if caller returns nothing. Default to $DEFAULT if it + # explicitly returns undef. + my(@callers_bitmask) = (caller($i))[9] ; + my $callers_bitmask = + @callers_bitmask ? $callers_bitmask[0] // $DEFAULT : 0 ; my @results; foreach my $type (FATAL, NORMAL) { @@ -561,7 +583,7 @@ sub warnif # These are not part of any public interface, so we can delete them to save # space. -delete $warnings::{$_} foreach qw(NORMAL FATAL MESSAGE); +delete @warnings::{qw(NORMAL FATAL MESSAGE)}; 1; |