diff options
author | 2014-03-24 15:05:12 +0000 | |
---|---|---|
committer | 2014-03-24 15:05:12 +0000 | |
commit | e9ce384231aabe5c5a622aa68cef46f2c5bfdb4a (patch) | |
tree | 5029388537325eaad6674da4dab6714cda1644e5 /gnu/usr.bin/perl/cpan/Encode | |
parent | Import perl-5.18.2 (diff) | |
download | wireguard-openbsd-e9ce384231aabe5c5a622aa68cef46f2c5bfdb4a.tar.xz wireguard-openbsd-e9ce384231aabe5c5a622aa68cef46f2c5bfdb4a.zip |
Merge perl-5.18.2 plus local patches, remove old files
OK espie@ sthen@ deraadt@
Diffstat (limited to 'gnu/usr.bin/perl/cpan/Encode')
-rw-r--r-- | gnu/usr.bin/perl/cpan/Encode/Changes | 72 | ||||
-rw-r--r-- | gnu/usr.bin/perl/cpan/Encode/Encode.pm | 189 | ||||
-rw-r--r-- | gnu/usr.bin/perl/cpan/Encode/Encode.xs | 2 | ||||
-rw-r--r-- | gnu/usr.bin/perl/cpan/Encode/Makefile.PL | 5 | ||||
-rw-r--r-- | gnu/usr.bin/perl/cpan/Encode/Unicode/Unicode.xs | 30 | ||||
-rw-r--r-- | gnu/usr.bin/perl/cpan/Encode/encoding.pm | 16 | ||||
-rw-r--r-- | gnu/usr.bin/perl/cpan/Encode/lib/Encode/Alias.pm | 5 | ||||
-rw-r--r-- | gnu/usr.bin/perl/cpan/Encode/lib/Encode/Encoder.pm | 2 | ||||
-rw-r--r-- | gnu/usr.bin/perl/cpan/Encode/lib/Encode/GSM0338.pm | 7 | ||||
-rw-r--r-- | gnu/usr.bin/perl/cpan/Encode/lib/Encode/Unicode/UTF7.pm | 5 | ||||
-rw-r--r-- | gnu/usr.bin/perl/cpan/Encode/t/Mod_EUCJP.pm | 3 | ||||
-rwxr-xr-x | gnu/usr.bin/perl/cpan/Encode/t/Unicode.t | 50 | ||||
-rwxr-xr-x | gnu/usr.bin/perl/cpan/Encode/t/enc_data.t | 3 | ||||
-rwxr-xr-x | gnu/usr.bin/perl/cpan/Encode/t/enc_eucjp.t | 3 | ||||
-rwxr-xr-x | gnu/usr.bin/perl/cpan/Encode/t/enc_module.t | 3 | ||||
-rwxr-xr-x | gnu/usr.bin/perl/cpan/Encode/t/enc_utf8.t | 3 | ||||
-rwxr-xr-x | gnu/usr.bin/perl/cpan/Encode/t/jperl.t | 3 | ||||
-rwxr-xr-x | gnu/usr.bin/perl/cpan/Encode/t/piconv.t | 6 |
18 files changed, 288 insertions, 119 deletions
diff --git a/gnu/usr.bin/perl/cpan/Encode/Changes b/gnu/usr.bin/perl/cpan/Encode/Changes index 54234214ca2..c916db96ea8 100644 --- a/gnu/usr.bin/perl/cpan/Encode/Changes +++ b/gnu/usr.bin/perl/cpan/Encode/Changes @@ -1,8 +1,76 @@ # Revision history for Perl extension Encode. # -# $Id: Changes,v 2.44 2011/08/09 07:49:44 dankogai Exp dankogai $ +# $Id: Changes,v 2.49 2013/03/05 03:12:49 dankogai Exp dankogai $ # -$Revision: 2.44 $ $Date: 2011/08/09 07:49:44 $ +$Revision: 2.49 $ $Date: 2013/03/05 03:12:49 $ +! Encode.xs + Addressed: Encoding objects leak memory if decoding fails + https://github.com/dankogai/p5-encode/issues/8 + +2.48 2013/02/18 02:23:56 +! encoding.pm + t/Mod_EUCJP.pm t/enc_data.t t/enc_eucjp.t t/enc_module.t t/enc_utf8.t + t/encoding.t t/jperl.t + [PATCH] Deprecate encoding.pm + https://rt.cpan.org/Ticket/Display.html?id=81255 +! Encode/Supported.pod + Fixed: Pod errors + https://rt.cpan.org/Ticket/Display.html?id=81426 +! Encode.pm t/Encode.t + [PATCH] Fix for shared hash key scalars + https://rt.cpan.org/Ticket/Display.html?id=80608 +! Encode.pm + Fixed: Uninitialized value warning from Encode->encodings() + https://rt.cpan.org/Ticket/Display.html?id=80181 +! Makefile.PL + Install to 'site' instead of 'perl' when perl version is 5.11+ + https://rt.cpan.org/Ticket/Display.html?id=78917 +! Encode/Makefile_PL.e2x + find enc2xs.bat if it works on windows. + https://github.com/dankogai/p5-encode/pull/7 +! t/piconv.t + Fix finding piconv in t/piconv.t + https://github.com/dankogai/p5-encode/pull/6 + +2.47 2012/08/15 05:36:16 +! Encode.pm + POD Fixes: Copyright and mail address +! Makefile.PL + Added LICENSE => 'perl' +! lib/Encode/GSM0338.pm t/gsm0338.t + REALLY fixed RT#75670: Wrong decoding for GSM 3.38 character \x09 + ucm/gsm0338.ucm is dropped from MANIFEST since 2.25 + but I was fixing the wrong file! + https://rt.cpan.org/Ticket/Display.html?id=75670 + +! 2.46 2012/08/12 05:49:30 +! Encode.pm + Fixed: RT#78917 for I18N-Charset: Fails with Encode 2.45 + To be more exact, 2.45 broke Encode->encodings(':all') + https://rt.cpan.org/Ticket/Display.html?id=78917 + +2.45 2012/08/05 23:08:49 +! lib/Encode/Alias.pm + Addressed RT#78125: Missed Mac Alias x-mac-ce + https://rt.cpan.org/Ticket/Display.html?id=78125 +! lib/Encode/Unicode/UTF7.pm + Applied the patch in RT#76711 + https://rt.cpan.org/Ticket/Display.html?id=76711 +! ucm/gsm0338.ucm + Addressed RT#75670: Wrong decoding for GSM 3.38 character \x09 + https://rt.cpan.org/Ticket/Display.html?id=75670 +! Encode.pm + Applied the patch in RT#72519 + https://rt.cpan.org/Ticket/Display.html?id=72519 +! Unicode/Unicode.xs + t/Unicode.t + Bug fixes in Unicode.xs by chansen + https://github.com/dankogai/p5-encode/pull/5 +! Encode.pm + various POD improvements by daxim + https://github.com/dankogai/p5-encode/pull/4 + +2.44 2011/08/09 07:49:44 ! Unicode/Unicode.xs Addressed the following: Date: Fri, 22 Jul 2011 13:58:43 +0200 diff --git a/gnu/usr.bin/perl/cpan/Encode/Encode.pm b/gnu/usr.bin/perl/cpan/Encode/Encode.pm index 8c5e613659c..fe81119ef7b 100644 --- a/gnu/usr.bin/perl/cpan/Encode/Encode.pm +++ b/gnu/usr.bin/perl/cpan/Encode/Encode.pm @@ -1,10 +1,10 @@ # -# $Id: Encode.pm,v 2.44 2011/08/09 07:49:44 dankogai Exp dankogai $ +# $Id: Encode.pm,v 2.49 2013/03/05 03:13:47 dankogai Exp dankogai $ # package Encode; use strict; use warnings; -our $VERSION = '2.44_01'; +our $VERSION = sprintf "%d.%02d", q$Revision: 2.49 $ =~ /(\d+)/g; use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG}; use XSLoader (); XSLoader::load( __PACKAGE__, $VERSION ); @@ -61,9 +61,9 @@ eval { }; sub encodings { - my $class = shift; my %enc; - if ( @_ and $_[0] eq ":all" ) { + my $arg = $_[1] || ''; + if ( $arg eq ":all" ) { %enc = ( %Encoding, %ExtModule ); } else { @@ -146,7 +146,7 @@ sub clone_encoding($) { sub encode($$;$) { my ( $name, $string, $check ) = @_; return undef unless defined $string; - $string .= '' if ref $string; # stringify; + $string .= ''; # stringify; $check ||= 0; unless ( defined $name ) { require Carp; @@ -166,7 +166,7 @@ sub encode($$;$) { sub decode($$;$) { my ( $name, $octets, $check ) = @_; return undef unless defined $octets; - $octets .= '' if ref $octets; + $octets .= ''; $check ||= 0; my $enc = find_encoding($name); unless ( defined $enc ) { @@ -247,7 +247,7 @@ sub predefine_encodings { package Encode::UTF_EBCDIC; push @Encode::UTF_EBCDIC::ISA, 'Encode::Encoding'; *decode = sub { - my ( $obj, $str, $chk ) = @_; + my ( undef, $str, $chk ) = @_; my $res = ''; for ( my $i = 0 ; $i < length($str) ; $i++ ) { $res .= @@ -259,7 +259,7 @@ sub predefine_encodings { return $res; }; *encode = sub { - my ( $obj, $str, $chk ) = @_; + my ( undef, $str, $chk ) = @_; my $res = ''; for ( my $i = 0 ; $i < length($str) ; $i++ ) { $res .= @@ -278,7 +278,7 @@ sub predefine_encodings { package Encode::Internal; push @Encode::Internal::ISA, 'Encode::Encoding'; *decode = sub { - my ( $obj, $str, $chk ) = @_; + my ( undef, $str, $chk ) = @_; utf8::upgrade($str); $_[1] = '' if $chk; return $str; @@ -303,7 +303,7 @@ sub predefine_encodings { else { Encode::DEBUG and warn __PACKAGE__, " XS off"; *decode = sub { - my ( $obj, $octets, $chk ) = @_; + my ( undef, $octets, $chk ) = @_; my $str = Encode::decode_utf8($octets); if ( defined $str ) { $_[1] = '' if $chk; @@ -312,7 +312,7 @@ sub predefine_encodings { return undef; }; *encode = sub { - my ( $obj, $string, $chk ) = @_; + my ( undef, $string, $chk ) = @_; my $octets = Encode::encode_utf8($string); $_[1] = '' if $chk; return $octets; @@ -320,7 +320,7 @@ sub predefine_encodings { } *cat_decode = sub { # ($obj, $dst, $src, $pos, $trm, $chk) # currently ignores $chk - my ( $obj, undef, undef, $pos, $trm ) = @_; + my ( undef, undef, undef, $pos, $trm ) = @_; my ( $rdst, $rsrc, $rpos ) = \@_[ 1, 2, 3 ]; use bytes; if ( ( my $npos = index( $$rsrc, $trm, $pos ) ) >= 0 ) { @@ -351,7 +351,9 @@ Encode - character encodings in Perl =head1 SYNOPSIS - use Encode; + use Encode qw(decode encode); + $characters = decode('UTF-8', $octets, Encode::FB_CROAK); + $octets = encode('UTF-8', $characters, Encode::FB_CROAK); =head2 Table of Contents @@ -360,16 +362,23 @@ to fit in one document. This one itself explains the top-level APIs and general topics at a glance. For other topics and more details, see the documentation for these modules: - Name Description - -------------------------------------------------------- - Encode::Alias Alias definitions to encodings - Encode::Encoding Encode Implementation Base Class - Encode::Supported List of Supported Encodings - Encode::CN Simplified Chinese Encodings - Encode::JP Japanese Encodings - Encode::KR Korean Encodings - Encode::TW Traditional Chinese Encodings - -------------------------------------------------------- +=over 2 + +=item L<Encode::Alias> - Alias definitions to encodings + +=item L<Encode::Encoding> - Encode Implementation Base Class + +=item L<Encode::Supported> - List of Supported Encodings + +=item L<Encode::CN> - Simplified Chinese Encodings + +=item L<Encode::JP> - Japanese Encodings + +=item L<Encode::KR> - Korean Encodings + +=item L<Encode::TW> - Traditional Chinese Encodings + +=back =head1 DESCRIPTION @@ -396,32 +405,35 @@ process "sequences of bytes". This is not a problem for Perl: because a byte has 256 possible values, it easily fits in Perl's much larger "logical character". -=head2 TERMINOLOGY +This document mostly explains the I<how>. L<perlunitut> and L<perlunifaq> +explain the I<why>. -=over 2 +=head2 TERMINOLOGY -=item * +=head3 character -I<character>: a character in the range 0 .. 2**32-1 (or more); +A character in the range 0 .. 2**32-1 (or more); what Perl's strings are made of. -=item * +=head3 byte -I<byte>: a character in the range 0..255; -A special case of a Perl character. +A character in the range 0..255; +a special case of a Perl character. -=item * +=head3 octet -I<octet>: 8 bits of data, with ordinal values 0..255; -Term for bytes passed to or from a non-Perl context, such as a disk file. - -=back +8 bits of data, with ordinal values 0..255; +term for bytes passed to or from a non-Perl context, such as a disk file, +standard I/O stream, database, command-line argument, environment variable, +socket etc. =head1 THE PERL ENCODING API -=over 2 +=head2 Basic methods -=item $octets = encode(ENCODING, STRING[, CHECK]) +=head3 encode + + $octets = encode(ENCODING, STRING[, CHECK]) Encodes the scalar value I<STRING> from Perl's internal form into I<ENCODING> and returns a sequence of octets. I<ENCODING> can be either a @@ -441,7 +453,9 @@ contains a completely valid utf8 string. See L</"The UTF8 flag"> below. If the $string is C<undef>, then C<undef> is returned. -=item $string = decode(ENCODING, OCTETS[, CHECK]) +=head3 decode + + $string = decode(ENCODING, OCTETS[, CHECK]) This function returns the string that results from decoding the scalar value I<OCTETS>, assumed to be a sequence of octets in I<ENCODING>, into @@ -463,7 +477,9 @@ below. If the $string is C<undef>, then C<undef> is returned. -=item [$obj =] find_encoding(ENCODING) +=head3 find_encoding + + [$obj =] find_encoding(ENCODING) Returns the I<encoding object> corresponding to I<ENCODING>. Returns C<undef> if no matching I<ENCODING> is find. The returned object is @@ -489,15 +505,17 @@ You can therefore save time by reusing this object as follows; ... # now do something with $utf8; } -Besides C<< ->decode >> and C<< ->encode >>, other methods are -available as well. For instance, C<< ->name >> returns the canonical +Besides L</decode> and L</encode>, other methods are +available as well. For instance, C<name()> returns the canonical name of the encoding object. find_encoding("latin1")->name; # iso-8859-1 See L<Encode::Encoding> for details. -=item [$length =] from_to($octets, FROM_ENC, TO_ENC [, CHECK]) +=head3 from_to + + [$length =] from_to($octets, FROM_ENC, TO_ENC [, CHECK]) Converts I<in-place> data between two encodings. The data in $octets must be encoded as octets and I<not> as characters in Perl's internal @@ -513,7 +531,7 @@ and to convert it back: Because the conversion happens in place, the data to be converted cannot be a string constant: it must be a scalar variable. -from_to() returns the length of the converted string in octets on success, +C<from_to()> returns the length of the converted string in octets on success, and C<undef> on error. B<CAVEAT>: The following operations may look the same, but are not: @@ -542,14 +560,18 @@ followed by C<encode> as follows: $octets = encode($to, decode($from, $octets, $check_from), $check_to); -=item $octets = encode_utf8($string); +=head3 encode_utf8 + + $octets = encode_utf8($string); Equivalent to C<$octets = encode("utf8", $string)>. The characters in $string are encoded in Perl's internal format, and the result is returned as a sequence of octets. Because all possible characters in Perl have a (loose, not strict) UTF-8 representation, this function cannot fail. -=item $string = decode_utf8($octets [, CHECK]); +=head3 decode_utf8 + + $string = decode_utf8($octets [, CHECK]); Equivalent to C<$string = decode("utf8", $octets [, CHECK])>. The sequence of octets represented by $octets is decoded @@ -558,8 +580,6 @@ Because not all sequences of octets are valid UTF-8, it is quite possible for this function to fail. For CHECK, see L</"Handling Malformed Data">. -=back - =head2 Listing available encodings use Encode; @@ -591,7 +611,7 @@ To add a new alias to a given encoding, use: define_alias(NEWNAME => ENCODING); After that, I<NEWNAME> can be used as an alias for I<ENCODING>. -<ENCODING> may be either the name of an encoding or an +I<ENCODING> may be either the name of an encoding or an I<encoding object>. Before you do that, first make sure the alias is nonexistent using @@ -602,7 +622,7 @@ For example: Encode::resolve_alias("iso-8859-12") # false; nonexistent Encode::resolve_alias($name) eq $name # true if $name is canonical -resolve_alias() does not need C<use Encode::Alias>; it can be +C<resolve_alias()> does not need C<use Encode::Alias>; it can be imported via C<use Encode qw(resolve_alias)>. See L<Encode::Alias> for details. @@ -614,7 +634,7 @@ IANA Character Set Registry, commonly seen as C<< Content-Type: text/plain; charset=I<WHATEVER> >>. For most cases, the canonical name works, but sometimes it does not, most notably with "utf-8-strict". -As of C<Encode> version 2.21, a new method C<mime_name()> is thereforeadded. +As of C<Encode> version 2.21, a new method C<mime_name()> is therefore added. use Encode; my $enc = find_encoding("UTF-8"); @@ -668,7 +688,7 @@ C<perlio_ok> method on it: perlio_ok("euc-jp") Fortunately, all encodings that come with C<Encode> core are C<PerlIO>-savvy -except for "hz" and "ISO-2022-kr". For the gory details, see +except for C<hz> and C<ISO-2022-kr>. For the gory details, see L<Encode::Encoding> and L<Encode::PerlIO>. =head1 Handling Malformed Data @@ -680,20 +700,15 @@ encountering malformed data. Without I<CHECK>, C<Encode::FB_DEFAULT> As of version 2.12, C<Encode> supports coderef values for C<CHECK>; see below. -=over 2 - -=item B<NOTE:> Not all encoding support this feature - -Some encodings ignore I<CHECK> argument. For example, +B<NOTE:> Not all encodings support this feature. +Some encodings ignore the I<CHECK> argument. For example, L<Encode::Unicode> ignores I<CHECK> and it always croaks on error. -=back +=head2 List of I<CHECK> values -Now here is the list of I<CHECK> values available +=head3 FB_DEFAULT -=over 2 - -=item I<CHECK> = Encode::FB_DEFAULT ( == 0) + I<CHECK> = Encode::FB_DEFAULT ( == 0) If I<CHECK> is 0, encoding and decoding replace any malformed character with a I<substitution character>. When you encode, I<SUBCHAR> is used. @@ -701,13 +716,17 @@ When you decode, the Unicode REPLACEMENT CHARACTER, code point U+FFFD, is used. If the data is supposed to be UTF-8, an optional lexical warning of warning category C<"utf8"> is given. -=item I<CHECK> = Encode::FB_CROAK ( == 1) +=head3 FB_CROAK + + I<CHECK> = Encode::FB_CROAK ( == 1) If I<CHECK> is 1, methods immediately die with an error message. Therefore, when I<CHECK> is 1, you should trap exceptions with C<eval{}>, unless you really want to let it C<die>. -=item I<CHECK> = Encode::FB_QUIET +=head3 FB_QUIET + + I<CHECK> = Encode::FB_QUIET If I<CHECK> is set to C<Encode::FB_QUIET>, encoding and decoding immediately return the portion of the data that has been processed so far when an @@ -724,17 +743,25 @@ code to do exactly that: # $buffer now contains the unprocessed partial character } -=item I<CHECK> = Encode::FB_WARN +=head3 FB_WARN + + I<CHECK> = Encode::FB_WARN This is the same as C<FB_QUIET> above, except that instead of being silent on errors, it issues a warning. This is handy for when you are debugging. +=head3 FB_PERLQQ FB_HTMLCREF FB_XMLCREF + +=over 2 + =item perlqq mode (I<CHECK> = Encode::FB_PERLQQ) =item HTML charref mode (I<CHECK> = Encode::FB_HTMLCREF) =item XML charref mode (I<CHECK> = Encode::FB_XMLCREF) +=back + For encodings that are implemented by the C<Encode::XS> module, C<CHECK> C<==> C<Encode::FB_PERLQQ> puts C<encode> and C<decode> into C<perlqq> fallback mode. @@ -750,7 +777,7 @@ XML uses C<&#xI<HHHH>;> where I<HHHH> is the hexadecimal number. In C<Encode> 2.10 or later, C<LEAVE_SRC> is also implied. -=item The bitmask +=head3 The bitmask These modes are all actually set via a bitmask. Here is how the C<FB_I<XXX>> constants are laid out. You can import the C<FB_I<XXX>> constants via @@ -766,22 +793,18 @@ constants via C<use Encode qw(:fallback_all)>. HTMLCREF 0x0200 XMLCREF 0x0400 -=back +=head3 LEAVE_SRC -=over 2 - -=item Encode::LEAVE_SRC + Encode::LEAVE_SRC If the C<Encode::LEAVE_SRC> bit is I<not> set but I<CHECK> is set, then the -second argument to encode() or decode() will be overwritten in place. +source string to encode() or decode() will be overwritten in place. If you're not interested in this, then bitwise-OR it with the bitmask. -=back - =head2 coderef for CHECK As of C<Encode> 2.12, C<CHECK> can also be a code reference which takes the -ordinal value of the unmapped caharacter as an argument and returns a string +ordinal value of the unmapped character as an argument and returns a string that represents the fallback character. For instance: $ascii = encode("ascii", $utf8, sub{ sprintf "<U+%04X>", shift }); @@ -880,9 +903,9 @@ The following API uses parts of Perl's internals in the current implementation. As such, they are efficient but may change in a future release. -=over 2 +=head3 is_utf8 -=item is_utf8(STRING [, CHECK]) + is_utf8(STRING [, CHECK]) [INTERNAL] Tests whether the UTF8 flag is turned on in the I<STRING>. If I<CHECK> is true, also checks whether I<STRING> contains well-formed @@ -890,7 +913,9 @@ UTF-8. Returns true if successful, false otherwise. As of Perl 5.8.1, L<utf8> also has the C<utf8::is_utf8> function. -=item _utf8_on(STRING) +=head3 _utf8_on + + _utf8_on(STRING) [INTERNAL] Turns the I<STRING>'s internal UTF8 flag B<on>. The I<STRING> is I<not> checked for containing only well-formed UTF-8. Do not use this @@ -901,7 +926,9 @@ if I<STRING> is not a string. B<NOTE>: For security reasons, this function does not work on tainted values. -=item _utf8_off(STRING) +=head3 _utf8_off + + _utf8_off(STRING) [INTERNAL] Turns the I<STRING>'s internal UTF8 flag B<off>. Do not use frivolously. Returns the previous state of the UTF8 flag, or C<undef> if @@ -911,8 +938,6 @@ previous setting. B<NOTE>: For security reasons, this function does not work on tainted values. -=back - =head1 UTF-8 vs. utf8 vs. UTF8 ....We now view strings not as sequences of bytes, but as sequences @@ -981,12 +1006,12 @@ L<perlebcdic>, L<perlfunc/open>, L<perlunicode>, L<perluniintro>, L<perlunifaq>, L<perlunitut> L<utf8>, -the Perl Unicode Mailing List E<lt>perl-unicode@perl.orgE<gt> +the Perl Unicode Mailing List L<http://lists.perl.org/list/perl-unicode.html> =head1 MAINTAINER This project was originated by the late Nick Ing-Simmons and later -maintained by Dan Kogai I<< <dankogai@dan.co.jp> >>. See AUTHORS +maintained by Dan Kogai I<< <dankogai@cpan.org> >>. See AUTHORS for a full list of people involved. For any questions, send mail to I<< <perl-unicode@perl.org> >> so that we can all share. @@ -996,7 +1021,7 @@ who submitted code to the project. =head1 COPYRIGHT -Copyright 2002-2011 Dan Kogai I<< <dankogai@dan.co.jp> >>. +Copyright 2002-2012 Dan Kogai I<< <dankogai@cpan.org> >>. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. diff --git a/gnu/usr.bin/perl/cpan/Encode/Encode.xs b/gnu/usr.bin/perl/cpan/Encode/Encode.xs index 312ba3448b8..f99da101576 100644 --- a/gnu/usr.bin/perl/cpan/Encode/Encode.xs +++ b/gnu/usr.bin/perl/cpan/Encode/Encode.xs @@ -1,5 +1,5 @@ /* - $Id: Encode.xs,v 2.20 2010/12/31 22:48:48 dankogai Exp $ + $Id: Encode.xs,v 2.21 2013/03/05 03:12:49 dankogai Exp dankogai $ */ #define PERL_NO_GET_CONTEXT diff --git a/gnu/usr.bin/perl/cpan/Encode/Makefile.PL b/gnu/usr.bin/perl/cpan/Encode/Makefile.PL index 2db8802c711..126f00e515e 100644 --- a/gnu/usr.bin/perl/cpan/Encode/Makefile.PL +++ b/gnu/usr.bin/perl/cpan/Encode/Makefile.PL @@ -1,5 +1,5 @@ # -# $Id: Makefile.PL,v 2.8 2009/07/08 13:34:15 dankogai Exp $ +# $Id: Makefile.PL,v 2.10 2013/02/18 02:23:56 dankogai Exp $ # use 5.007003; use strict; @@ -43,8 +43,9 @@ WriteMakefile( DIST_DEFAULT => 'all tardist', }, INC => '-I' . File::Spec->catfile( '.', 'Encode' ), + LICENSE => 'perl', PMLIBDIRS => \@pmlibdirs, - INSTALLDIRS => 'perl', + INSTALLDIRS => ($] < 5.011 ? 'perl' : 'site'), ); package MY; diff --git a/gnu/usr.bin/perl/cpan/Encode/Unicode/Unicode.xs b/gnu/usr.bin/perl/cpan/Encode/Unicode/Unicode.xs index 039f1559a3d..026f8fb1cae 100644 --- a/gnu/usr.bin/perl/cpan/Encode/Unicode/Unicode.xs +++ b/gnu/usr.bin/perl/cpan/Encode/Unicode/Unicode.xs @@ -1,5 +1,5 @@ /* - $Id: Unicode.xs,v 2.8 2011/08/09 07:49:44 dankogai Exp dankogai $ + $Id: Unicode.xs,v 2.9 2012/08/05 23:08:49 dankogai Exp $ */ #define PERL_NO_GET_CONTEXT @@ -199,10 +199,6 @@ CODE: *hv_fetch((HV *)SvRV(obj),"Name",4,0), ord); } - if (s+size <= e) { - /* skip the next one as well */ - enc_unpack(aTHX_ &s,e,size,endian); - } ord = FBCHAR; } else { @@ -217,12 +213,23 @@ CODE: ord = FBCHAR; } } - else { - if (s+size > e) { - /* Partial character */ - s -= size; /* back up to 1st half */ - break; /* And exit loop */ + else if (s+size > e) { + if (check) { + if (check & ENCODE_STOP_AT_PARTIAL) { + s -= size; + break; + } + else { + croak("%"SVf":Malformed HI surrogate %"UVxf, + *hv_fetch((HV *)SvRV(obj),"Name",4,0), + ord); + } } + else { + ord = FBCHAR; + } + } + else { lo = enc_unpack(aTHX_ &s,e,size,endian); if (!isLoSurrogate(lo)) { if (check) { @@ -231,6 +238,7 @@ CODE: ord); } else { + s -= size; ord = FBCHAR; } } @@ -348,7 +356,7 @@ CODE: if (ucs2 == -1) { ucs2 = SvTRUE(attr("ucs2", 4)); } - if (ucs2) { + if (ucs2 || ord > 0x10FFFF) { if (check) { croak("%"SVf":code point \"\\x{%"UVxf"}\" too high", *hv_fetch((HV *)SvRV(obj),"Name",4,0),ord); diff --git a/gnu/usr.bin/perl/cpan/Encode/encoding.pm b/gnu/usr.bin/perl/cpan/Encode/encoding.pm index 24d6e5b66c6..2783c9f7e67 100644 --- a/gnu/usr.bin/perl/cpan/Encode/encoding.pm +++ b/gnu/usr.bin/perl/cpan/Encode/encoding.pm @@ -1,4 +1,4 @@ -# $Id: encoding.pm,v 2.9 2011/08/09 07:49:44 dankogai Exp dankogai $ +# $Id: encoding.pm,v 2.11 2013/02/18 02:23:56 dankogai Exp $ package encoding; our $VERSION = '2.6_01'; @@ -102,8 +102,16 @@ sub _get_locale_encoding { } sub import { + if ($] >= 5.017) { + warnings::warnif("deprecated", + "Use of the encoding pragma is deprecated") + } my $class = shift; my $name = shift; + if (!$name){ + require Carp; + Carp::croak("encoding: no encoding specified."); + } if ( $name eq ':_get_locale_encoding' ) { # used by lib/open.pm my $caller = caller(); { @@ -199,6 +207,12 @@ __END__ encoding - allows you to write your script in non-ascii or non-utf8 +=head1 WARNING + +This module is deprecated under perl 5.18. It uses a mechanism provided by +perl that is deprecated under 5.18 and higher, and may be removed in a +future version. + =head1 SYNOPSIS use encoding "greek"; # Perl like Greek to you? diff --git a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Alias.pm b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Alias.pm index 9ded3738c22..c8f0ad01d7c 100644 --- a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Alias.pm +++ b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Alias.pm @@ -2,7 +2,7 @@ package Encode::Alias; use strict; use warnings; no warnings 'redefine'; -our $VERSION = do { my @r = ( q$Revision: 2.15 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.16 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG}; use base qw(Exporter); @@ -210,7 +210,8 @@ sub init_aliases { define_alias( qr/^(?:x[_-])?mac[_-](.*)$/i => '"mac$1"' ); # http://rt.cpan.org/Ticket/Display.html?id=36326 define_alias( qr/^macintosh$/i => '"MacRoman"' ); - + # https://rt.cpan.org/Ticket/Display.html?id=78125 + define_alias( qr/^macce$/i => '"MacCentralEurRoman"' ); # Ououououou. gone. They are differente! # define_alias( qr/\bmacRomanian$/i => '"macRumanian"'); diff --git a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Encoder.pm b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Encoder.pm index 9a46d367436..2cde1e865f5 100644 --- a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Encoder.pm +++ b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Encoder.pm @@ -1,5 +1,5 @@ # -# $Id: Encoder.pm,v 2.2 2011/08/09 07:49:44 dankogai Exp dankogai $ +# $Id: Encoder.pm,v 2.2 2011/08/09 07:49:44 dankogai Exp $ # package Encode::Encoder; use strict; diff --git a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/GSM0338.pm b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/GSM0338.pm index 2ea71f2f77e..aeff6379789 100644 --- a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/GSM0338.pm +++ b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/GSM0338.pm @@ -1,5 +1,5 @@ # -# $Id: GSM0338.pm,v 2.1 2008/05/07 20:56:05 dankogai Exp $ +# $Id: GSM0338.pm,v 2.2 2012/08/15 05:36:16 dankogai Exp $ # package Encode::GSM0338; @@ -8,7 +8,7 @@ use warnings; use Carp; use vars qw($VERSION); -$VERSION = do { my @r = ( q$Revision: 2.1 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +$VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use Encode qw(:fallbacks); @@ -138,7 +138,8 @@ our %UNI2GSM = ( "\x{00E4}" => "\x7B", # LATIN SMALL LETTER A WITH DIAERESIS "\x{00E5}" => "\x0F", # LATIN SMALL LETTER A WITH RING ABOVE "\x{00E6}" => "\x1D", # LATIN SMALL LETTER AE - "\x{00E7}" => "\x09", # LATIN SMALL LETTER C WITH CEDILLA + #"\x{00E7}" => "\x09", # LATIN SMALL LETTER C WITH CEDILLA + "\x{00C7}" => "\x09", # LATIN CAPITAL LETTER C WITH CEDILLA "\x{00E8}" => "\x04", # LATIN SMALL LETTER E WITH GRAVE "\x{00E9}" => "\x05", # LATIN SMALL LETTER E WITH ACUTE "\x{00EC}" => "\x07", # LATIN SMALL LETTER I WITH GRAVE diff --git a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Unicode/UTF7.pm b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Unicode/UTF7.pm index 1d639627549..ea9257b47b5 100644 --- a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Unicode/UTF7.pm +++ b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Unicode/UTF7.pm @@ -1,5 +1,5 @@ # -# $Id: UTF7.pm,v 2.5 2010/09/18 18:39:51 dankogai Exp $ +# $Id: UTF7.pm,v 2.6 2012/08/05 23:08:49 dankogai Exp $ # package Encode::Unicode::UTF7; use strict; @@ -7,7 +7,7 @@ use warnings; no warnings 'redefine'; use base qw(Encode::Encoding); __PACKAGE__->Define('UTF-7'); -our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.6 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use MIME::Base64; use Encode; @@ -62,6 +62,7 @@ sub decode($$;$) { my ( $obj, $bytes, $chk ) = @_; my $len = length($bytes); my $str = ""; + pos($bytes) = 0; no warnings 'uninitialized'; while ( pos($bytes) < $len ) { if ( $bytes =~ /\G([^+]+)/ogc ) { diff --git a/gnu/usr.bin/perl/cpan/Encode/t/Mod_EUCJP.pm b/gnu/usr.bin/perl/cpan/Encode/t/Mod_EUCJP.pm index 488f156d5a7..4f381c58737 100644 --- a/gnu/usr.bin/perl/cpan/Encode/t/Mod_EUCJP.pm +++ b/gnu/usr.bin/perl/cpan/Encode/t/Mod_EUCJP.pm @@ -1,6 +1,7 @@ -# $Id: Mod_EUCJP.pm,v 2.0 2004/05/16 20:55:17 dankogai Exp $ +# $Id: Mod_EUCJP.pm,v 2.1 2013/02/18 02:23:56 dankogai Exp $ # This file is in euc-jp package Mod_EUCJP; +no warnings "deprecated"; use encoding "euc-jp"; sub new { my $class = shift; diff --git a/gnu/usr.bin/perl/cpan/Encode/t/Unicode.t b/gnu/usr.bin/perl/cpan/Encode/t/Unicode.t index baa502c1f97..2cc5d548557 100755 --- a/gnu/usr.bin/perl/cpan/Encode/t/Unicode.t +++ b/gnu/usr.bin/perl/cpan/Encode/t/Unicode.t @@ -1,5 +1,5 @@ # -# $Id: Unicode.t,v 2.2 2009/11/16 14:08:13 dankogai Exp $ +# $Id: Unicode.t,v 2.3 2012/08/05 23:08:49 dankogai Exp $ # # This script is written entirely in ASCII, even though quoted literals # do include non-BMP unicode characters -- Are you happy, jhi? @@ -20,7 +20,7 @@ BEGIN { use strict; #use Test::More 'no_plan'; -use Test::More tests => 38; +use Test::More tests => 56; use Encode qw(encode decode find_encoding); # @@ -30,7 +30,7 @@ use Encode qw(encode decode find_encoding); my $dankogai = "\x{5c0f}\x{98fc}\x{3000}\x{5f3e}"; my $nasty = "$dankogai\x{1abcd}"; -my $fallback = "$dankogai\x{fffd}"; +my $fallback = "$dankogai\x{fffd}\x{fffd}"; #hi: (0x1abcd - 0x10000) / 0x400 + 0xD800 = 0xd82a #lo: (0x1abcd - 0x10000) % 0x400 + 0xDC00 = 0xdfcd @@ -85,6 +85,50 @@ is(index($@, 'UCS-2BE'), 0, "encode UCS-2BE: exception"); eval { encode('UCS-2LE', $nasty, 1) }; is(index($@, 'UCS-2LE'), 0, "encode UCS-2LE: exception"); +{ + my %tests = ( + 'UCS-2BE' => 'n*', + 'UCS-2LE' => 'v*', + 'UTF-16BE' => 'n*', + 'UTF-16LE' => 'v*', + 'UTF-32BE' => 'N*', + 'UTF-32LE' => 'V*', + ); + + while (my ($enc, $pack) = each(%tests)) { + is(decode($enc, pack($pack, 0xD800, 0x263A)), "\x{FFFD}\x{263A}", + "decode $enc (HI surrogate followed by WHITE SMILING FACE)"); + is(decode($enc, pack($pack, 0xDC00, 0x263A)), "\x{FFFD}\x{263A}", + "decode $enc (LO surrogate followed by WHITE SMILING FACE)"); + } +} + +{ + my %tests = ( + 'UTF-16BE' => 'n*', + 'UTF-16LE' => 'v*', + ); + + while (my ($enc, $pack) = each(%tests)) { + is(decode($enc, pack($pack, 0xD800)), "\x{FFFD}", + "decode $enc (HI surrogate)"); + is(decode($enc, pack($pack, 0x263A, 0xD800)), "\x{263A}\x{FFFD}", + "decode $enc (WHITE SMILING FACE followed by HI surrogate)"); + } +} + +{ + my %tests = ( + 'UTF-16BE' => 'n*', + 'UTF-16LE' => 'v*', + ); + + while (my ($enc, $pack) = each(%tests)) { + is(encode($enc, "\x{110000}"), pack($pack, 0xFFFD), + "ordinals greater than U+10FFFF is replaced with U+FFFD"); + } +} + # # SvGROW test for (en|de)code_xs # diff --git a/gnu/usr.bin/perl/cpan/Encode/t/enc_data.t b/gnu/usr.bin/perl/cpan/Encode/t/enc_data.t index 52d7e119969..a0caf650f19 100755 --- a/gnu/usr.bin/perl/cpan/Encode/t/enc_data.t +++ b/gnu/usr.bin/perl/cpan/Encode/t/enc_data.t @@ -1,4 +1,4 @@ -# $Id: enc_data.t,v 2.1 2006/05/03 18:24:10 dankogai Exp $ +# $Id: enc_data.t,v 2.2 2013/02/18 02:23:56 dankogai Exp $ BEGIN { require Config; import Config; @@ -22,6 +22,7 @@ BEGIN { use strict; +no warnings "deprecated"; use encoding 'euc-jp'; use Test::More tests => 4; diff --git a/gnu/usr.bin/perl/cpan/Encode/t/enc_eucjp.t b/gnu/usr.bin/perl/cpan/Encode/t/enc_eucjp.t index 2fdd8110d18..7c78a68ee95 100755 --- a/gnu/usr.bin/perl/cpan/Encode/t/enc_eucjp.t +++ b/gnu/usr.bin/perl/cpan/Encode/t/enc_eucjp.t @@ -1,4 +1,4 @@ -# $Id: enc_eucjp.t,v 2.1 2006/05/03 18:24:10 dankogai Exp $ +# $Id: enc_eucjp.t,v 2.2 2013/02/18 02:23:56 dankogai Exp $ # This is the twin of enc_utf8.t . BEGIN { @@ -21,6 +21,7 @@ BEGIN { } } +no warnings "deprecated"; use encoding 'euc-jp'; my @c = (127, 128, 255, 256); diff --git a/gnu/usr.bin/perl/cpan/Encode/t/enc_module.t b/gnu/usr.bin/perl/cpan/Encode/t/enc_module.t index f187bd78b7f..05fc6c2f64d 100755 --- a/gnu/usr.bin/perl/cpan/Encode/t/enc_module.t +++ b/gnu/usr.bin/perl/cpan/Encode/t/enc_module.t @@ -1,4 +1,4 @@ -# $Id: enc_module.t,v 2.1 2006/05/03 18:24:10 dankogai Exp $ +# $Id: enc_module.t,v 2.2 2013/02/18 02:23:56 dankogai Exp $ # This file is in euc-jp BEGIN { require Config; import Config; @@ -21,6 +21,7 @@ BEGIN { } use lib qw(t ext/Encode/t ../ext/Encode/t); # latter 2 for perl core use Mod_EUCJP; +no warnings "deprecated"; use encoding "euc-jp"; use Test::More tests => 3; use File::Basename; diff --git a/gnu/usr.bin/perl/cpan/Encode/t/enc_utf8.t b/gnu/usr.bin/perl/cpan/Encode/t/enc_utf8.t index 5a301962cb4..9c6caa3fa98 100755 --- a/gnu/usr.bin/perl/cpan/Encode/t/enc_utf8.t +++ b/gnu/usr.bin/perl/cpan/Encode/t/enc_utf8.t @@ -1,4 +1,4 @@ -# $Id: enc_utf8.t,v 2.1 2006/05/03 18:24:10 dankogai Exp $ +# $Id: enc_utf8.t,v 2.2 2013/02/18 02:23:56 dankogai Exp $ # This is the twin of enc_eucjp.t . BEGIN { @@ -17,6 +17,7 @@ BEGIN { } } +no warnings "deprecated"; use encoding 'utf8'; my @c = (127, 128, 255, 256); diff --git a/gnu/usr.bin/perl/cpan/Encode/t/jperl.t b/gnu/usr.bin/perl/cpan/Encode/t/jperl.t index da684689018..3abe86b9b79 100755 --- a/gnu/usr.bin/perl/cpan/Encode/t/jperl.t +++ b/gnu/usr.bin/perl/cpan/Encode/t/jperl.t @@ -1,5 +1,5 @@ # -# $Id: jperl.t,v 2.1 2006/05/03 18:24:10 dankogai Exp $ +# $Id: jperl.t,v 2.2 2013/02/18 02:23:56 dankogai Exp $ # # This script is written in euc-jp @@ -27,6 +27,7 @@ use strict; use Test::More tests => 15; # black magic tests commented out my $Debug = shift; +no warnings "deprecated"; no encoding; # ensure my $Enamae = "\xbe\xae\xbb\xf4\x20\xc3\xc6"; # euc-jp, with \x escapes use encoding "euc-jp"; diff --git a/gnu/usr.bin/perl/cpan/Encode/t/piconv.t b/gnu/usr.bin/perl/cpan/Encode/t/piconv.t index ed084b41591..e0768d543c4 100755 --- a/gnu/usr.bin/perl/cpan/Encode/t/piconv.t +++ b/gnu/usr.bin/perl/cpan/Encode/t/piconv.t @@ -1,5 +1,5 @@ # -# $Id: piconv.t,v 0.3 2009/11/16 14:08:13 dankogai Exp $ +# $Id: piconv.t,v 0.4 2013/02/18 02:23:56 dankogai Exp $ # BEGIN { @@ -27,8 +27,8 @@ sub run_cmd (;$$); my $blib = File::Spec->rel2abs( - File::Spec->catdir( $FindBin::RealBin, File::Spec->updir, 'blib' ) ); -my $script = File::Spec->catdir($blib, 'script', 'piconv'); + File::Spec->catdir( $FindBin::RealBin, File::Spec->updir ) ); +my $script = File::Spec->catdir($blib, 'bin', 'piconv'); my @base_cmd = ( $^X, "-Mblib=$blib", $script ); plan tests => 5; |