summaryrefslogtreecommitdiffstats
path: root/gnu/usr.bin/perl/cpan/Encode
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/usr.bin/perl/cpan/Encode')
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/Encode.pm372
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/Encode.xs972
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/Makefile.PL108
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/Unicode/Unicode.pm14
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/Unicode/Unicode.xs118
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/bin/enc2xs70
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/bin/piconv2
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/bin/ucmlint7
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/encengine.c84
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/encoding.pm49
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/lib/Encode/Alias.pm22
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/lib/Encode/CN/HZ.pm9
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/lib/Encode/Encoding.pm24
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/lib/Encode/GSM0338.pm11
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/lib/Encode/Guess.pm5
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/lib/Encode/JP/JIS7.pm9
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/lib/Encode/KR/2022_KR.pm4
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/lib/Encode/MIME/Header.pm524
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm8
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/lib/Encode/MIME/Name.pm15
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/lib/Encode/Supported.pod2
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/lib/Encode/Unicode/UTF7.pm9
-rwxr-xr-xgnu/usr.bin/perl/cpan/Encode/t/Aliases.t2
-rwxr-xr-xgnu/usr.bin/perl/cpan/Encode/t/enc_data.t8
-rwxr-xr-xgnu/usr.bin/perl/cpan/Encode/t/enc_eucjp.t23
-rwxr-xr-xgnu/usr.bin/perl/cpan/Encode/t/enc_module.t8
-rwxr-xr-xgnu/usr.bin/perl/cpan/Encode/t/enc_utf8.t23
-rwxr-xr-xgnu/usr.bin/perl/cpan/Encode/t/encoding.t6
-rwxr-xr-xgnu/usr.bin/perl/cpan/Encode/t/jperl.t6
-rwxr-xr-xgnu/usr.bin/perl/cpan/Encode/t/mime-header.t421
-rwxr-xr-xgnu/usr.bin/perl/cpan/Encode/t/mime-name.t34
-rw-r--r--gnu/usr.bin/perl/cpan/Encode/t/taint.t28
32 files changed, 1936 insertions, 1061 deletions
diff --git a/gnu/usr.bin/perl/cpan/Encode/Encode.pm b/gnu/usr.bin/perl/cpan/Encode/Encode.pm
index dce6c5415e9..f90f929f073 100644
--- a/gnu/usr.bin/perl/cpan/Encode/Encode.pm
+++ b/gnu/usr.bin/perl/cpan/Encode/Encode.pm
@@ -1,21 +1,26 @@
#
-# $Id: Encode.pm,v 2.80 2016/01/25 14:54:01 dankogai Exp $
+# $Id: Encode.pm,v 2.97 2018/02/21 12:14:24 dankogai Exp $
#
package Encode;
use strict;
use warnings;
-our $VERSION = sprintf "%d.%02d_01", q$Revision: 2.80 $ =~ /(\d+)/g;
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
-use XSLoader ();
-XSLoader::load( __PACKAGE__, $VERSION );
+our $VERSION;
+BEGIN {
+ $VERSION = sprintf "%d.%02d", q$Revision: 2.97 $ =~ /(\d+)/g;
+ require XSLoader;
+ XSLoader::load( __PACKAGE__, $VERSION );
+}
use Exporter 5.57 'import';
+our @CARP_NOT = qw(Encode::Encoder);
+
# Public, encouraged API is exported by default
our @EXPORT = qw(
decode decode_utf8 encode encode_utf8 str2bytes bytes2str
- encodings find_encoding clone_encoding
+ encodings find_encoding find_mime_encoding clone_encoding
);
our @FB_FLAGS = qw(
DIE_ON_ERR WARN_ON_ERR RETURN_ON_ERR LEAVE_SRC
@@ -44,7 +49,10 @@ our %EXPORT_TAGS = (
our $ON_EBCDIC = ( ord("A") == 193 );
-use Encode::Alias;
+use Encode::Alias ();
+use Encode::MIME::Name;
+
+use Storable;
# Make a %Encoding package variable to allow a certain amount of cheating
our %Encoding;
@@ -96,12 +104,17 @@ sub define_encoding {
my $alias = shift;
define_alias( $alias, $obj );
}
+ my $class = ref($obj);
+ push @Encode::CARP_NOT, $class unless grep { $_ eq $class } @Encode::CARP_NOT;
+ push @Encode::Encoding::CARP_NOT, $class unless grep { $_ eq $class } @Encode::Encoding::CARP_NOT;
return $obj;
}
sub getEncoding {
my ( $class, $name, $skip_external ) = @_;
+ defined($name) or return;
+
$name =~ s/\s+//g; # https://rt.cpan.org/Ticket/Display.html?id=65796
ref($name) && $name->can('renew') and return $name;
@@ -125,11 +138,26 @@ sub getEncoding {
return;
}
+# HACK: These two functions must be defined in Encode and because of
+# cyclic dependency between Encode and Encode::Alias, Exporter does not work
+sub find_alias {
+ goto &Encode::Alias::find_alias;
+}
+sub define_alias {
+ goto &Encode::Alias::define_alias;
+}
+
sub find_encoding($;$) {
my ( $name, $skip_external ) = @_;
return __PACKAGE__->getEncoding( $name, $skip_external );
}
+sub find_mime_encoding($;$) {
+ my ( $mime_name, $skip_external ) = @_;
+ my $name = Encode::MIME::Name::get_encode_name( $mime_name );
+ return find_encoding( $name, $skip_external );
+}
+
sub resolve_alias($) {
my $obj = find_encoding(shift);
defined $obj and return $obj->name;
@@ -139,8 +167,6 @@ sub resolve_alias($) {
sub clone_encoding($) {
my $obj = find_encoding(shift);
ref $obj or return;
- eval { require Storable };
- $@ and return;
return Storable::dclone($obj);
}
@@ -172,7 +198,7 @@ sub encode($$;$) {
else {
$octets = $enc->encode( $string, $check );
}
- $_[1] = $string if $check and !ref $check and !( $check & LEAVE_SRC() );
+ $_[1] = $string if $check and !ref $check and !( $check & LEAVE_SRC );
return $octets;
}
*str2bytes = \&encode;
@@ -201,7 +227,7 @@ sub decode($$;$) {
else {
$string = $enc->decode( $octets, $check );
}
- $_[1] = $octets if $check and !ref $check and !( $check & LEAVE_SRC() );
+ $_[1] = $octets if $check and !ref $check and !( $check & LEAVE_SRC );
return $string;
}
*bytes2str = \&decode;
@@ -220,14 +246,41 @@ sub from_to($$$;$) {
require Carp;
Carp::croak("Unknown encoding '$to'");
}
- my $uni = $f->decode($string);
- $_[0] = $string = $t->encode( $uni, $check );
+
+ # For Unicode, warnings need to be caught and re-issued at this level
+ # so that callers can disable utf8 warnings lexically.
+ my $uni;
+ if ( ref($f) eq 'Encode::Unicode' ) {
+ my $warn = '';
+ {
+ local $SIG{__WARN__} = sub { $warn = shift };
+ $uni = $f->decode($string);
+ }
+ warnings::warnif('utf8', $warn) if length $warn;
+ }
+ else {
+ $uni = $f->decode($string);
+ }
+
+ if ( ref($t) eq 'Encode::Unicode' ) {
+ my $warn = '';
+ {
+ local $SIG{__WARN__} = sub { $warn = shift };
+ $_[0] = $string = $t->encode( $uni, $check );
+ }
+ warnings::warnif('utf8', $warn) if length $warn;
+ }
+ else {
+ $_[0] = $string = $t->encode( $uni, $check );
+ }
+
return undef if ( $check && length($uni) );
return defined( $_[0] ) ? length($string) : undef;
}
sub encode_utf8($) {
my ($str) = @_;
+ return undef unless defined $str;
utf8::encode($str);
return $str;
}
@@ -241,133 +294,87 @@ sub decode_utf8($;$) {
$check ||= 0;
$utf8enc ||= find_encoding('utf8');
my $string = $utf8enc->decode( $octets, $check );
- $_[0] = $octets if $check and !ref $check and !( $check & LEAVE_SRC() );
+ $_[0] = $octets if $check and !ref $check and !( $check & LEAVE_SRC );
return $string;
}
-# sub decode_utf8($;$) {
-# my ( $str, $check ) = @_;
-# return $str if is_utf8($str);
-# if ($check) {
-# return decode( "utf8", $str, $check );
-# }
-# else {
-# return decode( "utf8", $str );
-# return $str;
-# }
-# }
-
-predefine_encodings(1);
-
-#
-# This is to restore %Encoding if really needed;
-#
-
-sub predefine_encodings {
- require Encode::Encoding;
- no warnings 'redefine';
- my $use_xs = shift;
- if ($ON_EBCDIC) {
-
- # was in Encode::UTF_EBCDIC
- package Encode::UTF_EBCDIC;
- push @Encode::UTF_EBCDIC::ISA, 'Encode::Encoding';
- *decode = sub {
- my ( undef, $str, $chk ) = @_;
- my $res = '';
- for ( my $i = 0 ; $i < length($str) ; $i++ ) {
- $res .=
- chr(
- utf8::unicode_to_native( ord( substr( $str, $i, 1 ) ) )
- );
- }
- $_[1] = '' if $chk;
- return $res;
- };
- *encode = sub {
- my ( undef, $str, $chk ) = @_;
- my $res = '';
- for ( my $i = 0 ; $i < length($str) ; $i++ ) {
- $res .=
- chr(
- utf8::native_to_unicode( ord( substr( $str, $i, 1 ) ) )
- );
- }
- $_[1] = '' if $chk;
- return $res;
- };
- $Encode::Encoding{Unicode} =
- bless { Name => "UTF_EBCDIC" } => "Encode::UTF_EBCDIC";
+onBOOT;
+
+if ($ON_EBCDIC) {
+ package Encode::UTF_EBCDIC;
+ use parent 'Encode::Encoding';
+ my $obj = bless { Name => "UTF_EBCDIC" } => "Encode::UTF_EBCDIC";
+ Encode::define_encoding($obj, 'Unicode');
+ sub decode {
+ my ( undef, $str, $chk ) = @_;
+ my $res = '';
+ for ( my $i = 0 ; $i < length($str) ; $i++ ) {
+ $res .=
+ chr(
+ utf8::unicode_to_native( ord( substr( $str, $i, 1 ) ) )
+ );
+ }
+ $_[1] = '' if $chk;
+ return $res;
}
- else {
-
- package Encode::Internal;
- push @Encode::Internal::ISA, 'Encode::Encoding';
- *decode = sub {
- my ( undef, $str, $chk ) = @_;
- utf8::upgrade($str);
- $_[1] = '' if $chk;
- return $str;
- };
- *encode = \&decode;
- $Encode::Encoding{Unicode} =
- bless { Name => "Internal" } => "Encode::Internal";
+ sub encode {
+ my ( undef, $str, $chk ) = @_;
+ my $res = '';
+ for ( my $i = 0 ; $i < length($str) ; $i++ ) {
+ $res .=
+ chr(
+ utf8::native_to_unicode( ord( substr( $str, $i, 1 ) ) )
+ );
+ }
+ $_[1] = '' if $chk;
+ return $res;
}
- {
- # https://rt.cpan.org/Public/Bug/Display.html?id=103253
- package Encode::XS;
- push @Encode::XS::ISA, 'Encode::Encoding';
+} else {
+ package Encode::Internal;
+ use parent 'Encode::Encoding';
+ my $obj = bless { Name => "Internal" } => "Encode::Internal";
+ Encode::define_encoding($obj, 'Unicode');
+ sub decode {
+ my ( undef, $str, $chk ) = @_;
+ utf8::upgrade($str);
+ $_[1] = '' if $chk;
+ return $str;
}
- {
+ *encode = \&decode;
+}
- # was in Encode::utf8
- package Encode::utf8;
- push @Encode::utf8::ISA, 'Encode::Encoding';
+{
+ # https://rt.cpan.org/Public/Bug/Display.html?id=103253
+ package Encode::XS;
+ use parent 'Encode::Encoding';
+}
- #
- if ($use_xs) {
- Encode::DEBUG and warn __PACKAGE__, " XS on";
- *decode = \&decode_xs;
- *encode = \&encode_xs;
- }
- else {
- Encode::DEBUG and warn __PACKAGE__, " XS off";
- *decode = sub {
- my ( undef, $octets, $chk ) = @_;
- my $str = Encode::decode_utf8($octets);
- if ( defined $str ) {
- $_[1] = '' if $chk;
- return $str;
- }
- return undef;
- };
- *encode = sub {
- my ( undef, $string, $chk ) = @_;
- my $octets = Encode::encode_utf8($string);
- $_[1] = '' if $chk;
- return $octets;
- };
+{
+ package Encode::utf8;
+ use parent 'Encode::Encoding';
+ my %obj = (
+ 'utf8' => { Name => 'utf8' },
+ 'utf-8-strict' => { Name => 'utf-8-strict', strict_utf8 => 1 }
+ );
+ for ( keys %obj ) {
+ bless $obj{$_} => __PACKAGE__;
+ Encode::define_encoding( $obj{$_} => $_ );
+ }
+ sub cat_decode {
+ # ($obj, $dst, $src, $pos, $trm, $chk)
+ # currently ignores $chk
+ my ( undef, undef, undef, $pos, $trm ) = @_;
+ my ( $rdst, $rsrc, $rpos ) = \@_[ 1, 2, 3 ];
+ use bytes;
+ if ( ( my $npos = index( $$rsrc, $trm, $pos ) ) >= 0 ) {
+ $$rdst .=
+ substr( $$rsrc, $pos, $npos - $pos + length($trm) );
+ $$rpos = $npos + length($trm);
+ return 1;
}
- *cat_decode = sub { # ($obj, $dst, $src, $pos, $trm, $chk)
- # currently ignores $chk
- my ( undef, undef, undef, $pos, $trm ) = @_;
- my ( $rdst, $rsrc, $rpos ) = \@_[ 1, 2, 3 ];
- use bytes;
- if ( ( my $npos = index( $$rsrc, $trm, $pos ) ) >= 0 ) {
- $$rdst .=
- substr( $$rsrc, $pos, $npos - $pos + length($trm) );
- $$rpos = $npos + length($trm);
- return 1;
- }
- $$rdst .= substr( $$rsrc, $pos );
- $$rpos = length($$rsrc);
- return '';
- };
- $Encode::Encoding{utf8} =
- bless { Name => "utf8" } => "Encode::utf8";
- $Encode::Encoding{"utf-8-strict"} =
- bless { Name => "utf-8-strict", strict_utf8 => 1 }
- => "Encode::utf8";
+ $$rdst .= substr( $$rsrc, $pos );
+ $$rpos = length($$rsrc);
+ return '';
}
}
@@ -470,19 +477,25 @@ I<ENCODING> and returns a sequence of octets. I<ENCODING> can be either a
canonical name or an alias. For encoding names and aliases, see
L</"Defining Aliases">. For CHECK, see L</"Handling Malformed Data">.
+B<CAVEAT>: the input scalar I<STRING> might be modified in-place depending
+on what is set in CHECK. See L</LEAVE_SRC> if you want your inputs to be
+left unchanged.
+
For example, to convert a string from Perl's internal format into
ISO-8859-1, also known as Latin1:
$octets = encode("iso-8859-1", $string);
-B<CAVEAT>: When you run C<$octets = encode("utf8", $string)>, then
+B<CAVEAT>: When you run C<$octets = encode("UTF-8", $string)>, then
$octets I<might not be equal to> $string. Though both contain the
same data, the UTF8 flag for $octets is I<always> off. When you
encode anything, the UTF8 flag on the result is always off, even when it
-contains a completely valid utf8 string. See L</"The UTF8 flag"> below.
+contains a completely valid UTF-8 string. See L</"The UTF8 flag"> below.
If the $string is C<undef>, then C<undef> is returned.
+C<str2bytes> may be used as an alias for C<encode>.
+
=head3 decode
$string = decode(ENCODING, OCTETS[, CHECK])
@@ -494,18 +507,24 @@ I<ENCODING> can be either a canonical name or an alias. For encoding names
and aliases, see L</"Defining Aliases">; for I<CHECK>, see L</"Handling
Malformed Data">.
+B<CAVEAT>: the input scalar I<OCTETS> might be modified in-place depending
+on what is set in CHECK. See L</LEAVE_SRC> if you want your inputs to be
+left unchanged.
+
For example, to convert ISO-8859-1 data into a string in Perl's
internal format:
$string = decode("iso-8859-1", $octets);
-B<CAVEAT>: When you run C<$string = decode("utf8", $octets)>, then $string
+B<CAVEAT>: When you run C<$string = decode("UTF-8", $octets)>, then $string
I<might not be equal to> $octets. Though both contain the same data, the
UTF8 flag for $string is on. See L</"The UTF8 flag">
below.
If the $string is C<undef>, then C<undef> is returned.
+C<bytes2str> may be used as an alias for C<decode>.
+
=head3 find_encoding
[$obj =] find_encoding(ENCODING)
@@ -514,11 +533,11 @@ Returns the I<encoding object> corresponding to I<ENCODING>. Returns
C<undef> if no matching I<ENCODING> is find. The returned object is
what does the actual encoding or decoding.
- $utf8 = decode($name, $bytes);
+ $string = decode($name, $bytes);
is in fact
- $utf8 = do {
+ $string = do {
$obj = find_encoding($name);
croak qq(encoding "$name" not found) unless ref $obj;
$obj->decode($bytes);
@@ -530,8 +549,8 @@ You can therefore save time by reusing this object as follows;
my $enc = find_encoding("iso-8859-1");
while(<>) {
- my $utf8 = $enc->decode($_);
- ... # now do something with $utf8;
+ my $string = $enc->decode($_);
+ ... # now do something with $string;
}
Besides L</decode> and L</encode>, other methods are
@@ -542,6 +561,20 @@ name of the encoding object.
See L<Encode::Encoding> for details.
+=head3 find_mime_encoding
+
+ [$obj =] find_mime_encoding(MIME_ENCODING)
+
+Returns the I<encoding object> corresponding to I<MIME_ENCODING>. Acts
+same as C<find_encoding()> but C<mime_name()> of returned object must
+match to I<MIME_ENCODING>. So as opposite of C<find_encoding()>
+canonical names and aliases are not used when searching for object.
+
+ find_mime_encoding("utf8"); # returns undef because "utf8" is not valid I<MIME_ENCODING>
+ find_mime_encoding("utf-8"); # returns encode object "utf-8-strict"
+ find_mime_encoding("UTF-8"); # same as "utf-8" because I<MIME_ENCODING> is case insensitive
+ find_mime_encoding("utf-8-strict"); returns undef because "utf-8-strict" is not valid I<MIME_ENCODING>
+
=head3 from_to
[$length =] from_to($octets, FROM_ENC, TO_ENC [, CHECK])
@@ -565,13 +598,13 @@ and C<undef> on error.
B<CAVEAT>: The following operations may look the same, but are not:
- from_to($data, "iso-8859-1", "utf8"); #1
+ from_to($data, "iso-8859-1", "UTF-8"); #1
$data = decode("iso-8859-1", $data); #2
Both #1 and #2 make $data consist of a completely valid UTF-8 string,
but only #2 turns the UTF8 flag on. #1 is equivalent to:
- $data = encode("utf8", decode("iso-8859-1", $data));
+ $data = encode("UTF-8", decode("iso-8859-1", $data));
See L</"The UTF8 flag"> below.
@@ -596,7 +629,11 @@ followed by C<encode> as follows:
Equivalent to C<$octets = encode("utf8", $string)>. The characters in
$string are encoded in Perl's internal format, and the result is returned
as a sequence of octets. Because all possible characters in Perl have a
-(loose, not strict) UTF-8 representation, this function cannot fail.
+(loose, not strict) utf8 representation, this function cannot fail.
+
+B<WARNING>: do not use this function for data exchange as it can produce
+not strict utf8 $octets! For strictly valid UTF-8 output use
+C<$octets = encode("UTF-8", $string)>.
=head3 decode_utf8
@@ -604,11 +641,19 @@ as a sequence of octets. Because all possible characters in Perl have a
Equivalent to C<$string = decode("utf8", $octets [, CHECK])>.
The sequence of octets represented by $octets is decoded
-from UTF-8 into a sequence of logical characters.
-Because not all sequences of octets are valid UTF-8,
+from (loose, not strict) utf8 into a sequence of logical characters.
+Because not all sequences of octets are valid not strict utf8,
it is quite possible for this function to fail.
For CHECK, see L</"Handling Malformed Data">.
+B<WARNING>: do not use this function for data exchange as it can produce
+$string with not strict utf8 representation! For strictly valid UTF-8
+$string representation use C<$string = decode("UTF-8", $octets [, CHECK])>.
+
+B<CAVEAT>: the input I<$octets> might be modified in-place depending on
+what is set in CHECK. See L</LEAVE_SRC> if you want your inputs to be
+left unchanged.
+
=head2 Listing available encodings
use Encode;
@@ -840,15 +885,14 @@ octets that represent the fallback character. For instance:
Acts like C<FB_PERLQQ> but U+I<XXXX> is used instead of C<\x{I<XXXX>}>.
-Even the fallback for C<decode> must return octets, which are
-then decoded with the character encoding that C<decode> accepts. So for
+Fallback for C<decode> must return decoded string (sequence of characters)
+and takes a list of ordinal values as its arguments. So for
example if you wish to decode octets as UTF-8, and use ISO-8859-15 as
a fallback for bytes that are not valid UTF-8, you could write
$str = decode 'UTF-8', $octets, sub {
- my $tmp = chr shift;
- from_to $tmp, 'ISO-8859-15', 'UTF-8';
- return $tmp;
+ my $tmp = join '', map chr, @_;
+ return decode 'ISO-8859-15', $tmp;
};
=head1 Defining Encodings
@@ -905,38 +949,11 @@ different kinds of strings and string-operations in Perl: one a
byte-oriented mode for when the internal UTF8 flag is off, and the other a
character-oriented mode for when the internal UTF8 flag is on.
-Here is how C<Encode> handles the UTF8 flag.
-
-=over 2
-
-=item *
-
-When you I<encode>, the resulting UTF8 flag is always B<off>.
-
-=item *
-
-When you I<decode>, the resulting UTF8 flag is B<on>--I<unless> you can
-unambiguously represent data. Here is what we mean by "unambiguously".
-After C<$utf8 = decode("foo", $octet)>,
-
- When $octet is... The UTF8 flag in $utf8 is
- ---------------------------------------------
- In ASCII only (or EBCDIC only) OFF
- In ISO-8859-1 ON
- In any other Encoding ON
- ---------------------------------------------
-
-As you see, there is one exception: in ASCII. That way you can assume
-Goal #1. And with C<Encode>, Goal #2 is assumed but you still have to be
-careful in the cases mentioned in the B<CAVEAT> paragraphs above.
-
This UTF8 flag is not visible in Perl scripts, exactly for the same reason
you cannot (or rather, you I<don't have to>) see whether a scalar contains
a string, an integer, or a floating-point number. But you can still peek
and poke these if you will. See the next section.
-=back
-
=head2 Messing with Perl's Internals
The following API uses parts of Perl's internals in the current
@@ -951,6 +968,13 @@ release.
If I<CHECK> is true, also checks whether I<STRING> contains well-formed
UTF-8. Returns true if successful, false otherwise.
+Typically only necessary for debugging and testing. Don't use this flag as
+a marker to distinguish character and binary data, that should be decided
+for each variable when you write your code.
+
+B<CAVEAT>: If I<STRING> has UTF8 flag set, it does B<NOT> mean that
+I<STRING> is UTF-8 encoded and vice-versa.
+
As of Perl 5.8.1, L<utf8> also has the C<utf8::is_utf8> function.
=head3 _utf8_on
diff --git a/gnu/usr.bin/perl/cpan/Encode/Encode.xs b/gnu/usr.bin/perl/cpan/Encode/Encode.xs
index cd7f7d1050b..774c2b1fec0 100644
--- a/gnu/usr.bin/perl/cpan/Encode/Encode.xs
+++ b/gnu/usr.bin/perl/cpan/Encode/Encode.xs
@@ -1,5 +1,5 @@
/*
- $Id: Encode.xs,v 2.35 2016/01/22 06:33:07 dankogai Exp $
+ $Id: Encode.xs,v 2.43 2018/02/21 12:14:33 dankogai Exp dankogai $
*/
#define PERL_NO_GET_CONTEXT
@@ -31,16 +31,16 @@
UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
-#ifdef UTF8_DISALLOW_ILLEGAL_INTERCHANGE
-# define UTF8_ALLOW_STRICT UTF8_DISALLOW_ILLEGAL_INTERCHANGE
-#else
-# define UTF8_ALLOW_STRICT 0
+#ifndef SvIV_nomg
+#define SvIV_nomg SvIV
#endif
-#define UTF8_ALLOW_NONSTRICT (UTF8_ALLOW_ANY & \
- ~(UTF8_ALLOW_CONTINUATION | \
- UTF8_ALLOW_NON_CONTINUATION | \
- UTF8_ALLOW_LONG))
+#ifndef UTF8_DISALLOW_ILLEGAL_INTERCHANGE
+# define UTF8_DISALLOW_ILLEGAL_INTERCHANGE 0
+# define UTF8_ALLOW_NON_STRICT (UTF8_ALLOW_FE_FF|UTF8_ALLOW_SURROGATE|UTF8_ALLOW_FFFF)
+#else
+# define UTF8_ALLOW_NON_STRICT 0
+#endif
static void
Encode_XSEncoding(pTHX_ encode_t * enc)
@@ -76,27 +76,60 @@ call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig)
PERL_UNUSED_VAR(orig);
}
+static void
+utf8_safe_downgrade(pTHX_ SV ** src, U8 ** s, STRLEN * slen, bool modify)
+{
+ if (!modify) {
+ SV *tmp = sv_2mortal(newSVpvn((char *)*s, *slen));
+ SvUTF8_on(tmp);
+ if (SvTAINTED(*src))
+ SvTAINTED_on(tmp);
+ *src = tmp;
+ *s = (U8 *)SvPVX(*src);
+ }
+ if (*slen) {
+ if (!utf8_to_bytes(*s, slen))
+ croak("Wide character");
+ SvCUR_set(*src, *slen);
+ }
+ SvUTF8_off(*src);
+}
+
+static void
+utf8_safe_upgrade(pTHX_ SV ** src, U8 ** s, STRLEN * slen, bool modify)
+{
+ if (!modify) {
+ SV *tmp = sv_2mortal(newSVpvn((char *)*s, *slen));
+ if (SvTAINTED(*src))
+ SvTAINTED_on(tmp);
+ *src = tmp;
+ }
+ sv_utf8_upgrade_nomg(*src);
+ *s = (U8 *)SvPV_nomg(*src, *slen);
+}
#define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s"
#define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode"
+#define ERR_DECODE_STR_NOMAP "%s \"%s\" does not map to Unicode"
static SV *
do_fallback_cb(pTHX_ UV ch, SV *fallback_cb)
{
dSP;
int argc;
- SV *retval = newSVpv("",0);
+ SV *retval;
ENTER;
SAVETMPS;
PUSHMARK(sp);
- XPUSHs(sv_2mortal(newSVnv((UV)ch)));
+ XPUSHs(sv_2mortal(newSVuv(ch)));
PUTBACK;
argc = call_sv(fallback_cb, G_SCALAR);
SPAGAIN;
if (argc != 1){
croak("fallback sub must return scalar!");
}
- sv_catsv(retval, POPs);
+ retval = POPs;
+ SvREFCNT_inc(retval);
PUTBACK;
FREETMPS;
LEAVE;
@@ -104,18 +137,42 @@ do_fallback_cb(pTHX_ UV ch, SV *fallback_cb)
}
static SV *
-encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src,
+do_bytes_fallback_cb(pTHX_ U8 *s, STRLEN slen, SV *fallback_cb)
+{
+ dSP;
+ int argc;
+ STRLEN i;
+ SV *retval;
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(sp);
+ for (i=0; i<slen; ++i)
+ XPUSHs(sv_2mortal(newSVuv(s[i])));
+ PUTBACK;
+ argc = call_sv(fallback_cb, G_SCALAR);
+ SPAGAIN;
+ if (argc != 1){
+ croak("fallback sub must return scalar!");
+ }
+ retval = POPs;
+ SvREFCNT_inc(retval);
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+ return retval;
+}
+
+static SV *
+encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, U8 * s, STRLEN slen,
int check, STRLEN * offset, SV * term, int * retcode,
SV *fallback_cb)
{
- STRLEN slen;
- U8 *s = (U8 *) SvPV(src, slen);
STRLEN tlen = slen;
STRLEN ddone = 0;
STRLEN sdone = 0;
/* We allocate slen+1.
PerlIO dumps core if this value is smaller than this. */
- SV *dst = sv_2mortal(newSV(slen+1));
+ SV *dst = newSV(slen+1);
U8 *d = (U8 *)SvPVX(dst);
STRLEN dlen = SvLEN(dst)-1;
int code = 0;
@@ -127,74 +184,75 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src,
if (offset) {
s += *offset;
if (slen > *offset){ /* safeguard against slen overflow */
- slen -= *offset;
+ slen -= *offset;
}else{
- slen = 0;
+ slen = 0;
}
tlen = slen;
}
if (slen == 0){
- SvCUR_set(dst, 0);
- SvPOK_only(dst);
- goto ENCODE_END;
+ SvCUR_set(dst, 0);
+ SvPOK_only(dst);
+ goto ENCODE_END;
}
while( (code = do_encode(dir, s, &slen, d, dlen, &dlen, !check,
- trm, trmlen)) )
+ trm, trmlen)) )
{
- SvCUR_set(dst, dlen+ddone);
- SvPOK_only(dst);
-
- if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL ||
- code == ENCODE_FOUND_TERM) {
- break;
- }
- switch (code) {
- case ENCODE_NOSPACE:
- {
- STRLEN more = 0; /* make sure you initialize! */
- STRLEN sleft;
- sdone += slen;
- ddone += dlen;
- sleft = tlen - sdone;
+ SvCUR_set(dst, dlen+ddone);
+ SvPOK_only(dst);
+
+ if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL ||
+ code == ENCODE_FOUND_TERM) {
+ break;
+ }
+ switch (code) {
+ case ENCODE_NOSPACE:
+ {
+ STRLEN more = 0; /* make sure you initialize! */
+ STRLEN sleft;
+ sdone += slen;
+ ddone += dlen;
+ sleft = tlen - sdone;
#if ENCODE_XS_PROFILE >= 2
- Perl_warn(aTHX_
- "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n",
- more, sdone, sleft, SvLEN(dst));
+ Perl_warn(aTHX_
+ "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n",
+ more, sdone, sleft, SvLEN(dst));
#endif
- if (sdone != 0) { /* has src ever been processed ? */
+ if (sdone != 0) { /* has src ever been processed ? */
#if ENCODE_XS_USEFP == 2
- more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone
- - SvLEN(dst);
+ more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone
+ - SvLEN(dst);
#elif ENCODE_XS_USEFP
- more = (STRLEN)((1.0*SvLEN(dst)+1)/sdone * sleft);
+ more = (STRLEN)((1.0*SvLEN(dst)+1)/sdone * sleft);
#else
- /* safe until SvLEN(dst) == MAX_INT/16 */
- more = (16*SvLEN(dst)+1)/sdone/16 * sleft;
+ /* safe until SvLEN(dst) == MAX_INT/16 */
+ more = (16*SvLEN(dst)+1)/sdone/16 * sleft;
#endif
+ }
+ more += UTF8_MAXLEN; /* insurance policy */
+ d = (U8 *) SvGROW(dst, SvLEN(dst) + more);
+ /* dst need to grow need MORE bytes! */
+ if (ddone >= SvLEN(dst)) {
+ Perl_croak(aTHX_ "Destination couldn't be grown.");
+ }
+ dlen = SvLEN(dst)-ddone-1;
+ d += ddone;
+ s += slen;
+ slen = tlen-sdone;
+ continue;
}
- more += UTF8_MAXLEN; /* insurance policy */
- d = (U8 *) SvGROW(dst, SvLEN(dst) + more);
- /* dst need to grow need MORE bytes! */
- if (ddone >= SvLEN(dst)) {
- Perl_croak(aTHX_ "Destination couldn't be grown.");
- }
- dlen = SvLEN(dst)-ddone-1;
- d += ddone;
- s += slen;
- slen = tlen-sdone;
- continue;
- }
+
case ENCODE_NOREP:
/* encoding */
if (dir == enc->f_utf8) {
STRLEN clen;
UV ch =
- utf8n_to_uvuni(s+slen, (SvCUR(src)-slen),
+ utf8n_to_uvuni(s+slen, (tlen-sdone-slen),
&clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY);
/* if non-representable multibyte prefix at end of current buffer - break*/
- if (clen > tlen - sdone) break;
+ if (clen > tlen - sdone - slen) break;
if (check & ENCODE_DIE_ON_ERR) {
Perl_croak(aTHX_ ERR_ENCODE_NOMAP,
(UV)ch, enc->name[0]);
@@ -208,16 +266,22 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src,
goto ENCODE_SET_SRC;
}
if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
+ STRLEN sublen;
+ char *substr;
SV* subchar =
(fallback_cb != &PL_sv_undef)
? do_fallback_cb(aTHX_ ch, fallback_cb)
- : newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04"UVxf"}" :
+ : newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04" UVxf "}" :
check & ENCODE_HTMLCREF ? "&#%" UVuf ";" :
"&#x%" UVxf ";", (UV)ch);
- SvUTF8_off(subchar); /* make sure no decoded string gets in */
+ substr = SvPV(subchar, sublen);
+ if (SvUTF8(subchar) && sublen && !utf8_to_bytes((U8 *)substr, &sublen)) { /* make sure no decoded string gets in */
+ SvREFCNT_dec(subchar);
+ croak("Wide character");
+ }
sdone += slen + clen;
- ddone += dlen + SvCUR(subchar);
- sv_catsv(dst, subchar);
+ ddone += dlen + sublen;
+ sv_catpvn(dst, substr, sublen);
SvREFCNT_dec(subchar);
} else {
/* fallback char */
@@ -244,34 +308,37 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src,
}
if (check &
(ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
+ STRLEN sublen;
+ char *substr;
SV* subchar =
(fallback_cb != &PL_sv_undef)
? do_fallback_cb(aTHX_ (UV)s[slen], fallback_cb)
: newSVpvf("\\x%02" UVXf, (UV)s[slen]);
+ substr = SvPVutf8(subchar, sublen);
sdone += slen + 1;
- ddone += dlen + SvCUR(subchar);
- sv_catsv(dst, subchar);
+ ddone += dlen + sublen;
+ sv_catpvn(dst, substr, sublen);
SvREFCNT_dec(subchar);
} else {
sdone += slen + 1;
ddone += dlen + strlen(FBCHAR_UTF8);
- sv_catpv(dst, FBCHAR_UTF8);
+ sv_catpvn(dst, FBCHAR_UTF8, strlen(FBCHAR_UTF8));
}
}
/* settle variables when fallback */
d = (U8 *)SvEND(dst);
- dlen = SvLEN(dst) - ddone - 1;
+ dlen = SvLEN(dst) - ddone - 1;
s = (U8*)SvPVX(src) + sdone;
slen = tlen - sdone;
break;
- default:
- Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
- code, (dir == enc->f_utf8) ? "to" : "from",
- enc->name[0]);
- return &PL_sv_undef;
- }
- }
+ default:
+ Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
+ code, (dir == enc->f_utf8) ? "to" : "from",
+ enc->name[0]);
+ return &PL_sv_undef;
+ }
+ } /* End of looping through the string */
ENCODE_SET_SRC:
if (check && !(check & ENCODE_LEAVE_SRC)){
sdone = SvCUR(src) - (slen+sdone);
@@ -279,6 +346,7 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src,
sv_setpvn(src, (char*)s+slen, sdone);
}
SvCUR_set(src, sdone);
+ SvSETMAGIC(src);
}
/* warn("check = 0x%X, code = 0x%d\n", check, code); */
@@ -294,7 +362,7 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src,
}
#endif
- if (offset)
+ if (offset)
*offset += sdone + slen;
ENCODE_END:
@@ -318,14 +386,100 @@ strict_utf8(pTHX_ SV* sv)
return SvTRUE(*svp);
}
+/* Modern perls have the capability to do this more efficiently and portably */
+#ifdef utf8n_to_uvchr_msgs
+# define CAN_USE_BASE_PERL
+#endif
+
+#ifndef CAN_USE_BASE_PERL
+
+/*
+ * https://github.com/dankogai/p5-encode/pull/56#issuecomment-231959126
+ */
+#ifndef UNICODE_IS_NONCHAR
+#define UNICODE_IS_NONCHAR(c) ((c >= 0xFDD0 && c <= 0xFDEF) || (c & 0xFFFE) == 0xFFFE)
+#endif
+
+#ifndef UNICODE_IS_SUPER
+#define UNICODE_IS_SUPER(c) (c > PERL_UNICODE_MAX)
+#endif
+
+#define UNICODE_IS_STRICT(c) (!UNICODE_IS_SURROGATE(c) && !UNICODE_IS_NONCHAR(c) && !UNICODE_IS_SUPER(c))
+
+#ifndef UTF_ACCUMULATION_OVERFLOW_MASK
+#ifndef CHARBITS
+#define CHARBITS CHAR_BIT
+#endif
+#define UTF_ACCUMULATION_OVERFLOW_MASK (((UV) UTF_CONTINUATION_MASK) << ((sizeof(UV) * CHARBITS) - UTF_ACCUMULATION_SHIFT))
+#endif
+
+/*
+ * Convert non strict utf8 sequence of len >= 2 to unicode codepoint
+ */
+static UV
+convert_utf8_multi_seq(U8* s, STRLEN len, STRLEN *rlen)
+{
+ UV uv;
+ U8 *ptr = s;
+ bool overflowed = 0;
+
+ uv = NATIVE_TO_UTF(*s) & UTF_START_MASK(UTF8SKIP(s));
+
+ len--;
+ s++;
+
+ while (len--) {
+ if (!UTF8_IS_CONTINUATION(*s)) {
+ *rlen = s-ptr;
+ return 0;
+ }
+ if (uv & UTF_ACCUMULATION_OVERFLOW_MASK)
+ overflowed = 1;
+ uv = UTF8_ACCUMULATE(uv, *s);
+ s++;
+ }
+
+ *rlen = s-ptr;
+
+ if (overflowed || *rlen > (STRLEN)UNISKIP(uv)) {
+ return 0;
+ }
+
+ return uv;
+}
+
+#endif /* CAN_USE_BASE_PERL */
+
static U8*
process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
bool encode, bool strict, bool stop_at_partial)
{
+ /* Copies the purportedly UTF-8 encoded string starting at 's' and ending
+ * at 'e' - 1 to 'dst', checking as it goes along that the string actually
+ * is valid UTF-8. There are two levels of strictness checking. If
+ * 'strict' is FALSE, the string is checked for being well-formed UTF-8, as
+ * extended by Perl. Additionally, if 'strict' is TRUE, above-Unicode code
+ * points, surrogates, and non-character code points are checked for. When
+ * invalid input is encountered, some action is taken, exactly what depends
+ * on the flags in 'check_sv'. 'encode' gives if this is from an encode
+ * operation (if TRUE), or a decode one. This function returns the
+ * position in 's' of the start of the next character beyond where it got
+ * to. If there were no problems, that will be 'e'. If 'stop_at_partial'
+ * is TRUE, if the final character before 'e' is incomplete, but valid as
+ * far as is available, no action will be taken on that partial character,
+ * and the return value will point to its first byte */
+
UV uv;
STRLEN ulen;
SV *fallback_cb;
int check;
+ U8 *d;
+ STRLEN dlen;
+ char esc[UTF8_MAXLEN * 6 + 1];
+ STRLEN i;
+ const U32 flags = (strict)
+ ? UTF8_DISALLOW_ILLEGAL_INTERCHANGE
+ : UTF8_ALLOW_NON_STRICT;
if (SvROK(check_sv)) {
/* croak("UTF-8 decoder doesn't support callback CHECK"); */
@@ -334,101 +488,180 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
}
else {
fallback_cb = &PL_sv_undef;
- check = SvIV(check_sv);
+ check = SvIV_nomg(check_sv);
}
SvPOK_only(dst);
SvCUR_set(dst,0);
+ dlen = (s && e && s < e) ? e-s+1 : 1;
+ d = (U8 *) SvGROW(dst, dlen);
+
+ stop_at_partial = stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL);
+
while (s < e) {
+
+#ifdef CAN_USE_BASE_PERL /* Use the much faster, portable implementation if
+ available */
+
+ /* If there were no errors, this will be 'e'; otherwise it will point
+ * to the first byte of the erroneous input */
+ const U8* e_or_where_failed;
+ bool valid = is_utf8_string_loc_flags(s, e - s, &e_or_where_failed, flags);
+ STRLEN len = e_or_where_failed - s;
+
+ /* Copy as far as was successful */
+ Move(s, d, len, U8);
+ d += len;
+ s = (U8 *) e_or_where_failed;
+
+ /* Are done if it was valid, or we are accepting partial characters and
+ * the only error is that the final bytes form a partial character */
+ if ( LIKELY(valid)
+ || ( stop_at_partial
+ && is_utf8_valid_partial_char_flags(s, e, flags)))
+ {
+ break;
+ }
+
+ /* Here, was not valid. If is 'strict', and is legal extended UTF-8,
+ * we know it is a code point whose value we can calculate, just not
+ * one accepted under strict. Otherwise, it is malformed in some way.
+ * In either case, the system function can calculate either the code
+ * point, or the best substitution for it */
+ uv = utf8n_to_uvchr(s, e - s, &ulen, UTF8_ALLOW_ANY);
+
+#else /* Use code for earlier perls */
+
+ ((void)sizeof(flags)); /* Avoid compiler warning */
+
if (UTF8_IS_INVARIANT(*s)) {
- sv_catpvn(dst, (char *)s, 1);
- s++;
+ *d++ = *s++;
continue;
}
- if (UTF8_IS_START(*s)) {
+ uv = 0;
+ ulen = 1;
+ if (! UTF8_IS_CONTINUATION(*s)) {
+ /* Not an invariant nor a continuation; must be a start byte. (We
+ * can't test for UTF8_IS_START as that excludes things like \xC0
+ * which are start bytes, but always lead to overlongs */
+
U8 skip = UTF8SKIP(s);
if ((s + skip) > e) {
- if (stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL)) {
- const U8 *p = s + 1;
- for (; p < e; p++) {
- if (!UTF8_IS_CONTINUATION(*p))
- goto malformed_byte;
- }
+ /* just calculate ulen, in pathological cases can be smaller then e-s */
+ if (e-s >= 2)
+ convert_utf8_multi_seq(s, e-s, &ulen);
+ else
+ ulen = 1;
+
+ if (stop_at_partial && ulen == (STRLEN)(e-s))
break;
- }
goto malformed_byte;
}
- uv = utf8n_to_uvuni(s, e - s, &ulen,
- UTF8_CHECK_ONLY | (strict ? UTF8_ALLOW_STRICT :
- UTF8_ALLOW_NONSTRICT)
- );
-#if 1 /* perl-5.8.6 and older do not check UTF8_ALLOW_LONG */
- if (strict && uv > PERL_UNICODE_MAX)
- ulen = (STRLEN) -1;
-#endif
- if (ulen == (STRLEN) -1) {
- if (strict) {
- uv = utf8n_to_uvuni(s, e - s, &ulen,
- UTF8_CHECK_ONLY | UTF8_ALLOW_NONSTRICT);
- if (ulen == (STRLEN) -1)
- goto malformed_byte;
- goto malformed;
- }
+ uv = convert_utf8_multi_seq(s, skip, &ulen);
+ if (uv == 0)
goto malformed_byte;
- }
+ else if (strict && !UNICODE_IS_STRICT(uv))
+ goto malformed;
/* Whole char is good */
- sv_catpvn(dst,(char *)s,skip);
+ memcpy(d, s, skip);
+ d += skip;
s += skip;
continue;
}
/* If we get here there is something wrong with alleged UTF-8 */
+ /* uv is used only when encoding */
malformed_byte:
- uv = (UV)*s;
- ulen = 1;
+ if (uv == 0)
+ uv = (UV)*s;
+ if (encode || ulen == 0)
+ ulen = 1;
malformed:
+
+#endif /* The two versions for processing come back together here, for the
+ * error handling code.
+ *
+ * Here, we are looping through the input and found an error.
+ * 'uv' is the code point in error if calculable, or the REPLACEMENT
+ * CHARACTER if not.
+ * 'ulen' is how many bytes of input this iteration of the loop
+ * consumes */
+
+ if (!encode && (check & (ENCODE_DIE_ON_ERR|ENCODE_WARN_ON_ERR|ENCODE_PERLQQ)))
+ for (i=0; i<ulen; ++i) sprintf(esc+4*i, "\\x%02X", s[i]);
if (check & ENCODE_DIE_ON_ERR){
if (encode)
- Perl_croak(aTHX_ ERR_ENCODE_NOMAP, uv, "utf8");
+ Perl_croak(aTHX_ ERR_ENCODE_NOMAP, uv, (strict ? "UTF-8" : "utf8"));
else
- Perl_croak(aTHX_ ERR_DECODE_NOMAP, "utf8", uv);
+ Perl_croak(aTHX_ ERR_DECODE_STR_NOMAP, (strict ? "UTF-8" : "utf8"), esc);
}
if (check & ENCODE_WARN_ON_ERR){
if (encode)
Perl_warner(aTHX_ packWARN(WARN_UTF8),
- ERR_ENCODE_NOMAP, uv, "utf8");
+ ERR_ENCODE_NOMAP, uv, (strict ? "UTF-8" : "utf8"));
else
Perl_warner(aTHX_ packWARN(WARN_UTF8),
- ERR_DECODE_NOMAP, "utf8", uv);
+ ERR_DECODE_STR_NOMAP, (strict ? "UTF-8" : "utf8"), esc);
}
if (check & ENCODE_RETURN_ON_ERR) {
break;
}
if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
- SV* subchar =
- (fallback_cb != &PL_sv_undef)
- ? do_fallback_cb(aTHX_ uv, fallback_cb)
- : newSVpvf(check & ENCODE_PERLQQ
- ? (ulen == 1 ? "\\x%02" UVXf : "\\x{%04" UVXf "}")
- : check & ENCODE_HTMLCREF ? "&#%" UVuf ";"
- : "&#x%" UVxf ";", uv);
- if (encode){
- SvUTF8_off(subchar); /* make sure no decoded string gets in */
- }
- sv_catsv(dst, subchar);
+ STRLEN sublen;
+ char *substr;
+ SV* subchar;
+ if (encode) {
+ subchar =
+ (fallback_cb != &PL_sv_undef)
+ ? do_fallback_cb(aTHX_ uv, fallback_cb)
+ : newSVpvf(check & ENCODE_PERLQQ
+ ? (ulen == 1 ? "\\x%02" UVXf : "\\x{%04" UVXf "}")
+ : check & ENCODE_HTMLCREF ? "&#%" UVuf ";"
+ : "&#x%" UVxf ";", uv);
+ substr = SvPV(subchar, sublen);
+ if (SvUTF8(subchar) && sublen && !utf8_to_bytes((U8 *)substr, &sublen)) { /* make sure no decoded string gets in */
+ SvREFCNT_dec(subchar);
+ croak("Wide character");
+ }
+ } else {
+ if (fallback_cb != &PL_sv_undef) {
+ /* in decode mode we have sequence of wrong bytes */
+ subchar = do_bytes_fallback_cb(aTHX_ s, ulen, fallback_cb);
+ } else {
+ char *ptr = esc;
+ /* ENCODE_PERLQQ is already stored in esc */
+ if (check & (ENCODE_HTMLCREF|ENCODE_XMLCREF))
+ for (i=0; i<ulen; ++i) ptr += sprintf(ptr, ((check & ENCODE_HTMLCREF) ? "&#%u;" : "&#x%02X;"), s[i]);
+ subchar = newSVpvn(esc, strlen(esc));
+ }
+ substr = SvPVutf8(subchar, sublen);
+ }
+ dlen += sublen - ulen;
+ SvCUR_set(dst, d-(U8 *)SvPVX(dst));
+ *SvEND(dst) = '\0';
+ sv_catpvn(dst, substr, sublen);
SvREFCNT_dec(subchar);
+ d = (U8 *) SvGROW(dst, dlen) + SvCUR(dst);
} else {
- sv_catpv(dst, FBCHAR_UTF8);
+ STRLEN fbcharlen = strlen(FBCHAR_UTF8);
+ dlen += fbcharlen - ulen;
+ if (SvLEN(dst) < dlen) {
+ SvCUR_set(dst, d-(U8 *)SvPVX(dst));
+ d = (U8 *) sv_grow(dst, dlen) + SvCUR(dst);
+ }
+ memcpy(d, FBCHAR_UTF8, fbcharlen);
+ d += fbcharlen;
}
s += ulen;
}
+ SvCUR_set(dst, d-(U8 *)SvPVX(dst));
*SvEND(dst) = '\0';
return s;
@@ -440,7 +673,7 @@ MODULE = Encode PACKAGE = Encode::utf8 PREFIX = Method_
PROTOTYPES: DISABLE
void
-Method_decode_xs(obj,src,check_sv = &PL_sv_no)
+Method_decode(obj,src,check_sv = &PL_sv_no)
SV * obj
SV * src
SV * check_sv
@@ -451,23 +684,32 @@ PREINIT:
SV *dst;
bool renewed = 0;
int check;
-CODE:
-{
- dSP; ENTER; SAVETMPS;
- if (src == &PL_sv_undef || SvROK(src)) src = sv_2mortal(newSV(0));
- s = (U8 *) SvPV(src, slen);
- e = (U8 *) SvEND(src);
- check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv);
- /*
+ bool modify;
+ dSP;
+INIT:
+ SvGETMAGIC(src);
+ SvGETMAGIC(check_sv);
+ check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv);
+ modify = (check && !(check & ENCODE_LEAVE_SRC));
+PPCODE:
+ if (!SvOK(src))
+ XSRETURN_UNDEF;
+ s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen);
+ if (SvUTF8(src))
+ utf8_safe_downgrade(aTHX_ &src, &s, &slen, modify);
+ e = s+slen;
+
+ /*
* PerlIO check -- we assume the object is of PerlIO if renewed
*/
+ ENTER; SAVETMPS;
PUSHMARK(sp);
XPUSHs(obj);
PUTBACK;
if (call_method("renewed",G_SCALAR) == 1) {
SPAGAIN;
renewed = (bool)POPi;
- PUTBACK;
+ PUTBACK;
#if 0
fprintf(stderr, "renewed == %d\n", renewed);
#endif
@@ -475,37 +717,25 @@ CODE:
FREETMPS; LEAVE;
/* end PerlIO check */
- if (SvUTF8(src)) {
- s = utf8_to_bytes(s,&slen);
- if (s) {
- SvCUR_set(src,slen);
- SvUTF8_off(src);
- e = s+slen;
- }
- else {
- croak("Cannot decode string with wide characters");
- }
- }
-
dst = sv_2mortal(newSV(slen>0?slen:1)); /* newSV() abhors 0 -- inaba */
s = process_utf8(aTHX_ dst, s, e, check_sv, 0, strict_utf8(aTHX_ obj), renewed);
/* Clear out translated part of source unless asked not to */
- if (check && !(check & ENCODE_LEAVE_SRC)){
+ if (modify) {
slen = e-s;
if (slen) {
sv_setpvn(src, (char*)s, slen);
}
SvCUR_set(src, slen);
+ SvSETMAGIC(src);
}
SvUTF8_on(dst);
if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */
ST(0) = dst;
XSRETURN(1);
-}
void
-Method_encode_xs(obj,src,check_sv = &PL_sv_no)
+Method_encode(obj,src,check_sv = &PL_sv_no)
SV * obj
SV * src
SV * check_sv
@@ -515,12 +745,17 @@ PREINIT:
U8 *e;
SV *dst;
int check;
-CODE:
-{
- check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv);
- if (src == &PL_sv_undef || SvROK(src)) src = sv_2mortal(newSV(0));
- s = (U8 *) SvPV(src, slen);
- e = (U8 *) SvEND(src);
+ bool modify;
+INIT:
+ SvGETMAGIC(src);
+ SvGETMAGIC(check_sv);
+ check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv);
+ modify = (check && !(check & ENCODE_LEAVE_SRC));
+PPCODE:
+ if (!SvOK(src))
+ XSRETURN_UNDEF;
+ s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen);
+ e = s+slen;
dst = sv_2mortal(newSV(slen>0?slen:1)); /* newSV() abhors 0 -- inaba */
if (SvUTF8(src)) {
/* Already encoded */
@@ -556,32 +791,32 @@ CODE:
}
/* Clear out translated part of source unless asked not to */
- if (check && !(check & ENCODE_LEAVE_SRC)){
+ if (modify) {
slen = e-s;
if (slen) {
sv_setpvn(src, (char*)s, slen);
}
SvCUR_set(src, slen);
+ SvSETMAGIC(src);
}
SvPOK_only(dst);
SvUTF8_off(dst);
if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */
ST(0) = dst;
XSRETURN(1);
-}
MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_
-PROTOTYPES: ENABLE
+PROTOTYPES: DISABLE
-void
+SV *
Method_renew(obj)
SV * obj
CODE:
-{
PERL_UNUSED_VAR(obj);
- XSRETURN(1);
-}
+ RETVAL = newSVsv(obj);
+OUTPUT:
+ RETVAL
int
Method_renewed(obj)
@@ -592,17 +827,19 @@ CODE:
OUTPUT:
RETVAL
-void
+SV *
Method_name(obj)
SV * obj
+PREINIT:
+ encode_t *enc;
+INIT:
+ enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
CODE:
-{
- encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
- ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0])));
- XSRETURN(1);
-}
+ RETVAL = newSVpvn(enc->name[0], strlen(enc->name[0]));
+OUTPUT:
+ RETVAL
-void
+bool
Method_cat_decode(obj, dst, src, off, term, check_sv = &PL_sv_no)
SV * obj
SV * dst
@@ -610,140 +847,142 @@ SV * src
SV * off
SV * term
SV * check_sv
-CODE:
-{
+PREINIT:
int check;
- SV *fallback_cb = &PL_sv_undef;
- encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
- STRLEN offset = (STRLEN)SvIV(off);
+ SV *fallback_cb;
+ bool modify;
+ encode_t *enc;
+ STRLEN offset;
int code = 0;
- if (SvUTF8(src)) {
- sv_utf8_downgrade(src, FALSE);
- }
- if (SvROK(check_sv)){
- fallback_cb = check_sv;
- check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */
- }else{
- check = SvIV(check_sv);
- }
- sv_catsv(dst, encode_method(aTHX_ enc, enc->t_utf8, src, check,
- &offset, term, &code, fallback_cb));
+ U8 *s;
+ STRLEN slen;
+ SV *tmp;
+INIT:
+ SvGETMAGIC(src);
+ SvGETMAGIC(check_sv);
+ check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv);
+ fallback_cb = SvROK(check_sv) ? check_sv : &PL_sv_undef;
+ modify = (check && !(check & ENCODE_LEAVE_SRC));
+ enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
+ offset = (STRLEN)SvIV(off);
+CODE:
+ if (!SvOK(src))
+ XSRETURN_NO;
+ s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen);
+ if (SvUTF8(src))
+ utf8_safe_downgrade(aTHX_ &src, &s, &slen, modify);
+ tmp = encode_method(aTHX_ enc, enc->t_utf8, src, s, slen, check,
+ &offset, term, &code, fallback_cb);
+ sv_catsv(dst, tmp);
+ SvREFCNT_dec(tmp);
SvIV_set(off, (IV)offset);
- if (code == ENCODE_FOUND_TERM) {
- ST(0) = &PL_sv_yes;
- }else{
- ST(0) = &PL_sv_no;
- }
- XSRETURN(1);
-}
+ RETVAL = (code == ENCODE_FOUND_TERM);
+OUTPUT:
+ RETVAL
-void
+SV *
Method_decode(obj,src,check_sv = &PL_sv_no)
SV * obj
SV * src
SV * check_sv
-CODE:
-{
+PREINIT:
int check;
- SV *fallback_cb = &PL_sv_undef;
- encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
- if (SvUTF8(src)) {
- sv_utf8_downgrade(src, FALSE);
- }
- if (SvROK(check_sv)){
- fallback_cb = check_sv;
- check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */
- }else{
- check = SvIV(check_sv);
- }
- ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check,
+ SV *fallback_cb;
+ bool modify;
+ encode_t *enc;
+ U8 *s;
+ STRLEN slen;
+INIT:
+ SvGETMAGIC(src);
+ SvGETMAGIC(check_sv);
+ check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv);
+ fallback_cb = SvROK(check_sv) ? check_sv : &PL_sv_undef;
+ modify = (check && !(check & ENCODE_LEAVE_SRC));
+ enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
+CODE:
+ if (!SvOK(src))
+ XSRETURN_UNDEF;
+ s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen);
+ if (SvUTF8(src))
+ utf8_safe_downgrade(aTHX_ &src, &s, &slen, modify);
+ RETVAL = encode_method(aTHX_ enc, enc->t_utf8, src, s, slen, check,
NULL, Nullsv, NULL, fallback_cb);
- SvUTF8_on(ST(0));
- XSRETURN(1);
-}
+ SvUTF8_on(RETVAL);
+OUTPUT:
+ RETVAL
-void
+SV *
Method_encode(obj,src,check_sv = &PL_sv_no)
SV * obj
SV * src
SV * check_sv
-CODE:
-{
+PREINIT:
int check;
- SV *fallback_cb = &PL_sv_undef;
- encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
- sv_utf8_upgrade(src);
- if (SvROK(check_sv)){
- fallback_cb = check_sv;
- check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */
- }else{
- check = SvIV(check_sv);
- }
- ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check,
+ SV *fallback_cb;
+ bool modify;
+ encode_t *enc;
+ U8 *s;
+ STRLEN slen;
+INIT:
+ SvGETMAGIC(src);
+ SvGETMAGIC(check_sv);
+ check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv);
+ fallback_cb = SvROK(check_sv) ? check_sv : &PL_sv_undef;
+ modify = (check && !(check & ENCODE_LEAVE_SRC));
+ enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
+CODE:
+ if (!SvOK(src))
+ XSRETURN_UNDEF;
+ s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen);
+ if (!SvUTF8(src))
+ utf8_safe_upgrade(aTHX_ &src, &s, &slen, modify);
+ RETVAL = encode_method(aTHX_ enc, enc->f_utf8, src, s, slen, check,
NULL, Nullsv, NULL, fallback_cb);
- XSRETURN(1);
-}
+OUTPUT:
+ RETVAL
-void
+bool
Method_needs_lines(obj)
SV * obj
CODE:
-{
- /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */
PERL_UNUSED_VAR(obj);
- ST(0) = &PL_sv_no;
- XSRETURN(1);
-}
+ RETVAL = FALSE;
+OUTPUT:
+ RETVAL
-void
+bool
Method_perlio_ok(obj)
SV * obj
+PREINIT:
+ SV *sv;
CODE:
-{
- /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */
- /* require_pv(PERLIO_FILENAME); */
-
PERL_UNUSED_VAR(obj);
- eval_pv("require PerlIO::encoding", 0);
- SPAGAIN;
-
- if (SvTRUE(get_sv("@", 0))) {
- ST(0) = &PL_sv_no;
- }else{
- ST(0) = &PL_sv_yes;
- }
- XSRETURN(1);
-}
+ sv = eval_pv("require PerlIO::encoding", 0);
+ RETVAL = SvTRUE(sv);
+OUTPUT:
+ RETVAL
-void
+SV *
Method_mime_name(obj)
SV * obj
+PREINIT:
+ encode_t *enc;
+INIT:
+ enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
CODE:
-{
- encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
- SV *retval;
- eval_pv("require Encode::MIME::Name", 0);
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(sp);
+ XPUSHs(sv_2mortal(newSVpvn(enc->name[0], strlen(enc->name[0]))));
+ PUTBACK;
+ call_pv("Encode::MIME::Name::get_mime_name", G_SCALAR);
SPAGAIN;
-
- if (SvTRUE(get_sv("@", 0))) {
- ST(0) = &PL_sv_undef;
- }else{
- ENTER;
- SAVETMPS;
- PUSHMARK(sp);
- XPUSHs(sv_2mortal(newSVpvn(enc->name[0], strlen(enc->name[0]))));
- PUTBACK;
- call_pv("Encode::MIME::Name::get_mime_name", G_SCALAR);
- SPAGAIN;
- retval = newSVsv(POPs);
- PUTBACK;
- FREETMPS;
- LEAVE;
- /* enc->name[0] */
- ST(0) = retval;
- }
- XSRETURN(1);
-}
+ RETVAL = newSVsv(POPs);
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+OUTPUT:
+ RETVAL
MODULE = Encode PACKAGE = Encode
@@ -752,10 +991,11 @@ PROTOTYPES: ENABLE
I32
_bytes_to_utf8(sv, ...)
SV * sv
+PREINIT:
+ SV * encoding;
+INIT:
+ encoding = items == 2 ? ST(1) : Nullsv;
CODE:
-{
- SV * encoding = items == 2 ? ST(1) : Nullsv;
-
if (encoding)
RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
else {
@@ -769,18 +1009,19 @@ CODE:
Safefree(converted); /* ... so free it */
RETVAL = len;
}
-}
OUTPUT:
RETVAL
I32
_utf8_to_bytes(sv, ...)
SV * sv
+PREINIT:
+ SV * to;
+ SV * check;
+INIT:
+ to = items > 1 ? ST(1) : Nullsv;
+ check = items > 2 ? ST(2) : Nullsv;
CODE:
-{
- SV * to = items > 1 ? ST(1) : Nullsv;
- SV * check = items > 2 ? ST(2) : Nullsv;
-
if (to) {
RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
} else {
@@ -840,7 +1081,6 @@ CODE:
RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
}
}
-}
OUTPUT:
RETVAL
@@ -848,39 +1088,31 @@ bool
is_utf8(sv, check = 0)
SV * sv
int check
+PREINIT:
+ char *str;
+ STRLEN len;
CODE:
-{
- if (SvGMAGICAL(sv)) /* it could be $1, for example */
- sv = newSVsv(sv); /* GMAGIG will be done */
+ SvGETMAGIC(sv); /* SvGETMAGIC() can modify SvOK flag */
+ str = SvOK(sv) ? SvPV_nomg(sv, len) : NULL; /* SvPV() can modify SvUTF8 flag */
RETVAL = SvUTF8(sv) ? TRUE : FALSE;
- if (RETVAL &&
- check &&
- !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
+ if (RETVAL && check && (!str || !is_utf8_string((U8 *)str, len)))
RETVAL = FALSE;
- if (sv != ST(0))
- SvREFCNT_dec(sv); /* it was a temp copy */
-}
OUTPUT:
RETVAL
-#ifndef SvIsCOW
-# define SvIsCOW(sv) (SvREADONLY(sv) && SvFAKE(sv))
-#endif
-
SV *
_utf8_on(sv)
SV * sv
CODE:
-{
- if (SvPOK(sv)) {
- SV *rsv = newSViv(SvUTF8(sv));
- RETVAL = rsv;
- if (SvIsCOW(sv)) sv_force_normal(sv);
- SvUTF8_on(sv);
+ SvGETMAGIC(sv);
+ if (!SvTAINTED(sv) && SvPOKp(sv)) {
+ if (SvTHINKFIRST(sv)) sv_force_normal(sv);
+ RETVAL = boolSV(SvUTF8(sv));
+ SvUTF8_on(sv);
+ SvSETMAGIC(sv);
} else {
- RETVAL = &PL_sv_undef;
+ RETVAL = &PL_sv_undef;
}
-}
OUTPUT:
RETVAL
@@ -888,125 +1120,41 @@ SV *
_utf8_off(sv)
SV * sv
CODE:
-{
- if (SvPOK(sv)) {
- SV *rsv = newSViv(SvUTF8(sv));
- RETVAL = rsv;
- if (SvIsCOW(sv)) sv_force_normal(sv);
- SvUTF8_off(sv);
+ SvGETMAGIC(sv);
+ if (!SvTAINTED(sv) && SvPOKp(sv)) {
+ if (SvTHINKFIRST(sv)) sv_force_normal(sv);
+ RETVAL = boolSV(SvUTF8(sv));
+ SvUTF8_off(sv);
+ SvSETMAGIC(sv);
} else {
- RETVAL = &PL_sv_undef;
+ RETVAL = &PL_sv_undef;
}
-}
OUTPUT:
RETVAL
-int
-DIE_ON_ERR()
-CODE:
- RETVAL = ENCODE_DIE_ON_ERR;
-OUTPUT:
- RETVAL
-
-int
-WARN_ON_ERR()
-CODE:
- RETVAL = ENCODE_WARN_ON_ERR;
-OUTPUT:
- RETVAL
-
-int
-LEAVE_SRC()
-CODE:
- RETVAL = ENCODE_LEAVE_SRC;
-OUTPUT:
- RETVAL
-
-int
-RETURN_ON_ERR()
-CODE:
- RETVAL = ENCODE_RETURN_ON_ERR;
-OUTPUT:
- RETVAL
-
-int
-PERLQQ()
-CODE:
- RETVAL = ENCODE_PERLQQ;
-OUTPUT:
- RETVAL
-
-int
-HTMLCREF()
-CODE:
- RETVAL = ENCODE_HTMLCREF;
-OUTPUT:
- RETVAL
-
-int
-XMLCREF()
-CODE:
- RETVAL = ENCODE_XMLCREF;
-OUTPUT:
- RETVAL
-
-int
-STOP_AT_PARTIAL()
-CODE:
- RETVAL = ENCODE_STOP_AT_PARTIAL;
-OUTPUT:
- RETVAL
-
-int
-FB_DEFAULT()
-CODE:
- RETVAL = ENCODE_FB_DEFAULT;
-OUTPUT:
- RETVAL
-
-int
-FB_CROAK()
-CODE:
- RETVAL = ENCODE_FB_CROAK;
-OUTPUT:
- RETVAL
-
-int
-FB_QUIET()
-CODE:
- RETVAL = ENCODE_FB_QUIET;
-OUTPUT:
- RETVAL
-
-int
-FB_WARN()
-CODE:
- RETVAL = ENCODE_FB_WARN;
-OUTPUT:
- RETVAL
-
-int
-FB_PERLQQ()
-CODE:
- RETVAL = ENCODE_FB_PERLQQ;
-OUTPUT:
- RETVAL
-
-int
-FB_HTMLCREF()
-CODE:
- RETVAL = ENCODE_FB_HTMLCREF;
-OUTPUT:
- RETVAL
-
-int
-FB_XMLCREF()
+void
+onBOOT()
CODE:
- RETVAL = ENCODE_FB_XMLCREF;
-OUTPUT:
- RETVAL
+{
+#include "def_t.exh"
+}
BOOT:
{
-#include "def_t.exh"
+ HV *stash = gv_stashpvn("Encode", strlen("Encode"), GV_ADD);
+ newCONSTSUB(stash, "DIE_ON_ERR", newSViv(ENCODE_DIE_ON_ERR));
+ newCONSTSUB(stash, "WARN_ON_ERR", newSViv(ENCODE_WARN_ON_ERR));
+ newCONSTSUB(stash, "RETURN_ON_ERR", newSViv(ENCODE_RETURN_ON_ERR));
+ newCONSTSUB(stash, "LEAVE_SRC", newSViv(ENCODE_LEAVE_SRC));
+ newCONSTSUB(stash, "PERLQQ", newSViv(ENCODE_PERLQQ));
+ newCONSTSUB(stash, "HTMLCREF", newSViv(ENCODE_HTMLCREF));
+ newCONSTSUB(stash, "XMLCREF", newSViv(ENCODE_XMLCREF));
+ newCONSTSUB(stash, "STOP_AT_PARTIAL", newSViv(ENCODE_STOP_AT_PARTIAL));
+ newCONSTSUB(stash, "FB_DEFAULT", newSViv(ENCODE_FB_DEFAULT));
+ newCONSTSUB(stash, "FB_CROAK", newSViv(ENCODE_FB_CROAK));
+ newCONSTSUB(stash, "FB_QUIET", newSViv(ENCODE_FB_QUIET));
+ newCONSTSUB(stash, "FB_WARN", newSViv(ENCODE_FB_WARN));
+ newCONSTSUB(stash, "FB_PERLQQ", newSViv(ENCODE_FB_PERLQQ));
+ newCONSTSUB(stash, "FB_HTMLCREF", newSViv(ENCODE_FB_HTMLCREF));
+ newCONSTSUB(stash, "FB_XMLCREF", newSViv(ENCODE_FB_XMLCREF));
}
diff --git a/gnu/usr.bin/perl/cpan/Encode/Makefile.PL b/gnu/usr.bin/perl/cpan/Encode/Makefile.PL
index e0372ca2832..8c20d20226f 100644
--- a/gnu/usr.bin/perl/cpan/Encode/Makefile.PL
+++ b/gnu/usr.bin/perl/cpan/Encode/Makefile.PL
@@ -1,16 +1,30 @@
#
-# $Id: Makefile.PL,v 2.16 2015/09/24 02:19:21 dankogai Exp $
+# $Id: Makefile.PL,v 2.22 2017/10/06 22:21:53 dankogai Exp $
#
use 5.007003;
use strict;
use warnings;
+use utf8;
use ExtUtils::MakeMaker;
use File::Spec;
+use Config;
# Just for sure :)
my %ARGV = map { my @r = split /=/,$_; defined $r[1] or $r[1]=1; @r } @ARGV;
$ARGV{DEBUG} and warn "$_ => $ARGV{$_}\n" for sort keys %ARGV;
$ENV{PERL_CORE} ||= $ARGV{PERL_CORE} if $ARGV{PERL_CORE};
+# similar strictness as in core
+my $ccflags = $Config{ccflags};
+if (!$ENV{PERL_CORE}) {
+ if (my $gccver = $Config{gccversion}) {
+ $gccver =~ s/\.//g; $gccver =~ s/ .*//;
+ $gccver .= "0" while length $gccver < 3;
+ $gccver = 0+$gccver;
+ $ccflags .= ' -Werror=declaration-after-statement' if $gccver > 412;
+ $ccflags .= ' -Wpointer-sign' if !$Config{d_cplusplus} and $gccver > 400;
+ $ccflags .= ' -fpermissive' if $Config{d_cplusplus};
+ }
+}
my %tables =
(
@@ -39,17 +53,24 @@ WriteMakefile(
NAME => "Encode",
EXE_FILES => \@exe_files,
VERSION_FROM => 'Encode.pm',
+ ABSTRACT_FROM=> 'Encode.pm',
+ AUTHOR => 'Dan Kogai <dankogai@dan.co.jp>',
OBJECT => '$(O_FILES)',
'dist' => {
COMPRESS => 'gzip -9f',
SUFFIX => 'gz',
DIST_DEFAULT => 'all tardist',
},
+ CCFLAGS => $ccflags,
INC => '-I' . File::Spec->catfile( '.', 'Encode' ),
LICENSE => 'perl',
PREREQ_PM => {
Exporter => '5.57', # use Exporter 'import';
parent => '0.221', # version bundled with 5.10.1
+ Storable => '0', # bundled with Perl 5.7.3
+ },
+ TEST_REQUIRES => {
+ 'Test::More' => '0.81_01',
},
PMLIBDIRS => \@pmlibdirs,
INSTALLDIRS => ($] < 5.011 ? 'perl' : 'site'),
@@ -57,6 +78,91 @@ WriteMakefile(
resources => {
repository => 'https://github.com/dankogai/p5-encode',
},
+ x_contributors => [
+ 'Alex Davies <alex.davies@talktalk.net>',
+ 'Alex Kapranoff <alex@kapranoff.ru>',
+ 'Alex Vandiver <alex@chmrr.net>',
+ 'Andreas J. Koenig <andreas.koenig@anima.de>',
+ 'Andrew Pennebaker <andrew.pennebaker@networkedinsights.com>',
+ 'Andy Grundman <andyg@activestate.com>',
+ 'Anton Tagunov <tagunov@motor.ru>',
+ 'Autrijus Tang <autrijus@autrijus.org>',
+ 'Benjamin Goldberg <goldbb2@earthlink.net>',
+ 'Bjoern Hoehrmann <derhoermi@gmx.net>',
+ 'Bjoern Jacke <debianbugs@j3e.de>',
+ 'bulk88 <bulk88@hotmail.com>',
+ 'Craig A. Berry <craigberry@mac.com>',
+ 'Curtis Jewell <csjewell@cpan.org>',
+ 'Dan Kogai <dankogai@dan.co.jp>',
+ 'Dave Evans <dave@rudolf.org.uk>',
+ 'David Golden <dagolden@cpan.org>',
+ 'David Steinbrunner <dsteinbrunner@pobox.com>',
+ 'Deng Liu <dengliu@ntu.edu.tw>',
+ 'Dominic Dunlop <domo@computer.org>',
+ 'drry',
+ 'Elizabeth Mattijsen <liz@dijkmat.nl>',
+ 'Flavio Poletti <flavio@polettix.it>',
+ 'Gerrit P. Haase <gp@familiehaase.de>',
+ 'Gisle Aas <gisle@ActiveState.com>',
+ 'Graham Barr <gbarr@pobox.com>',
+ 'Graham Knop <haarg@haarg.org>',
+ 'Graham Ollis <perl@wdlabs.com>',
+ 'Gurusamy Sarathy <gsar@activestate.com>',
+ 'H.Merijn Brand <h.m.brand@xs4all.nl>',
+ 'Hugo van der Sanden <hv@crypt.org>',
+ 'chansen <chansen@cpan.org>',
+ 'Chris Nandor <pudge@pobox.com>',
+ 'Inaba Hiroto <inaba@st.rim.or.jp>',
+ 'Jarkko Hietaniemi <jhi@iki.fi>',
+ 'Jesse Vincent <jesse@fsck.com>',
+ 'Jungshik Shin <jshin@mailaps.org>',
+ 'Karen Etheridge <ether@cpan.org>',
+ 'Karl Williamson <khw@cpan.org>',
+ 'Kenichi Ishigaki <ishigaki@cpan.org>',
+ 'KONNO Hiroharu <hiroharu.konno@bowneglobal.co.jp>',
+ 'Laszlo Molnar <ml1050@freemail.hu>',
+ 'Makamaka <makamaka@donzoko.net>',
+ 'Mark-Jason Dominus <mjd@plover.com>',
+ 'Masahiro Iuchi <masahiro.iuchi@gmail.com>',
+ 'MATSUNO Tokuhiro <tokuhirom+cpan@gmail.com>',
+ 'Mattia Barbon <mbarbon@dsi.unive.it>',
+ 'Michael G Schwern <schwern@pobox.com>',
+ 'Michael LaGrasta <michael@lagrasta.com>',
+ 'Miron Cuperman <miron@hyper.to>',
+ 'Moritz Lenz <moritz@faui2k3.org>',
+ 'MORIYAMA Masayuki <msyk@mtg.biglobe.ne.jp>',
+ 'Nick Ing-Simmons <nick@ing-simmons.net>',
+ 'Nicholas Clark <nick@ccl4.org>',
+ 'Olivier Mengué <dolmen@cpan.org>',
+ 'otsune',
+ 'Pali <pali@cpan.org>',
+ 'Paul Marquess <paul_marquess@yahoo.co.uk>',
+ 'Peter Prymmer <pvhp@best.com>',
+ 'Peter Rabbitson <ribasushi@cpan.org>',
+ 'Philip Newton <pne@cpan.org>',
+ 'Piotr Fusik <pfusik@op.pl>',
+ 'Rafael Garcia-Suarez <rgarciasuarez@mandriva.com>',
+ 'Randy Stauner <randy@magnificent-tears.com>',
+ 'Reini Urban <rurban@cpan.org>',
+ 'Robin Barker <rmb1@cise.npl.co.uk>',
+ 'SADAHIRO Tomoyuki <SADAHIRO@cpan.org>',
+ 'Simon Cozens <simon@netthink.co.uk>',
+ 'Slaven Rezic <SREZIC@cpan.org>',
+ 'Spider Boardman <spider@web.zk3.dec.com>',
+ 'Steve Hay <steve.m.hay@googlemail.com>',
+ 'Steve Peters <steve@fisharerojo.org>',
+ 'SUGAWARA Hajime <sugawara@hdt.co.jp>',
+ 'SUZUKI Norio <ZAP00217@nifty.com>',
+ 'szr8 <blz.marcel@gmail.com>',
+ 'Tatsuhiko Miyagawa <miyagawa@bulknews.net>',
+ 'Tels <perl_dummy@bloodgate.com>',
+ 'Tony Cook <tony@develop-help.com>',
+ 'Vadim Konovalov <vkonovalov@peterstar.ru>',
+ 'Victor <victor@vsespb.ru>',
+ 'Ville Skyttä <ville.skytta@iki.fi>',
+ 'Vincent van Dam <vvandam@sandvine.com>',
+ 'Yitzchak Scott-Thoennes <sthoenna@efn.org>',
+ ],
},
);
diff --git a/gnu/usr.bin/perl/cpan/Encode/Unicode/Unicode.pm b/gnu/usr.bin/perl/cpan/Encode/Unicode/Unicode.pm
index 7dec3e38159..2a8b477784c 100644
--- a/gnu/usr.bin/perl/cpan/Encode/Unicode/Unicode.pm
+++ b/gnu/usr.bin/perl/cpan/Encode/Unicode/Unicode.pm
@@ -2,9 +2,8 @@ package Encode::Unicode;
use strict;
use warnings;
-no warnings 'redefine';
-our $VERSION = do { my @r = ( q$Revision: 2.15 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.17 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use XSLoader;
XSLoader::load( __PACKAGE__, $VERSION );
@@ -13,7 +12,7 @@ XSLoader::load( __PACKAGE__, $VERSION );
# Object Generator 8 transcoders all at once!
#
-require Encode;
+use Encode ();
our %BOM_Unknown = map { $_ => 1 } qw(UTF-16 UTF-32);
@@ -34,12 +33,13 @@ for my $name (
$endian = ( $3 eq 'BE' ) ? 'n' : ( $3 eq 'LE' ) ? 'v' : '';
$size == 4 and $endian = uc($endian);
- $Encode::Encoding{$name} = bless {
+ my $obj = bless {
Name => $name,
size => $size,
endian => $endian,
ucs2 => $ucs2,
} => __PACKAGE__;
+ Encode::define_encoding($obj, $name);
}
use parent qw(Encode::Encoding);
@@ -52,12 +52,6 @@ sub renew {
return $clone;
}
-# There used to be a perl implementation of (en|de)code but with
-# XS version is ripe, perl version is zapped for optimal speed
-
-*decode = \&decode_xs;
-*encode = \&encode_xs;
-
1;
__END__
diff --git a/gnu/usr.bin/perl/cpan/Encode/Unicode/Unicode.xs b/gnu/usr.bin/perl/cpan/Encode/Unicode/Unicode.xs
index 3bad2adae03..b459786d16a 100644
--- a/gnu/usr.bin/perl/cpan/Encode/Unicode/Unicode.xs
+++ b/gnu/usr.bin/perl/cpan/Encode/Unicode/Unicode.xs
@@ -1,5 +1,5 @@
/*
- $Id: Unicode.xs,v 2.14 2016/01/22 06:33:07 dankogai Exp $
+ $Id: Unicode.xs,v 2.17 2018/02/08 00:26:15 dankogai Exp $
*/
#define PERL_NO_GET_CONTEXT
@@ -125,36 +125,62 @@ PROTOTYPES: DISABLE
#define attr(k, l) (hv_exists((HV *)SvRV(obj),k,l) ? \
*hv_fetch((HV *)SvRV(obj),k,l,0) : &PL_sv_undef)
-#define attr_true(k, l) (hv_exists((HV *)SvRV(obj),k,l) ? \
- SvTRUE(*hv_fetch((HV *)SvRV(obj),k,l,0)) : FALSE)
void
-decode_xs(obj, str, check = 0)
+decode(obj, str, check = 0)
SV * obj
SV * str
IV check
CODE:
{
- U8 endian = *((U8 *)SvPV_nolen(attr("endian", 6)));
- int size = SvIV(attr("size", 4));
+ SV *sve = attr("endian", 6);
+ U8 endian = *((U8 *)SvPV_nolen(sve));
+ SV *svs = attr("size", 4);
+ int size = SvIV(svs);
int ucs2 = -1; /* only needed in the event of surrogate pairs */
SV *result = newSVpvn("",0);
STRLEN usize = (size > 0 ? size : 1); /* protect against rogue size<=0 */
STRLEN ulen;
STRLEN resultbuflen;
U8 *resultbuf;
- U8 *s = (U8 *)SvPVbyte(str,ulen);
- U8 *e = (U8 *)SvEND(str);
+ U8 *s;
+ U8 *e;
+ bool modify = (check && !(check & ENCODE_LEAVE_SRC));
+ bool temp_result;
+
+ SvGETMAGIC(str);
+ if (!SvOK(str))
+ XSRETURN_UNDEF;
+ s = modify ? (U8 *)SvPV_force_nomg(str, ulen) : (U8 *)SvPV_nomg(str, ulen);
+ if (SvUTF8(str)) {
+ if (!modify) {
+ SV *tmp = sv_2mortal(newSVpvn((char *)s, ulen));
+ SvUTF8_on(tmp);
+ if (SvTAINTED(str))
+ SvTAINTED_on(tmp);
+ str = tmp;
+ s = (U8 *)SvPVX(str);
+ }
+ if (ulen) {
+ if (!utf8_to_bytes(s, &ulen))
+ croak("Wide character");
+ SvCUR_set(str, ulen);
+ }
+ SvUTF8_off(str);
+ }
+ e = s+ulen;
+
/* Optimise for the common case of being called from PerlIOEncode_fill()
with a standard length buffer. In this case the result SV's buffer is
only used temporarily, so we can afford to allocate the maximum needed
and not care about unused space. */
- const bool temp_result = (ulen == PERLIO_BUFSIZ);
+ temp_result = (ulen == PERLIO_BUFSIZ);
ST(0) = sv_2mortal(result);
SvUTF8_on(result);
if (!endian && s+size <= e) {
+ SV *sv;
UV bom;
endian = (size == 4) ? 'N' : 'n';
bom = enc_unpack(aTHX_ &s,e,size,endian);
@@ -183,8 +209,9 @@ CODE:
}
#if 1
/* Update endian for next sequence */
- if (attr_true("renewed", 7)) {
- hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
+ sv = attr("renewed", 7);
+ if (SvTRUE(sv)) {
+ (void)hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
}
#endif
}
@@ -202,11 +229,12 @@ CODE:
U8 *d;
if (issurrogate(ord)) {
if (ucs2 == -1) {
- ucs2 = attr_true("ucs2", 4);
+ SV *sv = attr("ucs2", 4);
+ ucs2 = SvTRUE(sv);
}
if (ucs2 || size == 4) {
if (check) {
- croak("%"SVf":no surrogates allowed %"UVxf,
+ croak("%" SVf ":no surrogates allowed %" UVxf,
*hv_fetch((HV *)SvRV(obj),"Name",4,0),
ord);
}
@@ -216,7 +244,7 @@ CODE:
UV lo;
if (!isHiSurrogate(ord)) {
if (check) {
- croak("%"SVf":Malformed HI surrogate %"UVxf,
+ croak("%" SVf ":Malformed HI surrogate %" UVxf,
*hv_fetch((HV *)SvRV(obj),"Name",4,0),
ord);
}
@@ -231,7 +259,7 @@ CODE:
break;
}
else {
- croak("%"SVf":Malformed HI surrogate %"UVxf,
+ croak("%" SVf ":Malformed HI surrogate %" UVxf,
*hv_fetch((HV *)SvRV(obj),"Name",4,0),
ord);
}
@@ -244,7 +272,7 @@ CODE:
lo = enc_unpack(aTHX_ &s,e,size,endian);
if (!isLoSurrogate(lo)) {
if (check) {
- croak("%"SVf":Malformed LO surrogate %"UVxf,
+ croak("%" SVf ":Malformed LO surrogate %" UVxf,
*hv_fetch((HV *)SvRV(obj),"Name",4,0),
ord);
}
@@ -262,7 +290,7 @@ CODE:
if ((ord & 0xFFFE) == 0xFFFE || (ord >= 0xFDD0 && ord <= 0xFDEF)) {
if (check) {
- croak("%"SVf":Unicode character %"UVxf" is illegal",
+ croak("%" SVf ":Unicode character %" UVxf " is illegal",
*hv_fetch((HV *)SvRV(obj),"Name",4,0),
ord);
} else {
@@ -287,7 +315,7 @@ CODE:
resultbuflen = SvLEN(result);
}
- d = uvuni_to_utf8_flags(resultbuf+SvCUR(result), ord,
+ d = uvchr_to_utf8_flags(resultbuf+SvCUR(result), ord,
UNICODE_WARN_ILLEGAL_INTERCHANGE);
SvCUR_set(result, d - (U8 *)SvPVX(result));
}
@@ -295,7 +323,7 @@ CODE:
if (s < e) {
/* unlikely to happen because it's fixed-length -- dankogai */
if (check & ENCODE_WARN_ON_ERR) {
- Perl_warner(aTHX_ packWARN(WARN_UTF8),"%"SVf":Partial character",
+ Perl_warner(aTHX_ packWARN(WARN_UTF8),"%" SVf ":Partial character",
*hv_fetch((HV *)SvRV(obj),"Name",4,0));
}
}
@@ -308,6 +336,7 @@ CODE:
SvCUR_set(str,0);
}
*SvEND(str) = '\0';
+ SvSETMAGIC(str);
}
if (!temp_result) shrink_buffer(result);
@@ -316,25 +345,46 @@ CODE:
}
void
-encode_xs(obj, utf8, check = 0)
+encode(obj, utf8, check = 0)
SV * obj
SV * utf8
IV check
CODE:
{
- U8 endian = *((U8 *)SvPV_nolen(attr("endian", 6)));
- const int size = SvIV(attr("size", 4));
+ SV *sve = attr("endian", 6);
+ U8 endian = *((U8 *)SvPV_nolen(sve));
+ SV *svs = attr("size", 4);
+ const int size = SvIV(svs);
int ucs2 = -1; /* only needed if there is invalid_ucs2 input */
const STRLEN usize = (size > 0 ? size : 1);
SV *result = newSVpvn("", 0);
STRLEN ulen;
- U8 *s = (U8 *) SvPVutf8(utf8, ulen);
- const U8 *e = (U8 *) SvEND(utf8);
+ U8 *s;
+ U8 *e;
+ bool modify = (check && !(check & ENCODE_LEAVE_SRC));
+ bool temp_result;
+
+ SvGETMAGIC(utf8);
+ if (!SvOK(utf8))
+ XSRETURN_UNDEF;
+ s = modify ? (U8 *)SvPV_force_nomg(utf8, ulen) : (U8 *)SvPV_nomg(utf8, ulen);
+ if (!SvUTF8(utf8)) {
+ if (!modify) {
+ SV *tmp = sv_2mortal(newSVpvn((char *)s, ulen));
+ if (SvTAINTED(utf8))
+ SvTAINTED_on(tmp);
+ utf8 = tmp;
+ }
+ sv_utf8_upgrade_nomg(utf8);
+ s = (U8 *)SvPV_nomg(utf8, ulen);
+ }
+ e = s+ulen;
+
/* Optimise for the common case of being called from PerlIOEncode_flush()
with a standard length buffer. In this case the result SV's buffer is
only used temporarily, so we can afford to allocate the maximum needed
and not care about unused space. */
- const bool temp_result = (ulen == PERLIO_BUFSIZ);
+ temp_result = (ulen == PERLIO_BUFSIZ);
ST(0) = sv_2mortal(result);
@@ -344,18 +394,20 @@ CODE:
SvGROW(result, ((ulen+1) * usize));
if (!endian) {
+ SV *sv;
endian = (size == 4) ? 'N' : 'n';
enc_pack(aTHX_ result,size,endian,BOM_BE);
#if 1
/* Update endian for next sequence */
- if (attr_true("renewed", 7)) {
- hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
+ sv = attr("renewed", 7);
+ if (SvTRUE(sv)) {
+ (void)hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
}
#endif
}
while (s < e && s+UTF8SKIP(s) <= e) {
STRLEN len;
- UV ord = utf8n_to_uvuni(s, e-s, &len, (UTF8_DISALLOW_SURROGATE
+ UV ord = utf8n_to_uvchr(s, e-s, &len, (UTF8_DISALLOW_SURROGATE
|UTF8_WARN_SURROGATE
|UTF8_DISALLOW_FE_FF
|UTF8_WARN_FE_FF
@@ -364,11 +416,12 @@ CODE:
if (size != 4 && invalid_ucs2(ord)) {
if (!issurrogate(ord)) {
if (ucs2 == -1) {
- ucs2 = attr_true("ucs2", 4);
+ SV *sv = attr("ucs2", 4);
+ ucs2 = SvTRUE(sv);
}
if (ucs2 || ord > 0x10FFFF) {
if (check) {
- croak("%"SVf":code point \"\\x{%"UVxf"}\" too high",
+ croak("%" SVf ":code point \"\\x{%" UVxf "}\" too high",
*hv_fetch((HV *)SvRV(obj),"Name",4,0),ord);
}
enc_pack(aTHX_ result,size,endian,FBCHAR);
@@ -394,7 +447,7 @@ CODE:
But this is critical when you choose to LEAVE_SRC
in which case we die */
if (check & (ENCODE_DIE_ON_ERR|ENCODE_LEAVE_SRC)) {
- Perl_croak(aTHX_ "%"SVf":partial character is not allowed "
+ Perl_croak(aTHX_ "%" SVf ":partial character is not allowed "
"when CHECK = 0x%" UVuf,
*hv_fetch((HV *)SvRV(obj),"Name",4,0), check);
}
@@ -408,12 +461,11 @@ CODE:
SvCUR_set(utf8,0);
}
*SvEND(utf8) = '\0';
+ SvSETMAGIC(utf8);
}
if (!temp_result) shrink_buffer(result);
if (SvTAINTED(utf8)) SvTAINTED_on(result); /* propagate taintedness */
- SvSETMAGIC(utf8);
-
XSRETURN(1);
}
diff --git a/gnu/usr.bin/perl/cpan/Encode/bin/enc2xs b/gnu/usr.bin/perl/cpan/Encode/bin/enc2xs
index f8d9f52f2a2..619b64b7573 100644
--- a/gnu/usr.bin/perl/cpan/Encode/bin/enc2xs
+++ b/gnu/usr.bin/perl/cpan/Encode/bin/enc2xs
@@ -11,7 +11,7 @@ use warnings;
use Getopt::Std;
use Config;
my @orig_ARGV = @ARGV;
-our $VERSION = do { my @r = (q$Revision: 2.18 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 2.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
# These may get re-ordered.
# RAW is a do_now as inserted by &enter
@@ -123,7 +123,10 @@ my %encode_types = (U => \&encode_U,
);
# Win32 does not expand globs on command line
-eval "\@ARGV = map(glob(\$_),\@ARGV)" if ($^O eq 'MSWin32');
+if ($^O eq 'MSWin32' and !$ENV{PERL_CORE}) {
+ eval "\@ARGV = map(glob(\$_),\@ARGV)";
+ @ARGV = @orig_ARGV unless @ARGV;
+}
my %opt;
# I think these are:
@@ -134,6 +137,8 @@ my %opt;
# -o <output> to specify the output file name (else it's the first arg)
# -f <inlist> to give a file with a list of input files (else use the args)
# -n <name> to name the encoding (else use the basename of the input file.
+#Getopt::Long::Configure("bundling");
+#GetOptions(\%opt, qw(C M=s S Q q O o=s f=s n=s v));
getopts('CM:SQqOo:f:n:v',\%opt);
$opt{M} and make_makefile_pl($opt{M}, @ARGV);
@@ -196,9 +201,9 @@ sub compiler_info {
# This really should go first, else the die here causes empty (non-erroneous)
# output files to be written.
my @encfiles;
-if (exists $opt{'f'}) {
+if (exists $opt{f}) {
# -F is followed by name of file containing list of filenames
- my $flist = $opt{'f'};
+ my $flist = $opt{f};
open(FLIST,$flist) || die "Cannot open $flist:$!";
chomp(@encfiles = <FLIST>);
close(FLIST);
@@ -206,9 +211,15 @@ if (exists $opt{'f'}) {
@encfiles = @ARGV;
}
-my $cname = (exists $opt{'o'}) ? $opt{'o'} : shift(@ARGV);
+my $cname = $opt{o} ? $opt{o} : shift(@ARGV);
+unless ($cname) { #debuging a win32 nmake error-only. works via cmdline
+ print "\nARGV:";
+ print "$_ " for @ARGV;
+ print "\nopt:";
+ print " $_ => ",defined $opt{$_}?$opt{$_}:"undef","\n" for keys %opt;
+}
chmod(0666,$cname) if -f $cname && !-w $cname;
-open(C,">$cname") || die "Cannot open $cname:$!";
+open(C,">", $cname) || die "Cannot open $cname:$!";
my $dname = $cname;
my $hname = $cname;
@@ -220,10 +231,10 @@ if ($cname =~ /\.(c|xs)$/i) # VMS may have upcased filenames with DECC$ARGV_PARS
$doC = 1;
$dname =~ s/(\.[^\.]*)?$/.exh/;
chmod(0666,$dname) if -f $cname && !-w $dname;
- open(D,">$dname") || die "Cannot open $dname:$!";
+ open(D,">", $dname) || die "Cannot open $dname:$!";
$hname =~ s/(\.[^\.]*)?$/.h/;
chmod(0666,$hname) if -f $cname && !-w $hname;
- open(H,">$hname") || die "Cannot open $hname:$!";
+ open(H,">", $hname) || die "Cannot open $hname:$!";
foreach my $fh (\*C,\*D,\*H)
{
@@ -469,7 +480,9 @@ sub compile_ucm
$erep = $attr{'subchar'};
$erep =~ s/^\s+//; $erep =~ s/\s+$//;
}
- print "Reading $name ($cs)\n";
+ print "Reading $name ($cs)\n"
+ unless defined $ENV{MAKEFLAGS}
+ and $ENV{MAKEFLAGS} =~ /\b(s|silent|quiet)\b/;
my $nfb = 0;
my $hfb = 0;
while (<$fh>)
@@ -755,9 +768,17 @@ sub addstrings
if ($a->{'Forward'})
{
my ($cpp, $static, $sized) = compiler_info(1);
- my $var = $static ? 'static const' : 'extern';
my $count = $sized ? scalar(@{$a->{'Entries'}}) : '';
- print $fh "$var encpage_t $name\[$count];\n";
+ if ($static) {
+ # we cannot ask Config for d_plusplus since we can override CC=g++-6 on the cmdline
+ print $fh "#ifdef __cplusplus\n"; # -fpermissive since g++-6
+ print $fh "extern encpage_t $name\[$count];\n";
+ print $fh "#else\n";
+ print $fh "static const encpage_t $name\[$count];\n";
+ print $fh "#endif\n";
+ } else {
+ print $fh "extern encpage_t $name\[$count];\n";
+ }
}
$a->{'DoneStrings'} = 1;
foreach my $b (@{$a->{'Entries'}})
@@ -848,9 +869,16 @@ sub outtable
outtable($fh,$t,$bigname) unless $t->{'Done'};
}
my ($cpp, $static) = compiler_info(0);
- my $var = $static ? 'static const ' : '';
- print $fh "\n${var}encpage_t $name\[",
- scalar(@{$a->{'Entries'}}), "] = {\n";
+ my $count = scalar(@{$a->{'Entries'}});
+ if ($static) {
+ print $fh "#ifdef __cplusplus\n"; # -fpermissive since g++-6
+ print $fh "encpage_t $name\[$count] = {\n";
+ print $fh "#else\n";
+ print $fh "static const encpage_t $name\[$count] = {\n";
+ print $fh "#endif\n";
+ } else {
+ print $fh "\nencpage_t $name\[$count] = {\n";
+ }
foreach my $b (@{$a->{'Entries'}})
{
my ($sc,$ec,$out,$t,$end,$l,$fb) = @$b;
@@ -1010,8 +1038,7 @@ sub find_e2x{
sub make_makefile_pl
{
- eval { require Encode; };
- $@ and die "You need to install Encode to use enc2xs -M\nerror: $@\n";
+ eval { require Encode } or die "You need to install Encode to use enc2xs -M\nerror: $@\n";
# our used for variable expansion
$_Enc2xs = $0;
$_Version = $VERSION;
@@ -1035,8 +1062,7 @@ use vars qw(
);
sub make_configlocal_pm {
- eval { require Encode; };
- $@ and die "Unable to require Encode: $@\n";
+ eval { require Encode } or die "Unable to require Encode: $@\n";
eval { require File::Spec; };
# our used for variable expantion
@@ -1056,8 +1082,7 @@ sub make_configlocal_pm {
$mod =~ s/.*\bEncode\b/Encode/o;
$mod =~ s/\.pm\z//o;
$mod =~ s,/,::,og;
- eval qq{ require $mod; };
- return if $@;
+ eval qq{ require $mod; } or return;
warn qq{ require $mod;\n};
for my $enc ( Encode->encodings() ) {
no warnings;
@@ -1091,8 +1116,7 @@ sub _mkversion{
}
sub _print_expand{
- eval { require File::Basename; };
- $@ and die "File::Basename needed. Are you on miniperl?;\nerror: $@\n";
+ eval { require File::Basename } or die "File::Basename needed. Are you on miniperl?;\nerror: $@\n";
File::Basename->import();
my ($src, $dst, $clobber) = @_;
if (!$clobber and -e $dst){
@@ -1104,7 +1128,7 @@ sub _print_expand{
if ((my $d = dirname($dst)) ne '.'){
-d $d or mkdir $d, 0755 or die "mkdir $d : $!";
}
- open my $out, ">$dst" or die "$!";
+ open my $out, ">", $dst or die "$!";
my $asis = 0;
while (<$in>){
if (/^#### END_OF_HEADER/){
diff --git a/gnu/usr.bin/perl/cpan/Encode/bin/piconv b/gnu/usr.bin/perl/cpan/Encode/bin/piconv
index 60b2a59b78b..2218d16f396 100644
--- a/gnu/usr.bin/perl/cpan/Encode/bin/piconv
+++ b/gnu/usr.bin/perl/cpan/Encode/bin/piconv
@@ -1,5 +1,5 @@
#!./perl
-# $Id: piconv,v 2.7 2014/05/31 09:48:48 dankogai Exp $
+# $Id: piconv,v 2.8 2016/08/04 03:15:58 dankogai Exp $
#
BEGIN { pop @INC if $INC[-1] eq '.' }
use 5.8.0;
diff --git a/gnu/usr.bin/perl/cpan/Encode/bin/ucmlint b/gnu/usr.bin/perl/cpan/Encode/bin/ucmlint
index 25e0d67ef60..a31a7a28f66 100644
--- a/gnu/usr.bin/perl/cpan/Encode/bin/ucmlint
+++ b/gnu/usr.bin/perl/cpan/Encode/bin/ucmlint
@@ -1,19 +1,18 @@
#!/usr/local/bin/perl
#
-# $Id: ucmlint,v 2.2 2008/03/12 09:51:11 dankogai Exp $
+# $Id: ucmlint,v 2.4 2017/06/10 17:23:50 dankogai Exp $
#
BEGIN { pop @INC if $INC[-1] eq '.' }
use strict;
-our $VERSION = do { my @r = (q$Revision: 2.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 2.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use Getopt::Std;
our %Opt;
getopts("Dehfv", \%Opt);
if ($Opt{e}){
- eval{ require Encode; };
- $@ and die "can't load Encode : $@";
+ eval { require Encode } or die "can't load Encode : $@";
}
$Opt{h} and help();
diff --git a/gnu/usr.bin/perl/cpan/Encode/encengine.c b/gnu/usr.bin/perl/cpan/Encode/encengine.c
index bddf556b35d..67613a89e3c 100644
--- a/gnu/usr.bin/perl/cpan/Encode/encengine.c
+++ b/gnu/usr.bin/perl/cpan/Encode/encengine.c
@@ -102,56 +102,56 @@ do_encode(const encpage_t * enc, const U8 * src, STRLEN * slen, U8 * dst,
U8 *dend = d + dlen, *dlast = d;
int code = 0;
while (s < send) {
- const encpage_t *e = enc;
- U8 byte = *s;
- while (byte > e->max)
- e++;
- if (byte >= e->min && e->slen && (approx || !(e->slen & 0x80))) {
- const U8 *cend = s + (e->slen & 0x7f);
- if (cend <= send) {
- STRLEN n;
- if ((n = e->dlen)) {
- const U8 *out = e->seq + n * (byte - e->min);
- U8 *oend = d + n;
- if (dst) {
- if (oend <= dend) {
- while (d < oend)
- *d++ = *out++;
+ const encpage_t *e = enc;
+ U8 byte = *s;
+ while (byte > e->max)
+ e++;
+ if (byte >= e->min && e->slen && (approx || !(e->slen & 0x80))) {
+ const U8 *cend = s + (e->slen & 0x7f);
+ if (cend <= send) {
+ STRLEN n;
+ if ((n = e->dlen)) {
+ const U8 *out = e->seq + n * (byte - e->min);
+ U8 *oend = d + n;
+ if (dst) {
+ if (oend <= dend) {
+ while (d < oend)
+ *d++ = *out++;
+ }
+ else {
+ /* Out of space */
+ code = ENCODE_NOSPACE;
+ break;
+ }
+ }
+ else
+ d = oend;
+ }
+ enc = e->next;
+ s++;
+ if (s == cend) {
+ if (approx && (e->slen & 0x80))
+ code = ENCODE_FALLBACK;
+ last = s;
+ if (term && (STRLEN)(d-dlast) == tlen && memEQ(dlast, term, tlen)) {
+ code = ENCODE_FOUND_TERM;
+ break;
+ }
+ dlast = d;
+ }
}
else {
- /* Out of space */
- code = ENCODE_NOSPACE;
+ /* partial source character */
+ code = ENCODE_PARTIAL;
break;
}
- }
- else
- d = oend;
- }
- enc = e->next;
- s++;
- if (s == cend) {
- if (approx && (e->slen & 0x80))
- code = ENCODE_FALLBACK;
- last = s;
- if (term && (STRLEN)(d-dlast) == tlen && memEQ(dlast, term, tlen)) {
- code = ENCODE_FOUND_TERM;
- break;
- }
- dlast = d;
- }
}
else {
- /* partial source character */
- code = ENCODE_PARTIAL;
- break;
+ /* Cannot represent */
+ code = ENCODE_NOREP;
+ break;
}
}
- else {
- /* Cannot represent */
- code = ENCODE_NOREP;
- break;
- }
- }
*slen = last - src;
*dout = d - dst;
return code;
diff --git a/gnu/usr.bin/perl/cpan/Encode/encoding.pm b/gnu/usr.bin/perl/cpan/Encode/encoding.pm
index 8450f9ca127..c3f324d29fa 100644
--- a/gnu/usr.bin/perl/cpan/Encode/encoding.pm
+++ b/gnu/usr.bin/perl/cpan/Encode/encoding.pm
@@ -1,15 +1,16 @@
-# $Id: encoding.pm,v 2.17 2015/09/15 13:53:27 dankogai Exp dankogai $
+# $Id: encoding.pm,v 2.22 2018/02/11 05:32:03 dankogai Exp $
package encoding;
-our $VERSION = sprintf "%d.%02d", q$Revision: 2.17 $ =~ /(\d+)/g;
+our $VERSION = sprintf "%d.%02d", q$Revision: 2.22 $ =~ /(\d+)/g;
use Encode;
use strict;
use warnings;
+use Config;
use constant {
DEBUG => !!$ENV{PERL_ENCODE_DEBUG},
HAS_PERLIO => eval { require PerlIO::encoding; PerlIO::encoding->VERSION(0.02) },
- PERL_5_21_7 => $^V && $^V ge v5.21.7,
+ PERL_5_21_7 => $^V && $^V ge v5.21.7, # lexically scoped
};
sub _exception {
@@ -114,10 +115,10 @@ sub import {
Carp::croak("encoding: pragma does not support EBCDIC platforms");
}
- if ($] >= 5.017) {
- warnings::warnif("deprecated",
- "Use of the encoding pragma is deprecated")
- }
+ my $deprecate =
+ ($] >= 5.017 and !$Config{usecperl})
+ ? "Use of the encoding pragma is deprecated" : 0;
+
my $class = shift;
my $name = shift;
if (!$name){
@@ -133,6 +134,7 @@ sub import {
return;
}
$name = _get_locale_encoding() if $name eq ':locale';
+ BEGIN { strict->unimport('hashpairs') if $] >= 5.027 and $^V =~ /c$/; }
my %arg = @_;
$name = $ENV{PERL_ENCODING} unless defined $name;
my $enc = find_encoding($name);
@@ -142,6 +144,12 @@ sub import {
}
$name = $enc->name; # canonize
unless ( $arg{Filter} ) {
+ if ($] >= 5.025003 and !$Config{usecperl}) {
+ require Carp;
+ Carp::croak("The encoding pragma is no longer supported. Check cperl");
+ }
+ warnings::warnif("deprecated",$deprecate) if $deprecate;
+
DEBUG and warn "_exception($name) = ", _exception($name);
if (! _exception($name)) {
if (!PERL_5_21_7) {
@@ -155,16 +163,20 @@ sub import {
${^E_NCODING} = $enc;
}
}
- HAS_PERLIO or return 1;
+ if (! HAS_PERLIO ) {
+ return 1;
+ }
}
else {
+ warnings::warnif("deprecated",$deprecate) if $deprecate;
+
defined( ${^ENCODING} ) and undef ${^ENCODING};
undef ${^E_NCODING} if PERL_5_21_7;
# implicitly 'use utf8'
require utf8; # to fetch $utf8::hint_bits;
$^H |= $utf8::hint_bits;
- eval {
+
require Filter::Util::Call;
Filter::Util::Call->import;
filter_add(
@@ -177,8 +189,6 @@ sub import {
$status;
}
);
- };
- $@ eq '' and DEBUG and warn "Filter installed";
}
defined ${^UNICODE} and ${^UNICODE} != 0 and return 1;
for my $h (qw(STDIN STDOUT)) {
@@ -188,20 +198,14 @@ sub import {
Carp::croak(
"encoding: Unknown encoding for $h, '$arg{$h}'");
}
- eval { binmode( $h, ":raw :encoding($arg{$h})" ) };
+ binmode( $h, ":raw :encoding($arg{$h})" );
}
else {
unless ( exists $arg{$h} ) {
- eval {
no warnings 'uninitialized';
binmode( $h, ":raw :encoding($name)" );
- };
}
}
- if ($@) {
- require Carp;
- Carp::croak($@);
- }
}
return 1; # I doubt if we need it, though
}
@@ -280,6 +284,10 @@ Old code should be converted to UTF-8, via something like the recipe in the
L</SYNOPSIS> (though this simple approach may require manual adjustments
afterwards).
+If UTF-8 is not an option, it is recommended that one use a simple source
+filter, such as that provided by L<Filter::Encoding> on CPAN or this
+pragma's own C<Filter> option (see below).
+
The only legitimate use of this pragma is almost certainly just one per file,
near the top, with file scope, as the file is likely going to only be written
in one encoding. Further restrictions apply in Perls before v5.22 (see
@@ -291,6 +299,9 @@ There are two basic modes of operation (plus turning if off):
=item C<use encoding ['I<ENCNAME>'] ;>
+Please note: This mode of operation is no longer supported as of Perl
+v5.26.
+
This is the normal operation. It translates various literals encountered in
the Perl source file from the encoding I<ENCNAME> into UTF-8, and similarly
converts character code points. This is used when the script is a combination
@@ -352,7 +363,7 @@ Note that C<STDERR> WILL NOT be changed, regardless.
Also note that non-STD file handles remain unaffected. Use C<use
open> or C<binmode> to change the layers of those.
-=item C<use encoding I<ENCNAME> Filter=E<gt>1;>
+=item C<use encoding I<ENCNAME>, Filter=E<gt>1;>
This operates as above, but the C<Filter> argument with a non-zero
value causes the entire script, and not just literals, to be translated from
diff --git a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Alias.pm b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Alias.pm
index 04ad4967c98..dbfa01b6182 100644
--- a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Alias.pm
+++ b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Alias.pm
@@ -1,8 +1,7 @@
package Encode::Alias;
use strict;
use warnings;
-no warnings 'redefine';
-our $VERSION = do { my @r = ( q$Revision: 2.20 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.24 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
use Exporter 'import';
@@ -19,7 +18,6 @@ our @Alias; # ordered matching list
our %Alias; # cached known aliases
sub find_alias {
- require Encode;
my $class = shift;
my $find = shift;
unless ( exists $Alias{$find} ) {
@@ -79,8 +77,10 @@ sub find_alias {
sub define_alias {
while (@_) {
- my ( $alias, $name ) = splice( @_, 0, 2 );
- unshift( @Alias, $alias => $name ); # newer one has precedence
+ my $alias = shift;
+ my $name = shift;
+ unshift( @Alias, $alias => $name ) # newer one has precedence
+ if defined $alias;
if ( ref($alias) ) {
# clear %Alias cache to allow overrides
@@ -96,13 +96,20 @@ sub define_alias {
}
}
}
- else {
+ elsif (defined $alias) {
DEBUG and warn "delete \$Alias\{$alias\}";
delete $Alias{$alias};
}
+ elsif (DEBUG) {
+ require Carp;
+ Carp::croak("undef \$alias");
+ }
}
}
+# HACK: Encode must be used after define_alias is declarated as Encode calls define_alias
+use Encode ();
+
# Allow latin-1 style names as well
# 0 1 2 3 4 5 6 7 8 9 10
our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 );
@@ -128,7 +135,6 @@ sub undef_aliases {
}
sub init_aliases {
- require Encode;
undef_aliases();
# Try all-lower-case version should all else fails
@@ -264,7 +270,7 @@ sub init_aliases {
define_alias( qr/\bUTF-8$/i => '"utf-8-strict"' );
# At last, Map white space and _ to '-'
- define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' );
+ define_alias( qr/^([^\s_]+)[\s_]+([^\s_]*)$/i => '"$1-$2"' );
}
1;
diff --git a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/CN/HZ.pm b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/CN/HZ.pm
index f035d821f57..e444cb01c33 100644
--- a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/CN/HZ.pm
+++ b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/CN/HZ.pm
@@ -5,7 +5,7 @@ use warnings;
use utf8 ();
use vars qw($VERSION);
-$VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+$VERSION = do { my @r = ( q$Revision: 2.10 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(:fallbacks);
@@ -21,6 +21,7 @@ sub needs_lines { 1 }
sub decode ($$;$) {
my ( $obj, $str, $chk ) = @_;
+ return undef unless defined $str;
my $GB = Encode::find_encoding('gb2312-raw');
my $ret = substr($str, 0, 0); # to propagate taintedness
@@ -49,7 +50,8 @@ sub decode ($$;$) {
else { # GB mode; the byte ranges are as in RFC 1843.
no warnings 'uninitialized';
if ( $str =~ s/^((?:[\x21-\x77][\x21-\x7E])+)// ) {
- $ret .= $GB->decode( $1, $chk );
+ my $prefix = $1;
+ $ret .= $GB->decode( $prefix, $chk );
}
elsif ( $str =~ s/^\x7E\x7D// ) { # '~}'
$in_ascii = 1;
@@ -134,6 +136,7 @@ sub cat_decode {
sub encode($$;$) {
my ( $obj, $str, $chk ) = @_;
+ return undef unless defined $str;
my $GB = Encode::find_encoding('gb2312-raw');
my $ret = substr($str, 0, 0); # to propagate taintedness;
@@ -153,7 +156,7 @@ sub encode($$;$) {
}
elsif ( $str =~ s/(.)// ) {
my $s = $1;
- my $tmp = $GB->encode( $s, $chk );
+ my $tmp = $GB->encode( $s, $chk || 0 );
last if !defined $tmp;
if ( length $tmp == 2 ) { # maybe a valid GB char (XXX)
if ($in_ascii) {
diff --git a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Encoding.pm b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Encoding.pm
index 39d2e0ab64e..815937f4554 100644
--- a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Encoding.pm
+++ b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Encoding.pm
@@ -3,11 +3,15 @@ package Encode::Encoding;
# Base class for classes which implement encodings
use strict;
use warnings;
-our $VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.8 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
-require Encode;
+our @CARP_NOT = qw(Encode Encode::Encoder);
-sub DEBUG { 0 }
+use Carp ();
+use Encode ();
+use Encode::MIME::Name;
+
+use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
sub Define {
my $obj = shift;
@@ -20,13 +24,10 @@ sub Define {
sub name { return shift->{'Name'} }
-sub mime_name{
- require Encode::MIME::Name;
+sub mime_name {
return Encode::MIME::Name::get_mime_name(shift->name);
}
-# sub renew { return $_[0] }
-
sub renew {
my $self = shift;
my $clone = bless {%$self} => ref($self);
@@ -42,8 +43,7 @@ sub renewed { return $_[0]->{renewed} || 0 }
sub needs_lines { 0 }
sub perlio_ok {
- eval { require PerlIO::encoding };
- return $@ ? 0 : 1;
+ return eval { require PerlIO::encoding } ? 1 : 0;
}
# (Temporary|legacy) methods
@@ -56,14 +56,12 @@ sub fromUnicode { shift->encode(@_) }
#
sub encode {
- require Carp;
my $obj = shift;
my $class = ref($obj) ? ref($obj) : $obj;
Carp::croak( $class . "->encode() not defined!" );
}
sub decode {
- require Carp;
my $obj = shift;
my $class = ref($obj) ? ref($obj) : $obj;
Carp::croak( $class . "->encode() not defined!" );
@@ -188,7 +186,6 @@ MUST return the string representing the canonical name of the encoding.
Predefined As:
sub mime_name{
- require Encode::MIME::Name;
return Encode::MIME::Name::get_mime_name(shift->name);
}
@@ -226,8 +223,7 @@ unless the value is numeric so return 0 for false.
Predefined As:
sub perlio_ok {
- eval{ require PerlIO::encoding };
- return $@ ? 0 : 1;
+ return eval { require PerlIO::encoding } ? 1 : 0;
}
If your encoding does not support PerlIO for some reasons, just;
diff --git a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/GSM0338.pm b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/GSM0338.pm
index 20257a1cbd9..e87141ebc41 100644
--- a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/GSM0338.pm
+++ b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/GSM0338.pm
@@ -1,5 +1,5 @@
#
-# $Id: GSM0338.pm,v 2.5 2013/09/14 07:51:59 dankogai Exp $
+# $Id: GSM0338.pm,v 2.7 2017/06/10 17:23:50 dankogai Exp $
#
package Encode::GSM0338;
@@ -8,7 +8,7 @@ use warnings;
use Carp;
use vars qw($VERSION);
-$VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+$VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(:fallbacks);
@@ -171,6 +171,7 @@ our $NBSP = "\x{00A0}";
sub decode ($$;$) {
my ( $obj, $bytes, $chk ) = @_;
+ return undef unless defined $bytes;
my $str = substr($bytes, 0, 0); # to propagate taintedness;
while ( length $bytes ) {
my $c = substr( $bytes, 0, 1, '' );
@@ -216,6 +217,7 @@ sub decode ($$;$) {
sub encode($$;$) {
my ( $obj, $str, $chk ) = @_;
+ return undef unless defined $str;
my $bytes = substr($str, 0, 0); # to propagate taintedness
while ( length $str ) {
my $u = substr( $str, 0, 1, '' );
@@ -270,10 +272,9 @@ expression with C<eval {}> block as follows;
eval {
$utf8 = decode("gsm0338", $gsm0338, $chk);
- };
- if ($@){
+ } or do {
# handle exception here
- }
+ };
=head1 BUGS
diff --git a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Guess.pm b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Guess.pm
index b44daf59eb5..41fc19b7991 100644
--- a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Guess.pm
+++ b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Guess.pm
@@ -2,15 +2,16 @@ package Encode::Guess;
use strict;
use warnings;
use Encode qw(:fallbacks find_encoding);
-our $VERSION = do { my @r = ( q$Revision: 2.6 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
my $Canon = 'Guess';
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
our %DEF_SUSPECTS = map { $_ => find_encoding($_) } qw(ascii utf8);
-$Encode::Encoding{$Canon} = bless {
+my $obj = bless {
Name => $Canon,
Suspects => {%DEF_SUSPECTS},
} => __PACKAGE__;
+Encode::define_encoding($obj, $Canon);
use parent qw(Encode::Encoding);
sub needs_lines { 1 }
diff --git a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/JP/JIS7.pm b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/JP/JIS7.pm
index 588389a034a..6fc383c4966 100644
--- a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/JP/JIS7.pm
+++ b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/JP/JIS7.pm
@@ -1,7 +1,7 @@
package Encode::JP::JIS7;
use strict;
use warnings;
-our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.8 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(:fallbacks);
@@ -9,11 +9,12 @@ for my $name ( '7bit-jis', 'iso-2022-jp', 'iso-2022-jp-1' ) {
my $h2z = ( $name eq '7bit-jis' ) ? 0 : 1;
my $jis0212 = ( $name eq 'iso-2022-jp' ) ? 0 : 1;
- $Encode::Encoding{$name} = bless {
+ my $obj = bless {
Name => $name,
h2z => $h2z,
jis0212 => $jis0212,
} => __PACKAGE__;
+ Encode::define_encoding($obj, $name);
}
use parent qw(Encode::Encoding);
@@ -29,6 +30,7 @@ use Encode::CJKConstants qw(:all);
sub decode($$;$) {
my ( $obj, $str, $chk ) = @_;
+ return undef unless defined $str;
my $residue = '';
if ($chk) {
$str =~ s/([^\x00-\x7f].*)$//so and $residue = $1;
@@ -45,11 +47,12 @@ sub decode($$;$) {
sub encode($$;$) {
require Encode::JP::H2Z;
my ( $obj, $utf8, $chk ) = @_;
+ return undef unless defined $utf8;
# empty the input string in the stack so perlio is ok
$_[1] = '' if $chk;
my ( $h2z, $jis0212 ) = @$obj{qw(h2z jis0212)};
- my $octet = Encode::encode( 'euc-jp', $utf8, $chk );
+ my $octet = Encode::encode( 'euc-jp', $utf8, $chk || 0 );
$h2z and &Encode::JP::H2Z::h2z( \$octet );
euc_jis( \$octet, $jis0212 );
return $octet;
diff --git a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/KR/2022_KR.pm b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/KR/2022_KR.pm
index 44373e5d589..122326403b2 100644
--- a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/KR/2022_KR.pm
+++ b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/KR/2022_KR.pm
@@ -1,7 +1,7 @@
package Encode::KR::2022_KR;
use strict;
use warnings;
-our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(:fallbacks);
@@ -16,6 +16,7 @@ sub perlio_ok {
sub decode {
my ( $obj, $str, $chk ) = @_;
+ return undef unless defined $str;
my $res = $str;
my $residue = iso_euc( \$res );
@@ -26,6 +27,7 @@ sub decode {
sub encode {
my ( $obj, $utf8, $chk ) = @_;
+ return undef unless defined $utf8;
# empty the input string in the stack so perlio is ok
$_[1] = '' if $chk;
diff --git a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/MIME/Header.pm b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/MIME/Header.pm
index ba6adba4758..848de99fa43 100644
--- a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/MIME/Header.pm
+++ b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/MIME/Header.pm
@@ -1,185 +1,307 @@
package Encode::MIME::Header;
use strict;
use warnings;
-no warnings 'redefine';
-our $VERSION = do { my @r = ( q$Revision: 2.19 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
-use Encode qw(find_encoding encode_utf8 decode_utf8);
-use MIME::Base64;
-use Carp;
+our $VERSION = do { my @r = ( q$Revision: 2.28 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+use Carp ();
+use Encode ();
+use MIME::Base64 ();
my %seed = (
- decode_b => '1', # decodes 'B' encoding ?
- decode_q => '1', # decodes 'Q' encoding ?
- encode => 'B', # encode with 'B' or 'Q' ?
- bpl => 75, # bytes per line
+ decode_b => 1, # decodes 'B' encoding ?
+ decode_q => 1, # decodes 'Q' encoding ?
+ encode => 'B', # encode with 'B' or 'Q' ?
+ charset => 'UTF-8', # encode charset
+ bpl => 75, # bytes per line
);
-$Encode::Encoding{'MIME-Header'} =
- bless { %seed, Name => 'MIME-Header', } => __PACKAGE__;
+my @objs;
+
+push @objs, bless {
+ %seed,
+ Name => 'MIME-Header',
+} => __PACKAGE__;
-$Encode::Encoding{'MIME-B'} = bless {
+push @objs, bless {
%seed,
decode_q => 0,
Name => 'MIME-B',
} => __PACKAGE__;
-$Encode::Encoding{'MIME-Q'} = bless {
+push @objs, bless {
%seed,
- decode_q => 1,
+ decode_b => 0,
encode => 'Q',
Name => 'MIME-Q',
} => __PACKAGE__;
+Encode::define_encoding($_, $_->{Name}) foreach @objs;
+
use parent qw(Encode::Encoding);
sub needs_lines { 1 }
sub perlio_ok { 0 }
+# RFC 2047 and RFC 2231 grammar
+my $re_charset = qr/[!"#\$%&'+\-0-9A-Z\\\^_`a-z\{\|\}~]+/;
+my $re_language = qr/[A-Za-z]{1,8}(?:-[0-9A-Za-z]{1,8})*/;
+my $re_encoding = qr/[QqBb]/;
+my $re_encoded_text = qr/[^\?]*/;
+my $re_encoded_word = qr/=\?$re_charset(?:\*$re_language)?\?$re_encoding\?$re_encoded_text\?=/;
+my $re_capture_encoded_word = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding\?$re_encoded_text)\?=/;
+my $re_capture_encoded_word_split = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding)\?($re_encoded_text)\?=/;
+
+# in strict mode check also for valid base64 characters and also for valid quoted printable codes
+my $re_encoding_strict_b = qr/[Bb]/;
+my $re_encoding_strict_q = qr/[Qq]/;
+my $re_encoded_text_strict_b = qr/[0-9A-Za-z\+\/]*={0,2}/;
+my $re_encoded_text_strict_q = qr/(?:[\x21-\x3C\x3E\x40-\x7E]|=[0-9A-Fa-f]{2})*/; # NOTE: first part are printable US-ASCII except ?, =, SPACE and TAB
+my $re_encoded_word_strict = qr/=\?$re_charset(?:\*$re_language)?\?(?:$re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
+my $re_capture_encoded_word_strict = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
+
+my $re_newline = qr/(?:\r\n|[\r\n])/;
+
+# in strict mode encoded words must be always separated by spaces or tabs (or folded newline)
+# except in comments when separator between words and comment round brackets can be omitted
+my $re_word_begin_strict = qr/(?:(?:[ \t]|\A)\(?|(?:[^\\]|\A)\)\()/;
+my $re_word_sep_strict = qr/(?:$re_newline?[ \t])+/;
+my $re_word_end_strict = qr/(?:\)\(|\)?(?:$re_newline?[ \t]|\z))/;
+
+my $re_match = qr/()((?:$re_encoded_word\s*)*$re_encoded_word)()/;
+my $re_match_strict = qr/($re_word_begin_strict)((?:$re_encoded_word_strict$re_word_sep_strict)*$re_encoded_word_strict)(?=$re_word_end_strict)/;
+
+my $re_capture = qr/$re_capture_encoded_word(?:\s*)?/;
+my $re_capture_strict = qr/$re_capture_encoded_word_strict$re_word_sep_strict?/;
+
+our $STRICT_DECODE = 0;
+
sub decode($$;$) {
- use utf8;
- my ( $obj, $str, $chk ) = @_;
- # zap spaces between encoded words
- $str =~ s/\?=\s+=\?/\?==\?/gos;
-
- # multi-line header to single line
- $str =~ s/(?:\r\n|[\r\n])[ \t]//gos;
-
- 1 while ( $str =~
- s/(=\?[-0-9A-Za-z_]+\?[Qq]\?)([^?]*?)\?=\1([^?]*?\?=)/$1$2$3/ )
- ; # Concat consecutive QP encoded mime headers
- # Fixes breaking inside multi-byte characters
-
- $str =~ s{
- =\? # begin encoded word
- ([-0-9A-Za-z_]+) # charset (encoding)
- (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231)
- \?([QqBb])\? # delimiter
- (.*?) # Base64-encodede contents
- \?= # end encoded word
- }{
- if (uc($2) eq 'B'){
- $obj->{decode_b} or croak qq(MIME "B" unsupported);
- decode_b($1, $3, $chk);
- } elsif (uc($2) eq 'Q'){
- $obj->{decode_q} or croak qq(MIME "Q" unsupported);
- decode_q($1, $3, $chk);
- } else {
- croak qq(MIME "$2" encoding is nonexistent!);
+ my ($obj, $str, $chk) = @_;
+ return undef unless defined $str;
+
+ my $re_match_decode = $STRICT_DECODE ? $re_match_strict : $re_match;
+ my $re_capture_decode = $STRICT_DECODE ? $re_capture_strict : $re_capture;
+
+ my $stop = 0;
+ my $output = substr($str, 0, 0); # to propagate taintedness
+
+ # decode each line separately, match whole continuous folded line at one call
+ 1 while not $stop and $str =~ s{^((?:[^\r\n]*(?:$re_newline[ \t])?)*)($re_newline)?}{
+
+ my $line = $1;
+ my $sep = defined $2 ? $2 : '';
+
+ $stop = 1 unless length($line) or length($sep);
+
+ # NOTE: this code partially could break $chk support
+ # in non strict mode concat consecutive encoded mime words with same charset, language and encoding
+ # fixes breaking inside multi-byte characters
+ 1 while not $STRICT_DECODE and $line =~ s/$re_capture_encoded_word_split\s*=\?\1\2\?\3\?($re_encoded_text)\?=/=\?$1$2\?$3\?$4$5\?=/so;
+
+ # process sequence of encoded MIME words at once
+ 1 while not $stop and $line =~ s{^(.*?)$re_match_decode}{
+
+ my $begin = $1 . $2;
+ my $words = $3;
+
+ $begin =~ tr/\r\n//d;
+ $output .= $begin;
+
+ # decode one MIME word
+ 1 while not $stop and $words =~ s{^(.*?)($re_capture_decode)}{
+
+ $output .= $1;
+ my $orig = $2;
+ my $charset = $3;
+ my ($mime_enc, $text) = split /\?/, $5;
+
+ $text =~ tr/\r\n//d;
+
+ my $enc = Encode::find_mime_encoding($charset);
+
+ # in non strict mode allow also perl encoding aliases
+ if ( not defined $enc and not $STRICT_DECODE ) {
+ # make sure that decoded string will be always strict UTF-8
+ $charset = 'UTF-8' if lc($charset) eq 'utf8';
+ $enc = Encode::find_encoding($charset);
+ }
+
+ if ( not defined $enc ) {
+ Carp::croak qq(Unknown charset "$charset") if not ref $chk and $chk and $chk & Encode::DIE_ON_ERR;
+ Carp::carp qq(Unknown charset "$charset") if not ref $chk and $chk and $chk & Encode::WARN_ON_ERR;
+ $stop = 1 if not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
+ $output .= ($output =~ /(?:\A|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
+ $stop ? $orig : '';
+ } else {
+ if ( uc($mime_enc) eq 'B' and $obj->{decode_b} ) {
+ my $decoded = _decode_b($enc, $text, $chk);
+ $stop = 1 if not defined $decoded and not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
+ $output .= (defined $decoded ? $decoded : $text) unless $stop;
+ $stop ? $orig : '';
+ } elsif ( uc($mime_enc) eq 'Q' and $obj->{decode_q} ) {
+ my $decoded = _decode_q($enc, $text, $chk);
+ $stop = 1 if not defined $decoded and not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
+ $output .= (defined $decoded ? $decoded : $text) unless $stop;
+ $stop ? $orig : '';
+ } else {
+ Carp::croak qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk and $chk & Encode::DIE_ON_ERR;
+ Carp::carp qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk and $chk & Encode::WARN_ON_ERR;
+ $stop = 1 if not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
+ $output .= ($output =~ /(?:\A|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
+ $stop ? $orig : '';
+ }
+ }
+
+ }se;
+
+ if ( not $stop ) {
+ $output .= $words;
+ $words = '';
+ }
+
+ $words;
+
+ }se;
+
+ if ( not $stop ) {
+ $line =~ tr/\r\n//d;
+ $output .= $line . $sep;
+ $line = '';
+ $sep = '';
}
- }egox;
- $_[1] = $str if $chk;
- return $str;
-}
-sub decode_b {
- my $enc = shift;
- my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
- my $db64 = decode_base64(shift);
- my $chk = shift;
- return $d->name eq 'utf8'
- ? Encode::decode_utf8($db64)
- : $d->decode( $db64, $chk || Encode::FB_PERLQQ );
-}
+ $line . $sep;
-sub decode_q {
- my ( $enc, $q, $chk ) = @_;
- my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
- $q =~ s/_/ /go;
- $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
- return $d->name eq 'utf8'
- ? Encode::decode_utf8($q)
- : $d->decode( $q, $chk || Encode::FB_PERLQQ );
-}
+ }se;
-my $especials =
- join( '|' => map { quotemeta( chr($_) ) }
- unpack( "C*", qq{()<>,;:"'/[]?=} ) );
+ $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
+ return $output;
+}
-my $re_encoded_word = qr{
- =\? # begin encoded word
- (?:[-0-9A-Za-z_]+) # charset (encoding)
- (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231)
- \?(?:[QqBb])\? # delimiter
- (?:.*?) # Base64-encodede contents
- \?= # end encoded word
-}xo;
+sub _decode_b {
+ my ($enc, $text, $chk) = @_;
+ # MIME::Base64::decode ignores everything after a '=' padding character
+ # in non strict mode split string after each sequence of padding characters and decode each substring
+ my $octets = $STRICT_DECODE ?
+ MIME::Base64::decode($text) :
+ join('', map { MIME::Base64::decode($_) } split /(?<==)(?=[^=])/, $text);
+ return _decode_octets($enc, $octets, $chk);
+}
-my $re_especials = qr{$re_encoded_word|$especials}xo;
+sub _decode_q {
+ my ($enc, $text, $chk) = @_;
+ $text =~ s/_/ /go;
+ $text =~ s/=([0-9A-Fa-f]{2})/pack('C', hex($1))/ego;
+ return _decode_octets($enc, $text, $chk);
+}
-# cf:
-# https://rt.cpan.org/Ticket/Display.html?id=88717
-# https://www.ietf.org/rfc/rfc0822.txt
-my $re_linear_white_space = qr{(?:[ \t]|\r\n?)};
+sub _decode_octets {
+ my ($enc, $octets, $chk) = @_;
+ $chk = 0 unless defined $chk;
+ $chk &= ~Encode::LEAVE_SRC if not ref $chk and $chk;
+ my $output = $enc->decode($octets, $chk);
+ return undef if not ref $chk and $chk and $octets ne '';
+ return $output;
+}
sub encode($$;$) {
- my ( $obj, $str, $chk ) = @_;
- my @line = ();
- for my $line ( split /\r\n|[\r\n]/o, $str ) {
- my ( @word, @subline );
- if ($line =~ /\A([\w\-]+:\s+)(.*)\z/o) {
- push @word, $1, $obj->_encode($2); # "X-Header-Name: ..."
+ my ($obj, $str, $chk) = @_;
+ return undef unless defined $str;
+ my $output = $obj->_fold_line($obj->_encode_string($str, $chk));
+ $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
+ return $output . substr($str, 0, 0); # to propagate taintedness
+}
+
+sub _fold_line {
+ my ($obj, $line) = @_;
+ my $bpl = $obj->{bpl};
+ my $output = '';
+
+ while ( length($line) ) {
+ if ( $line =~ s/^(.{0,$bpl})(\s|\z)// ) {
+ $output .= $1;
+ $output .= "\r\n" . $2 if length($line);
+ } elsif ( $line =~ s/(\s)(.*)$// ) {
+ $output .= $line;
+ $line = $2;
+ $output .= "\r\n" . $1 if length($line);
} else {
- push @word, $obj->_encode($line); # anything else
- }
- my $subline = '';
- for my $word (@word) {
- use bytes ();
- if ( bytes::length($subline) + bytes::length($word) >
- $obj->{bpl} - 1 )
- {
- push @subline, $subline;
- $subline = '';
- }
- $subline .= ' ' if ($subline =~ /\?=$/ and $word =~ /^=\?/);
- $subline .= $word;
+ $output .= $line;
+ last;
}
- length($subline) and push @subline, $subline;
- push @line, join( "\n " => grep !/^$/, @subline );
}
- $_[1] = '' if $chk;
- return (substr($str, 0, 0) . join( "\n", @line ));
-}
-use constant HEAD => '=?UTF-8?';
-use constant TAIL => '?=';
-use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, };
-
-sub _encode {
- my ( $o, $str ) = @_;
- my $enc = $o->{encode};
- my $llen = ( $o->{bpl} - length(HEAD) - 2 - length(TAIL) );
+ return $output;
+}
- # to coerce a floating-point arithmetics, the following contains
- # .0 in numbers -- dankogai
- $llen *= $enc eq 'B' ? 3.0 / 4.0 : 1.0 / 3.0;
+sub _encode_string {
+ my ($obj, $str, $chk) = @_;
+ my $wordlen = $obj->{bpl} > 76 ? 76 : $obj->{bpl};
+ my $enc = Encode::find_mime_encoding($obj->{charset});
+ my $enc_chk = $chk;
+ $enc_chk = 0 unless defined $enc_chk;
+ $enc_chk |= Encode::LEAVE_SRC if not ref $enc_chk and $enc_chk;
my @result = ();
- my $chunk = '';
- while ( length( my $chr = substr( $str, 0, 1, '' ) ) ) {
- use bytes ();
- if ( bytes::length($chunk) + bytes::length($chr) > $llen ) {
- push @result, SINGLE->{$enc}($chunk);
- $chunk = '';
+ my $octets = '';
+ while ( length( my $chr = substr($str, 0, 1, '') ) ) {
+ my $seq = $enc->encode($chr, $enc_chk);
+ if ( not length($seq) ) {
+ substr($str, 0, 0, $chr);
+ last;
+ }
+ if ( $obj->_encoded_word_len($octets . $seq) > $wordlen ) {
+ push @result, $obj->_encode_word($octets);
+ $octets = '';
}
- $chunk .= $chr;
+ $octets .= $seq;
}
- length($chunk) and push @result, SINGLE->{$enc}($chunk);
- return @result;
+ length($octets) and push @result, $obj->_encode_word($octets);
+ $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
+ return join(' ', @result);
+}
+
+sub _encode_word {
+ my ($obj, $octets) = @_;
+ my $charset = $obj->{charset};
+ my $encode = $obj->{encode};
+ my $text = $encode eq 'B' ? _encode_b($octets) : _encode_q($octets);
+ return "=?$charset?$encode?$text?=";
+}
+
+sub _encoded_word_len {
+ my ($obj, $octets) = @_;
+ my $charset = $obj->{charset};
+ my $encode = $obj->{encode};
+ my $text_len = $encode eq 'B' ? _encoded_b_len($octets) : _encoded_q_len($octets);
+ return length("=?$charset?$encode??=") + $text_len;
}
sub _encode_b {
- HEAD . 'B?' . encode_base64( encode_utf8(shift), '' ) . TAIL;
+ my ($octets) = @_;
+ return MIME::Base64::encode($octets, '');
+}
+
+sub _encoded_b_len {
+ my ($octets) = @_;
+ return ( length($octets) + 2 ) / 3 * 4;
}
+my $re_invalid_q_char = qr/[^0-9A-Za-z !*+\-\/]/;
+
sub _encode_q {
- my $chunk = shift;
- $chunk = encode_utf8($chunk);
- $chunk =~ s{
- ([^0-9A-Za-z])
- }{
- join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
- }egox;
- return HEAD . 'Q?' . $chunk . TAIL;
+ my ($octets) = @_;
+ $octets =~ s{($re_invalid_q_char)}{
+ join('', map { sprintf('=%02X', $_) } unpack('C*', $1))
+ }egox;
+ $octets =~ s/ /_/go;
+ return $octets;
+}
+
+sub _encoded_q_len {
+ my ($octets) = @_;
+ my $invalid_count = () = $octets =~ /$re_invalid_q_char/sgo;
+ return ( $invalid_count * 3 ) + ( length($octets) - $invalid_count );
}
1;
@@ -187,55 +309,119 @@ __END__
=head1 NAME
-Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
+Encode::MIME::Header -- MIME encoding for an unstructured email header
=head1 SYNOPSIS
- use Encode qw/encode decode/;
- $utf8 = decode('MIME-Header', $header);
- $header = encode('MIME-Header', $utf8);
+ use Encode qw(encode decode);
-=head1 ABSTRACT
+ my $mime_str = encode("MIME-Header", "Sample:Text \N{U+263A}");
+ # $mime_str is "=?UTF-8?B?U2FtcGxlOlRleHQg4pi6?="
-This module implements RFC 2047 Mime Header Encoding. There are 3
-variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>. The
-difference is described below
+ my $mime_q_str = encode("MIME-Q", "Sample:Text \N{U+263A}");
+ # $mime_q_str is "=?UTF-8?Q?Sample=3AText_=E2=98=BA?="
- decode() encode()
- ----------------------------------------------
- MIME-Header Both B and Q =?UTF-8?B?....?=
- MIME-B B only; Q croaks =?UTF-8?B?....?=
- MIME-Q Q only; B croaks =?UTF-8?Q?....?=
+ my $str = decode("MIME-Header",
+ "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\r\n " .
+ "=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?="
+ );
+ # $str is "If you can read this you understand the example."
-=head1 DESCRIPTION
+ use Encode qw(decode :fallbacks);
+ use Encode::MIME::Header;
+ local $Encode::MIME::Header::STRICT_DECODE = 1;
+ my $strict_string = decode("MIME-Header", $mime_string, FB_CROAK);
+ # use strict decoding and croak on errors
-When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD>
-is extracted and decoded for I<X> encoding (B for Base64, Q for
-Quoted-Printable). Then the decoded chunk is fed to
-decode(I<encoding>). So long as I<encoding> is supported by Encode,
-any source encoding is fine.
+=head1 ABSTRACT
-When you encode, it just encodes UTF-8 string with I<X> encoding then
-quoted with =?UTF-8?I<X>?....?= . The parts that RFC 2047 forbids to
-encode are left as is and long lines are folded within 76 bytes per
-line.
+This module implements L<RFC 2047|https://tools.ietf.org/html/rfc2047> MIME
+encoding for an unstructured field body of the email header. It can also be
+used for L<RFC 822|https://tools.ietf.org/html/rfc822> 'text' token. However,
+it cannot be used directly for the whole header with the field name or for the
+structured header fields like From, To, Cc, Message-Id, etc... There are 3
+encoding names supported by this module: C<MIME-Header>, C<MIME-B> and
+C<MIME-Q>.
-=head1 BUGS
+=head1 DESCRIPTION
-It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
-and =?ISO-8859-1?= but that makes the implementation too complicated.
-These days major mail agents all support =?UTF-8? so I think it is
-just good enough.
+Decode method takes an unstructured field body of the email header (or
+L<RFC 822|https://tools.ietf.org/html/rfc822> 'text' token) as its input and
+decodes each MIME encoded-word from input string to a sequence of bytes
+according to L<RFC 2047|https://tools.ietf.org/html/rfc2047> and
+L<RFC 2231|https://tools.ietf.org/html/rfc2231>. Subsequently, each sequence
+of bytes with the corresponding MIME charset is decoded with
+L<the Encode module|Encode> and finally, one output string is returned. Text
+parts of the input string which do not contain MIME encoded-word stay
+unmodified in the output string. Folded newlines between two consecutive MIME
+encoded-words are discarded, others are preserved in the output string.
+C<MIME-B> can decode Base64 variant, C<MIME-Q> can decode Quoted-Printable
+variant and C<MIME-Header> can decode both of them. If L<Encode module|Encode>
+does not support particular MIME charset or chosen variant then an action based
+on L<CHECK flags|Encode/Handling Malformed Data> is performed (by default, the
+MIME encoded-word is not decoded).
+
+Encode method takes a scalar string as its input and uses
+L<strict UTF-8|Encode/UTF-8 vs. utf8 vs. UTF8> encoder for encoding it to UTF-8
+bytes. Then a sequence of UTF-8 bytes is encoded into MIME encoded-words
+(C<MIME-Header> and C<MIME-B> use a Base64 variant while C<MIME-Q> uses a
+Quoted-Printable variant) where each MIME encoded-word is limited to 75
+characters. MIME encoded-words are separated by C<CRLF SPACE> and joined to
+one output string. Output string is suitable for unstructured field body of
+the email header.
+
+Both encode and decode methods propagate
+L<CHECK flags|Encode/Handling Malformed Data> when encoding and decoding the
+MIME charset.
-Due to popular demand, 'MIME-Header-ISO_2022_JP' was introduced by
-Makamaka. Thre are still too many MUAs especially cellular phone
-handsets which does not grok UTF-8.
+=head1 BUGS
-=head1 SEE ALSO
+Versions prior to 2.22 (part of Encode 2.83) have a malfunctioning decoder
+and encoder. The MIME encoder infamously inserted additional spaces or
+discarded white spaces between consecutive MIME encoded-words, which led to
+invalid MIME headers produced by this module. The MIME decoder had a tendency
+to discard white spaces, incorrectly interpret data or attempt to decode Base64
+MIME encoded-words as Quoted-Printable. These problems were fixed in version
+2.22. It is highly recommended not to use any version prior 2.22!
+
+Versions prior to 2.24 (part of Encode 2.87) ignored
+L<CHECK flags|Encode/Handling Malformed Data>. The MIME encoder used
+L<not strict utf8|Encode/UTF-8 vs. utf8 vs. UTF8> encoder for input Unicode
+strings which could lead to invalid UTF-8 sequences. MIME decoder used also
+L<not strict utf8|Encode/UTF-8 vs. utf8 vs. UTF8> decoder and additionally
+called the decode method with a C<Encode::FB_PERLQQ> flag (thus user-specified
+L<CHECK flags|Encode/Handling Malformed Data> were ignored). Moreover, it
+automatically croaked when a MIME encoded-word contained unknown encoding.
+Since version 2.24, this module uses
+L<strict UTF-8|Encode/UTF-8 vs. utf8 vs. UTF8> encoder and decoder. And
+L<CHECK flags|Encode/Handling Malformed Data> are correctly propagated.
+
+Since version 2.22 (part of Encode 2.83), the MIME encoder should be fully
+compliant to L<RFC 2047|https://tools.ietf.org/html/rfc2047> and
+L<RFC 2231|https://tools.ietf.org/html/rfc2231>. Due to the aforementioned
+bugs in previous versions of the MIME encoder, there is a I<less strict>
+compatible mode for the MIME decoder which is used by default. It should be
+able to decode MIME encoded-words encoded by pre 2.22 versions of this module.
+However, note that this is not correct according to
+L<RFC 2047|https://tools.ietf.org/html/rfc2047>.
+
+In default I<not strict> mode the MIME decoder attempts to decode every substring
+which looks like a MIME encoded-word. Therefore, the MIME encoded-words do not
+need to be separated by white space. To enforce a correct I<strict> mode, set
+variable C<$Encode::MIME::Header::STRICT_DECODE> to 1 e.g. by localizing:
+
+ use Encode::MIME::Header;
+ local $Encode::MIME::Header::STRICT_DECODE = 1;
+
+=head1 AUTHORS
+
+Pali E<lt>pali@cpan.orgE<gt>
-L<Encode>
+=head1 SEE ALSO
-RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other
-locations.
+L<Encode>,
+L<RFC 822|https://tools.ietf.org/html/rfc822>,
+L<RFC 2047|https://tools.ietf.org/html/rfc2047>,
+L<RFC 2231|https://tools.ietf.org/html/rfc2231>
=cut
diff --git a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm
index 86955c83f1f..dc1e4275f07 100644
--- a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm
+++ b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm
@@ -5,16 +5,17 @@ use warnings;
use parent qw(Encode::MIME::Header);
-$Encode::Encoding{'MIME-Header-ISO_2022_JP'} =
- bless { encode => 'B', bpl => 76, Name => 'MIME-Header-ISO_2022_JP' } =>
+my $obj =
+ bless { decode_b => '1', decode_q => '1', encode => 'B', bpl => 76, Name => 'MIME-Header-ISO_2022_JP' } =>
__PACKAGE__;
+Encode::define_encoding($obj, 'MIME-Header-ISO_2022_JP');
use constant HEAD => '=?ISO-2022-JP?B?';
use constant TAIL => '?=';
use Encode::CJKConstants qw(%RE);
-our $VERSION = do { my @r = ( q$Revision: 1.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 1.9 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
# I owe the below codes totally to
# Jcode by Dan Kogai & http://www.din.or.jp/~ohzaki/perl.htm#JP_Base64
@@ -22,6 +23,7 @@ our $VERSION = do { my @r = ( q$Revision: 1.4 $ =~ /\d+/g ); sprintf "%d." . "%0
sub encode {
my $self = shift;
my $str = shift;
+ return undef unless defined $str;
utf8::encode($str) if ( Encode::is_utf8($str) );
Encode::from_to( $str, 'utf8', 'euc-jp' );
diff --git a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/MIME/Name.pm b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/MIME/Name.pm
index 10d86a746dc..72ec79adda7 100644
--- a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/MIME/Name.pm
+++ b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/MIME/Name.pm
@@ -1,8 +1,9 @@
package Encode::MIME::Name;
use strict;
use warnings;
-our $VERSION = do { my @r = ( q$Revision: 1.1 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 1.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+# NOTE: This table must be 1:1 mapping
our %MIME_NAME_OF = (
'AdobeStandardEncoding' => 'Adobe-Standard-Encoding',
'AdobeSymbol' => 'Adobe-Symbol-Encoding',
@@ -37,13 +38,14 @@ our %MIME_NAME_OF = (
'cp866' => 'IBM866',
'cp869' => 'IBM869',
'cp936' => 'GBK',
+ 'euc-cn' => 'EUC-CN',
'euc-jp' => 'EUC-JP',
'euc-kr' => 'EUC-KR',
#'gb2312-raw' => 'GB2312', # no, you're wrong, I18N::Charset
'hp-roman8' => 'hp-roman8',
'hz' => 'HZ-GB-2312',
'iso-2022-jp' => 'ISO-2022-JP',
- 'iso-2022-jp-1' => 'ISO-2022-JP',
+ 'iso-2022-jp-1' => 'ISO-2022-JP-1',
'iso-2022-kr' => 'ISO-2022-KR',
'iso-8859-1' => 'ISO-8859-1',
'iso-8859-10' => 'ISO-8859-10',
@@ -73,13 +75,20 @@ our %MIME_NAME_OF = (
'UTF-32BE' => 'UTF-32BE',
'UTF-32LE' => 'UTF-32LE',
'UTF-7' => 'UTF-7',
- 'utf8' => 'UTF-8',
'utf-8-strict' => 'UTF-8',
'viscii' => 'VISCII',
);
+# NOTE: %MIME_NAME_OF is still 1:1 mapping
+our %ENCODE_NAME_OF = map { uc $MIME_NAME_OF{$_} => $_ } keys %MIME_NAME_OF;
+
+# Add additional 1:N mapping
+$MIME_NAME_OF{'utf8'} = 'UTF-8';
+
sub get_mime_name($) { $MIME_NAME_OF{$_[0]} };
+sub get_encode_name($) { $ENCODE_NAME_OF{uc $_[0]} };
+
1;
__END__
diff --git a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Supported.pod b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Supported.pod
index c731509a816..b23f6ca83ec 100644
--- a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Supported.pod
+++ b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Supported.pod
@@ -367,7 +367,7 @@ Unicode character should belong).
Not very popular. Needs CNS 11643-1 and -2 which are not available in
this module. CNS 11643 is supported (via euc-tw) in Encode::HanExtra.
-Autrijus Tang may add support for this encoding in his module in future.
+Audrey Tang may add support for this encoding in her module in future.
=item Various HP-UX encodings
diff --git a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Unicode/UTF7.pm b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Unicode/UTF7.pm
index d5d86e2f903..e68647755f4 100644
--- a/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Unicode/UTF7.pm
+++ b/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Unicode/UTF7.pm
@@ -1,15 +1,14 @@
#
-# $Id: UTF7.pm,v 2.8 2013/09/14 07:51:59 dankogai Exp $
+# $Id: UTF7.pm,v 2.10 2017/06/10 17:23:50 dankogai Exp $
#
package Encode::Unicode::UTF7;
use strict;
use warnings;
-no warnings 'redefine';
use parent qw(Encode::Encoding);
__PACKAGE__->Define('UTF-7');
-our $VERSION = do { my @r = ( q$Revision: 2.8 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.10 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use MIME::Base64;
-use Encode;
+use Encode qw(find_encoding);
#
# Algorithms taken from Unicode::String by Gisle Aas
@@ -30,6 +29,7 @@ sub needs_lines { 1 }
sub encode($$;$) {
my ( $obj, $str, $chk ) = @_;
+ return undef unless defined $str;
my $len = length($str);
pos($str) = 0;
my $bytes = substr($str, 0, 0); # to propagate taintedness
@@ -61,6 +61,7 @@ sub encode($$;$) {
sub decode($$;$) {
use re 'taint';
my ( $obj, $bytes, $chk ) = @_;
+ return undef unless defined $bytes;
my $len = length($bytes);
my $str = substr($bytes, 0, 0); # to propagate taintedness;
pos($bytes) = 0;
diff --git a/gnu/usr.bin/perl/cpan/Encode/t/Aliases.t b/gnu/usr.bin/perl/cpan/Encode/t/Aliases.t
index 2fc14cc1142..8d4752bddbc 100755
--- a/gnu/usr.bin/perl/cpan/Encode/t/Aliases.t
+++ b/gnu/usr.bin/perl/cpan/Encode/t/Aliases.t
@@ -159,7 +159,7 @@ define_alias( sub {
return "iso-8859-2" if $enc =~ /hebrew/i;
return "does-not-exist" if $enc =~ /arabic/i; # should then use other override alias
return "utf-8" if $enc =~ /eight/i;
- return;
+ return "unknown";
});
print "# alias test with alias overrides\n";
diff --git a/gnu/usr.bin/perl/cpan/Encode/t/enc_data.t b/gnu/usr.bin/perl/cpan/Encode/t/enc_data.t
index a0caf650f19..e610b0d10e0 100755
--- a/gnu/usr.bin/perl/cpan/Encode/t/enc_data.t
+++ b/gnu/usr.bin/perl/cpan/Encode/t/enc_data.t
@@ -1,4 +1,4 @@
-# $Id: enc_data.t,v 2.2 2013/02/18 02:23:56 dankogai Exp $
+# $Id: enc_data.t,v 2.5 2016/11/29 23:29:23 dankogai Exp $
BEGIN {
require Config; import Config;
@@ -11,7 +11,11 @@ BEGIN {
exit 0;
}
if (ord("A") == 193) {
- print "1..0 # encoding pragma does not support EBCDIC platforms\n";
+ print "1..0 # Skip: encoding pragma does not support EBCDIC platforms\n";
+ exit(0);
+ }
+ if ($] >= 5.025 and !$Config{usecperl}) {
+ print "1..0 # Skip: encoding pragma not supported in Perl 5.26\n";
exit(0);
}
if ($] <= 5.008 and !$Config{perl_patchlevel}){
diff --git a/gnu/usr.bin/perl/cpan/Encode/t/enc_eucjp.t b/gnu/usr.bin/perl/cpan/Encode/t/enc_eucjp.t
index 7c78a68ee95..fc0af3cf336 100755
--- a/gnu/usr.bin/perl/cpan/Encode/t/enc_eucjp.t
+++ b/gnu/usr.bin/perl/cpan/Encode/t/enc_eucjp.t
@@ -1,4 +1,4 @@
-# $Id: enc_eucjp.t,v 2.2 2013/02/18 02:23:56 dankogai Exp $
+# $Id: enc_eucjp.t,v 2.5 2017/06/10 17:23:50 dankogai Exp $
# This is the twin of enc_utf8.t .
BEGIN {
@@ -19,6 +19,10 @@ BEGIN {
print "1..0 # Skip: Perl 5.8.1 or later required\n";
exit 0;
}
+ if ($] >= 5.025003 and !$Config{usecperl}){
+ print "1..0 # Skip: Perl <=5.25.2 or cperl required\n";
+ exit 0;
+ }
}
no warnings "deprecated";
@@ -26,7 +30,7 @@ use encoding 'euc-jp';
my @c = (127, 128, 255, 256);
-print "1.." . (scalar @c + 1) . "\n";
+print "1.." . (scalar @c + 2) . "\n";
my @f;
@@ -61,7 +65,19 @@ binmode(F, ":raw"); # Output raw bytes.
print F chr(128); # Output illegal UTF-8.
close F;
open(F, $f) or die "$0: failed to open '$f' for reading: $!";
-binmode(F, ":encoding(utf-8)");
+binmode(F, ":encoding(UTF-8)");
+{
+ local $^W = 1;
+ local $SIG{__WARN__} = sub { $a = shift };
+ eval { <F> }; # This should get caught.
+}
+close F;
+print $a =~ qr{^UTF-8 "\\x80" does not map to Unicode} ?
+ "ok $t - illegal UTF-8 input\n" : "not ok $t - illegal UTF-8 input: a = " . unpack("H*", $a) . "\n";
+$t++;
+
+open(F, $f) or die "$0: failed to open '$f' for reading: $!";
+binmode(F, ":encoding(utf8)");
{
local $^W = 1;
local $SIG{__WARN__} = sub { $a = shift };
@@ -70,6 +86,7 @@ binmode(F, ":encoding(utf-8)");
close F;
print $a =~ qr{^utf8 "\\x80" does not map to Unicode} ?
"ok $t - illegal utf8 input\n" : "not ok $t - illegal utf8 input: a = " . unpack("H*", $a) . "\n";
+$t++;
# On VMS temporary file names like "f0." may be more readable than "f0" since
# "f0" could be a logical name pointing elsewhere.
diff --git a/gnu/usr.bin/perl/cpan/Encode/t/enc_module.t b/gnu/usr.bin/perl/cpan/Encode/t/enc_module.t
index 05fc6c2f64d..fd6e6dcde69 100755
--- a/gnu/usr.bin/perl/cpan/Encode/t/enc_module.t
+++ b/gnu/usr.bin/perl/cpan/Encode/t/enc_module.t
@@ -1,4 +1,4 @@
-# $Id: enc_module.t,v 2.2 2013/02/18 02:23:56 dankogai Exp $
+# $Id: enc_module.t,v 2.5 2016/11/29 23:29:23 dankogai Exp $
# This file is in euc-jp
BEGIN {
require Config; import Config;
@@ -15,7 +15,11 @@ BEGIN {
exit 0;
}
if (ord("A") == 193) {
- print "1..0 # encoding pragma does not support EBCDIC platforms\n";
+ print "1..0 # Skip: encoding pragma does not support EBCDIC platforms\n";
+ exit(0);
+ }
+ if ($] >= 5.025 and !$Config{usecperl}) {
+ print "1..0 # Skip: encoding pragma not supported in Perl 5.26\n";
exit(0);
}
}
diff --git a/gnu/usr.bin/perl/cpan/Encode/t/enc_utf8.t b/gnu/usr.bin/perl/cpan/Encode/t/enc_utf8.t
index 9c6caa3fa98..be7d487804c 100755
--- a/gnu/usr.bin/perl/cpan/Encode/t/enc_utf8.t
+++ b/gnu/usr.bin/perl/cpan/Encode/t/enc_utf8.t
@@ -1,4 +1,4 @@
-# $Id: enc_utf8.t,v 2.2 2013/02/18 02:23:56 dankogai Exp $
+# $Id: enc_utf8.t,v 2.5 2017/06/10 17:23:50 dankogai Exp $
# This is the twin of enc_eucjp.t .
BEGIN {
@@ -15,6 +15,10 @@ BEGIN {
print "1..0 # encoding pragma does not support EBCDIC platforms\n";
exit(0);
}
+ if ($] >= 5.025003 and !$Config{usecperl}){
+ print "1..0 # Skip: Perl <=5.25.2 or cperl required\n";
+ exit 0;
+ }
}
no warnings "deprecated";
@@ -22,7 +26,7 @@ use encoding 'utf8';
my @c = (127, 128, 255, 256);
-print "1.." . (scalar @c + 1) . "\n";
+print "1.." . (scalar @c + 2) . "\n";
my @f;
@@ -55,7 +59,19 @@ binmode(F, ":raw"); # Output raw bytes.
print F chr(128); # Output illegal UTF-8.
close F;
open(F, $f) or die "$0: failed to open '$f' for reading: $!";
-binmode(F, ":encoding(utf-8)");
+binmode(F, ":encoding(UTF-8)");
+{
+ local $^W = 1;
+ local $SIG{__WARN__} = sub { $a = shift };
+ eval { <F> }; # This should get caught.
+}
+close F;
+print $a =~ qr{^UTF-8 "\\x80" does not map to Unicode} ?
+ "ok $t - illegal UTF-8 input\n" : "not ok $t - illegal UTF-8 input: a = " . unpack("H*", $a) . "\n";
+$t++;
+
+open(F, $f) or die "$0: failed to open '$f' for reading: $!";
+binmode(F, ":encoding(utf8)");
{
local $^W = 1;
local $SIG{__WARN__} = sub { $a = shift };
@@ -64,6 +80,7 @@ binmode(F, ":encoding(utf-8)");
close F;
print $a =~ qr{^utf8 "\\x80" does not map to Unicode} ?
"ok $t - illegal utf8 input\n" : "not ok $t - illegal utf8 input: a = " . unpack("H*", $a) . "\n";
+$t++;
# On VMS temporary file names like "f0." may be more readable than "f0" since
# "f0" could be a logical name pointing elsewhere.
diff --git a/gnu/usr.bin/perl/cpan/Encode/t/encoding.t b/gnu/usr.bin/perl/cpan/Encode/t/encoding.t
index 21f9e47eb7a..33010e74b5f 100755
--- a/gnu/usr.bin/perl/cpan/Encode/t/encoding.t
+++ b/gnu/usr.bin/perl/cpan/Encode/t/encoding.t
@@ -9,7 +9,11 @@ BEGIN {
exit 0;
}
if (ord("A") == 193) {
- print "1..0 # encoding pragma does not support EBCDIC platforms\n";
+ print "1..0 # Skip: encoding pragma does not support EBCDIC platforms\n";
+ exit(0);
+ }
+ if ($] >= 5.025 and !$Config{usecperl}) {
+ print "1..0 # Skip: encoding pragma not supported in Perl 5.26\n";
exit(0);
}
}
diff --git a/gnu/usr.bin/perl/cpan/Encode/t/jperl.t b/gnu/usr.bin/perl/cpan/Encode/t/jperl.t
index 3abe86b9b79..5995a592bab 100755
--- a/gnu/usr.bin/perl/cpan/Encode/t/jperl.t
+++ b/gnu/usr.bin/perl/cpan/Encode/t/jperl.t
@@ -1,5 +1,5 @@
#
-# $Id: jperl.t,v 2.2 2013/02/18 02:23:56 dankogai Exp $
+# $Id: jperl.t,v 2.5 2016/11/29 23:29:23 dankogai Exp $
#
# This script is written in euc-jp
@@ -17,6 +17,10 @@ BEGIN {
print "1..0 # Skip: EBCDIC\n";
exit 0;
}
+ if ($] >= 5.025 and !$Config{usecperl}) {
+ print "1..0 # Skip: encoding pragma not supported in Perl 5.26\n";
+ exit(0);
+ }
$| = 1;
}
diff --git a/gnu/usr.bin/perl/cpan/Encode/t/mime-header.t b/gnu/usr.bin/perl/cpan/Encode/t/mime-header.t
index b031aa4010b..bf48753d93f 100755
--- a/gnu/usr.bin/perl/cpan/Encode/t/mime-header.t
+++ b/gnu/usr.bin/perl/cpan/Encode/t/mime-header.t
@@ -1,5 +1,5 @@
#
-# $Id: mime-header.t,v 2.8 2016/01/25 14:54:13 dankogai Exp dankogai $
+# $Id: mime-header.t,v 2.15 2017/07/18 07:15:29 dankogai Exp $
# This script is written in utf8
#
BEGIN {
@@ -19,114 +19,319 @@ BEGIN {
$| = 1;
}
-no utf8;
-
use strict;
-#use Test::More qw(no_plan);
-use Test::More tests => 14;
-use_ok("Encode::MIME::Header");
-
-my $eheader =<<'EOS';
-From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>
-To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>
-CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
-Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
- =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
-EOS
-
-my $dheader=<<"EOS";
-From: Keith Moore <moore\@cs.utk.edu>
-To: Keld J\xF8rn Simonsen <keld\@dkuug.dk>
-CC: Andr\xE9 Pirard <PIRARD\@vm1.ulg.ac.be>
-Subject: If you can read this you understand the example.
-EOS
-
-is(Encode::decode('MIME-Header', $eheader), $dheader, "decode ASCII (RFC2047)");
use utf8;
+use charnames ":full";
+
+use Test::More tests => 266;
+
+BEGIN {
+ use_ok("Encode::MIME::Header");
+}
+
+my @decode_long_tests;
+if ($] < 5.009004) { # perl versions without Regular expressions Engine de-recursivised which cause stack overflow
+ push(@decode_long_tests, "a" x 1000000 => "a" x 1000000);
+ push(@decode_long_tests, "=?utf-8?Q?a?= " x 400 => "a" x 400 . " ");
+ push(@decode_long_tests, "=?utf-8?Q?a?= =?US-ASCII?Q?b?= " x 200 => "ab" x 200 . " ");
+} else {
+ push(@decode_long_tests, "a" x 1000000 => "a" x 1000000);
+ push(@decode_long_tests, "=?utf-8?Q?a?= " x 10000 => "a" x 10000 . " ");
+ push(@decode_long_tests, "=?utf-8?Q?a?= =?US-ASCII?Q?b?= " x 10000 => "ab" x 10000 . " ");
+}
+
+my @decode_tests = (
+ # RFC2047 p.5
+ "=?iso-8859-1?q?this=20is=20some=20text?=" => "this is some text",
+ # RFC2047 p.10
+ "=?US-ASCII?Q?Keith_Moore?=" => "Keith Moore",
+ "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" => "Keld Jørn Simonsen",
+ "=?ISO-8859-1?Q?Andr=E9?= Pirard" => "André Pirard",
+ "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\r\n =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=" => "If you can read this you understand the example.",
+ "=?ISO-8859-1?Q?Olle_J=E4rnefors?=" => "Olle Järnefors",
+ "=?ISO-8859-1?Q?Patrik_F=E4ltstr=F6m?=" => "Patrik Fältström",
+ # RFC2047 p.11
+ "(=?iso-8859-8?b?7eXs+SDv4SDp7Oj08A==?=)" => "(םולש ןב ילטפנ)",
+ "(=?ISO-8859-1?Q?a?=)" => "(a)",
+ "(=?ISO-8859-1?Q?a?= b)" => "(a b)",
+ "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)" => "(ab)",
+ "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)" => "(ab)",
+ "(=?ISO-8859-1?Q?a?=\r\n\t=?ISO-8859-1?Q?b?=)" => "(ab)",
+ # RFC2047 p.12
+ "(=?ISO-8859-1?Q?a_b?=)" => '(a b)',
+ "(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)" => "(a b)",
+ # RFC2231 p.6
+ "=?US-ASCII*EN?Q?Keith_Moore?=" => "Keith Moore",
+ # others
+ "=?US-ASCII*en-US?Q?Keith_Moore?=" => "Keith Moore",
+ "=?ISO-8859-1*da-DK?Q?Keld_J=F8rn_Simonsen?=" => "Keld Jørn Simonsen",
+ "=?ISO-8859-1*fr-BE?Q?Andr=E9?= Pirard" => "André Pirard",
+ "=?ISO-8859-1*en?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=" => "If you can read this you understand the example.",
+ # multiple (separated by CRLF)
+ "=?US-ASCII?Q?a?=\r\n=?US-ASCII?Q?b?=" => "a\r\nb",
+ "a\r\nb" => "a\r\nb",
+ "a\r\n\r\nb" => "a\r\n\r\nb",
+ "a\r\n\r\nb\r\n" => "a\r\n\r\nb\r\n",
+ # multiple multiline (separated by CRLF)
+ "=?US-ASCII?Q?a?=\r\n =?US-ASCII?Q?b?=\r\n=?US-ASCII?Q?c?=" => "ab\r\nc",
+ "a\r\n b\r\nc" => "a b\r\nc",
+ # RT67569
+ "foo =?us-ascii?q?bar?=" => "foo bar",
+ "foo\r\n =?us-ascii?q?bar?=" => "foo bar",
+ "=?us-ascii?q?foo?= bar" => "foo bar",
+ "=?us-ascii?q?foo?=\r\n bar" => "foo bar",
+ "foo bar" => "foo bar",
+ "foo\r\n bar" => "foo bar",
+ "=?us-ascii?q?foo?= =?us-ascii?q?bar?=" => "foobar",
+ "=?us-ascii?q?foo?=\r\n =?us-ascii?q?bar?=" => "foobar",
+ # RT40027
+ "a: b\r\n c" => "a: b c",
+ # RT104422
+ "=?utf-8?Q?pre?= =?utf-8?B?IGZvbw==?=\r\n =?utf-8?Q?bar?=" => "pre foobar",
+ # RT114034 - replace invalid UTF-8 sequence with unicode replacement character
+ "=?utf-8?Q?=f9=80=80=80=80?=" => "�",
+ "=?utf-8?Q?=28=c3=29?=" => "(�)",
+ # decode only known MIME charsets, do not crash on invalid
+ "prefix =?unknown?Q?a=20b=20c?= middle =?US-ASCII?Q?d=20e=20f?= suffix" => "prefix =?unknown?Q?a=20b=20c?= middle d e f suffix",
+ "prefix =?US-ASCII?Q?a_b_c?= =?unknown?Q?d_e_f?= suffix" => "prefix a b c =?unknown?Q?d_e_f?= suffix",
+ "prefix =?US-ASCII?Q?a_b_c?= =?unknown?Q?d_e_f?= =?US-ASCII?Q?g_h_i?= suffix" => "prefix a b c =?unknown?Q?d_e_f?= g h i suffix",
+ # long strings
+ @decode_long_tests,
+ # separators around encoded words
+ "\r\n =?US-ASCII?Q?a?=" => " a",
+ "\r\n (=?US-ASCII?Q?a?=)" => " (a)",
+ "\r\n (=?US-ASCII?Q?a?=)\r\n " => " (a) ",
+ "(=?US-ASCII?Q?a?=)\r\n " => "(a) ",
+ " (=?US-ASCII?Q?a?=) " => " (a) ",
+ "(=?US-ASCII?Q?a?=) " => "(a) ",
+ " (=?US-ASCII?Q?a?=)" => " (a)",
+ "(=?US-ASCII?Q?a?=)(=?US-ASCII?Q?b?=)" => "(a)(b)",
+ "(=?US-ASCII?Q?a?=) (=?US-ASCII?Q?b?=)" => "(a) (b)",
+ "(=?US-ASCII?Q?a?=)\r\n (=?US-ASCII?Q?b?=)" => "(a) (b)",
+ "\r\n (=?US-ASCII?Q?a?=)\r\n (=?US-ASCII?Q?b?=)\r\n " => " (a) (b) ",
+ "\r\n(=?US-ASCII?Q?a?=)\r\n(=?US-ASCII?Q?b?=)" => "\r\n(a)\r\n(b)",
+);
+
+my @decode_default_tests = (
+ @decode_tests,
+ "=?us-ascii?q?foo bar?=" => "foo bar",
+ "=?us-ascii?q?foo\r\n bar?=" => "foo bar",
+ '=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?=' => 'foo <bar@baz.foo> bar',
+ '"=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?="' => '"foo <bar@baz.foo> bar"',
+ "=?us-ascii?q?foo?==?us-ascii?q?bar?=" => "foobar",
+ "foo=?us-ascii?q?bar?=" => "foobar",
+ "foo =?us-ascii?q?=20?==?us-ascii?q?bar?=" => "foo bar",
+ # Encode::MIME::Header pre 2.83
+ "[=?UTF-8?B?ZsOzcnVt?=]=?UTF-8?B?IHNwcsOhdmE=?=" => "[fórum] správa",
+ "test:=?UTF-8?B?IHNwcsOhdmE=?=" => "test: správa",
+ "=?UTF-8?B?dMOpc3Q=?=:=?UTF-8?B?IHNwcsOhdmE=?=", "tést: správa",
+ # multiple base64 parts in one b word
+ "=?us-ascii?b?Zg==Zg==?=" => "ff",
+ # b word with invalid characters
+ "=?us-ascii?b?Zm!!9!v?=" => "foo",
+ # concat consecutive words (with same parameters) and join them into one utf-8 symbol
+ "=?UTF-8?Q?=C3?= =?UTF-8?Q?=A1?=" => "á",
+ # RT114034 - use strict UTF-8 decoder for invalid MIME charsets utf8, UTF8 and utf-8-strict
+ "=?utf8?Q?=C3=A1=f9=80=80=80=80?=" => "á�",
+ "=?UTF8?Q?=C3=A1=f9=80=80=80=80?=" => "á�",
+ "=?utf-8-strict?Q?=C3=A1=f9=80=80=80=80?=" => "á�",
+ # allow non-ASCII characters in q word
+ "=?UTF-8?Q?\x{C3}\x{A1}?=" => "á",
+);
+
+my @decode_strict_tests = (
+ @decode_tests,
+ "=?us-ascii?q?foo bar?=" => "=?us-ascii?q?foo bar?=",
+ "=?us-ascii?q?foo\r\n bar?=" => "=?us-ascii?q?foo bar?=",
+ '=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?=' => 'foo <bar@baz.foo> bar',
+ '"=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?="' => '"=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?="',
+ # do not decode invalid q words
+ "=?us-ascii?q?foo=?=" => "=?us-ascii?q?foo=?=",
+ "=?us-ascii?q?foo=?= =?us-ascii?q?foo?=" => "=?us-ascii?q?foo=?= foo",
+ # do not decode invalid b words
+ "=?us-ascii?b?----?=" => "=?us-ascii?b?----?=",
+ "=?us-ascii?b?Zm8=-?= =?us-ascii?b?Zm9v?= and =?us-ascii?b?Zg==?=" => "=?us-ascii?b?Zm8=-?= foo and f",
+ "=?us-ascii?b?----?= =?us-ascii?b?Zm9v?= and =?us-ascii?b?Zg==?=" => "=?us-ascii?b?----?= foo and f",
+ # RT114034 - utf8, UTF8 and also utf-8-strict are invalid MIME charset, do not decode it
+ "=?utf8?Q?=C3=A1?=" => "=?utf8?Q?=C3=A1?=",
+ "=?UTF8?Q?=C3=A1?=" => "=?UTF8?Q?=C3=A1?=",
+ "=?utf-8-strict?Q?=C3=A1?=" => "=?utf-8-strict?Q?=C3=A1?=",
+ # do not allow non-ASCII characters in q word
+ "=?UTF-8?Q?\x{C3}\x{A1}?=" => "=?UTF-8?Q?\x{C3}\x{A1}?=",
+);
+
+my @encode_tests = (
+ "小飼 弾" => "=?UTF-8?B?5bCP6aO8IOW8vg==?=", "=?UTF-8?Q?=E5=B0=8F=E9=A3=BC_=E5=BC=BE?=",
+ "漢字、カタカナ、ひらがなを含む、非常に長いタイトル行が一体全体どのようにしてEncodeされるのか?" => "=?UTF-8?B?5ryi5a2X44CB44Kr44K/44Kr44OK44CB44Gy44KJ44GM44Gq44KS5ZCr44KA?=\r\n =?UTF-8?B?44CB6Z2e5bi444Gr6ZW344GE44K/44Kk44OI44Or6KGM44GM5LiA5L2T5YWo?=\r\n =?UTF-8?B?5L2T44Gp44Gu44KI44GG44Gr44GX44GmRW5jb2Rl44GV44KM44KL44Gu44GL?=\r\n =?UTF-8?B?77yf?=", "=?UTF-8?Q?=E6=BC=A2=E5=AD=97=E3=80=81=E3=82=AB=E3=82=BF=E3=82=AB=E3=83=8A?=\r\n =?UTF-8?Q?=E3=80=81=E3=81=B2=E3=82=89=E3=81=8C=E3=81=AA=E3=82=92=E5=90=AB?=\r\n =?UTF-8?Q?=E3=82=80=E3=80=81=E9=9D=9E=E5=B8=B8=E3=81=AB=E9=95=B7=E3=81=84?=\r\n =?UTF-8?Q?=E3=82=BF=E3=82=A4=E3=83=88=E3=83=AB=E8=A1=8C=E3=81=8C=E4=B8=80?=\r\n =?UTF-8?Q?=E4=BD=93=E5=85=A8=E4=BD=93=E3=81=A9=E3=81=AE=E3=82=88=E3=81=86?=\r\n =?UTF-8?Q?=E3=81=AB=E3=81=97=E3=81=A6Encode=E3=81=95=E3=82=8C=E3=82=8B?=\r\n =?UTF-8?Q?=E3=81=AE=E3=81=8B=EF=BC=9F?=",
+ # double encode
+ "What is =?UTF-8?B?w4RwZmVs?= ?" => "=?UTF-8?B?V2hhdCBpcyA9P1VURi04P0I/dzRSd1ptVnM/PSA/?=", "=?UTF-8?Q?What_is_=3D=3FUTF-8=3FB=3Fw4RwZmVs=3F=3D_=3F?=",
+ # pound 1024
+ "\N{POUND SIGN}1024" => "=?UTF-8?B?wqMxMDI0?=", "=?UTF-8?Q?=C2=A31024?=",
+ # latin1 characters
+ "\x{fc}" => "=?UTF-8?B?w7w=?=", "=?UTF-8?Q?=C3=BC?=",
+ # RT42627
+ Encode::decode_utf8("\x{c2}\x{a3}xxxxxxxxxxxxxxxxxxx0") => "=?UTF-8?B?wqN4eHh4eHh4eHh4eHh4eHh4eHh4MA==?=", "=?UTF-8?Q?=C2=A3xxxxxxxxxxxxxxxxxxx0?=",
+ # RT87831
+ "0" => "=?UTF-8?B?MA==?=", "=?UTF-8?Q?0?=",
+ # RT88717
+ "Hey foo\x{2024}bar:whee" => "=?UTF-8?B?SGV5IGZvb+KApGJhcjp3aGVl?=", "=?UTF-8?Q?Hey_foo=E2=80=A4bar=3Awhee?=",
+ # valid q chars
+ "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz !*+-/" => "=?UTF-8?B?MDEyMzQ1Njc4OUFCQ0RFRkdISUpLTE1OT1BRUlNUVVZXWFlaYWJjZGVmZ2hp?=\r\n =?UTF-8?B?amtsbW5vcHFyc3R1dnd4eXogISorLS8=?=", "=?UTF-8?Q?0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_?=\r\n =?UTF-8?Q?!*+-/?=",
+ # invalid q chars
+ "." => "=?UTF-8?B?Lg==?=", "=?UTF-8?Q?=2E?=",
+ "," => "=?UTF-8?B?LA==?=", "=?UTF-8?Q?=2C?=",
+ # long ascii sequence
+ "a" x 100 => "=?UTF-8?B?YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh?=\r\n =?UTF-8?B?YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh?=\r\n =?UTF-8?B?YWFhYWFhYWFhYQ==?=", "=?UTF-8?Q?aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa?=\r\n =?UTF-8?Q?aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa?=",
+ # long unicode sequence
+ "😀" x 100 => "=?UTF-8?B?8J+YgPCfmIDwn5iA8J+YgPCfmIDwn5iA8J+YgPCfmIDwn5iA8J+YgPCfmIA=?=\r\n " x 9 . "=?UTF-8?B?8J+YgA==?=", join("\r\n ", ("=?UTF-8?Q?=F0=9F=98=80=F0=9F=98=80=F0=9F=98=80=F0=9F=98=80=F0=9F=98=80?=") x 20),
+);
+
+sub info {
+ my ($str, $str1, $str2) = @_;
+ substr $str1, 1000, -3, "..." if defined $str1 and length $str1 > 1000;
+ substr $str2, 1000, -3, "..." if defined $str2 and length $str2 > 1000;
+ $str .= ": $str1" if defined $str1;
+ $str .= " => $str2" if defined $str2;
+ $str = Encode::encode_utf8($str);
+ $str =~ s/\r/\\r/gs;
+ $str =~ s/\n/\\n/gs;
+ return $str;
+}
+
+sub check_length {
+ my ($str) = @_;
+ my @lines = split /\r\n /, $str;
+ my @long = grep { length($_) > 75 } @lines;
+ return scalar @long == 0;
+}
+
+my @splice;
+
+@splice = @encode_tests;
+while (my ($d, $b, $q) = splice @splice, 0, 3) {
+ is Encode::encode("MIME-Header", $d) => $b, info("encode default", $d => $b);
+ is Encode::encode("MIME-B", $d) => $b, info("encode base64", $d => $b);
+ is Encode::encode("MIME-Q", $d) => $q, info("encode qp", $d => $q);
+ is Encode::decode("MIME-B", $b) => $d, info("decode base64", $b => $d);
+ is Encode::decode("MIME-Q", $q) => $d, info("decode qp", $b => $d);
+ ok check_length($b), info("correct encoded length base64", $b);
+ ok check_length($q), info("correct encoded length qp", $q);
+}
+
+@splice = @decode_default_tests;
+while (my ($e, $d) = splice @splice, 0, 2) {
+ is Encode::decode("MIME-Header", $e) => $d, info("decode default", $e => $d);
+}
+
+local $Encode::MIME::Header::STRICT_DECODE = 1;
+
+@splice = @decode_strict_tests;
+while (my ($e, $d) = splice @splice, 0, 2) {
+ is Encode::decode("MIME-Header", $e) => $d, info("decode strict", $e => $d);
+}
+
+my $valid_unicode = "á";
+my $invalid_unicode = "\x{1000000}";
+{
+ my $input = $valid_unicode;
+ my $output = Encode::encode("MIME-Header", $input, Encode::FB_QUIET);
+ is $output => Encode::encode("MIME-Header", $valid_unicode), "encode valid with FB_QUIET flag: output string is valid";
+ is $input => "", "encode valid with FB_QUIET flag: input string is modified and empty";
+}
+{
+ my $input = $valid_unicode . $invalid_unicode;
+ my $output = Encode::encode("MIME-Header", $input, Encode::FB_QUIET);
+ is $output => Encode::encode("MIME-Header", $valid_unicode), "encode with FB_QUIET flag: output string stops before first invalid character";
+ is $input => $invalid_unicode, "encode with FB_QUIET flag: input string is modified and starts with first invalid character";
+}
+{
+ my $input = $valid_unicode . $invalid_unicode;
+ my $output = Encode::encode("MIME-Header", $input, Encode::FB_QUIET | Encode::LEAVE_SRC);
+ is $output => Encode::encode("MIME-Header", $valid_unicode), "encode with FB_QUIET and LEAVE_SRC flags: output string stops before first invalid character";
+ is $input => $valid_unicode . $invalid_unicode, "encode with FB_QUIET and LEAVE_SRC flags: input string is not modified";
+}
+{
+ my $input = $valid_unicode . $invalid_unicode;
+ my $output = Encode::encode("MIME-Header", $input, Encode::FB_PERLQQ);
+ is $output => Encode::encode("MIME-Header", $valid_unicode . '\x{1000000}'), "encode with FB_PERLQQ flag: output string contains perl qq representation of invalid character";
+ is $input => $valid_unicode . $invalid_unicode, "encode with FB_PERLQQ flag: input string is not modified";
+}
+{
+ my $input = $valid_unicode;
+ my $output = Encode::encode("MIME-Header", $input, sub { sprintf("!0x%X!", $_[0]) });
+ is $output => Encode::encode("MIME-Header", $valid_unicode), "encode valid with coderef check: output string is valid";
+ is $input => $valid_unicode, "encode valid with coderef check: input string is not modified";
+}
+{
+ my $input = $valid_unicode . $invalid_unicode;
+ my $output = Encode::encode("MIME-Header", $input, sub { sprintf("!0x%X!", $_[0]) });
+ is $output => Encode::encode("MIME-Header", $valid_unicode . '!0x1000000!'), "encode with coderef check: output string contains output from coderef";
+ is $input => $valid_unicode . $invalid_unicode, "encode with coderef check: input string is not modified";
+}
+
+my $valid_mime = "=?US-ASCII?Q?d=20e=20f?=";
+my $invalid_mime = "=?unknown?Q?a=20b=20c?=";
+my $invalid_mime_unicode = "=?utf-8?Q?=28=c3=29?=";
+{
+ my $input = $valid_mime;
+ my $output = Encode::decode("MIME-Header", $input, Encode::FB_QUIET);
+ is $output => Encode::decode("MIME-Header", $valid_mime), "decode valid with FB_QUIET flag: output string is valid";
+ is $input => "", "decode valid with FB_QUIET flag: input string is modified and empty";
+}
+{
+ my $input = $valid_mime . " " . $invalid_mime;
+ my $output = Encode::decode("MIME-Header", $input, Encode::FB_QUIET);
+ is $output => Encode::decode("MIME-Header", $valid_mime), "decode with FB_QUIET flag: output string stops before first mime word with unknown charset";
+ is $input => $invalid_mime, "decode with FB_QUIET flag: input string is modified and starts with first mime word with unknown charset";
+}
+{
+ my $input = $valid_mime . " " . $invalid_mime_unicode;
+ my $output = Encode::decode("MIME-Header", $input, Encode::FB_QUIET);
+ is $output => Encode::decode("MIME-Header", $valid_mime), "decode with FB_QUIET flag: output string stops before first mime word with invalid unicode character";
+ is $input => $invalid_mime_unicode, "decode with FB_QUIET flag: input string is modified and starts with first mime word with invalid unicode character";
+}
+{
+ my $input = $valid_mime . " " . $invalid_mime;
+ my $output = Encode::decode("MIME-Header", $input, Encode::FB_QUIET | Encode::LEAVE_SRC);
+ is $output => Encode::decode("MIME-Header", $valid_mime), "decode with FB_QUIET and LEAVE_SRC flags: output string stops before first mime word with unknown charset";
+ is $input => $valid_mime . " " . $invalid_mime, "decode with FB_QUIET flag: input string is not modified";
+}
+{
+ my $input = $valid_mime . " " . $invalid_mime_unicode;
+ my $output = Encode::decode("MIME-Header", $input, Encode::FB_QUIET | Encode::LEAVE_SRC);
+ is $output => Encode::decode("MIME-Header", $valid_mime), "decode with FB_QUIET and LEAVE_SRC flags: output string stops before first mime word with invalid unicode character";
+ is $input => $valid_mime . " " . $invalid_mime_unicode, "decode with FB_QUIET flag: input string is not modified";
+}
+{
+ my $input = $valid_mime . " " . $invalid_mime;
+ my $output = Encode::decode("MIME-Header", $input, Encode::FB_PERLQQ);
+ is $output => Encode::decode("MIME-Header", $valid_mime) . " " . $invalid_mime, "decode with FB_PERLQQ flag: output string contains unmodified mime word with unknown charset";
+ is $input => $valid_mime . " " . $invalid_mime, "decode with FB_QUIET flag: input string is not modified";
+}
+{
+ my $input = $valid_mime . " " . $invalid_mime_unicode;
+ my $output = Encode::decode("MIME-Header", $input, Encode::FB_PERLQQ);
+ is $output => Encode::decode("MIME-Header", $valid_mime) . '(\xC3)', "decode with FB_PERLQQ flag: output string contains perl qq representation of invalid unicode character";
+ is $input => $valid_mime . " " . $invalid_mime_unicode, "decode with FB_QUIET flag: input string is not modified";
+}
+{
+ my $input = $valid_mime;
+ my $output = Encode::decode("MIME-Header", $input, sub { sprintf("!0x%X!", $_[0]) });
+ is $output => Encode::decode("MIME-Header", $valid_mime), "decode valid with coderef check: output string is valid";
+ is $input => $valid_mime, "decode valid with coderef check: input string is not modified";
+}
+{
+ my $input = $valid_mime . " " . $invalid_mime;
+ my $output = Encode::decode("MIME-Header", $input, sub { sprintf("!0x%X!", $_[0]) });
+ is $output => Encode::decode("MIME-Header", $valid_mime) . " " . $invalid_mime, "decode with coderef check: output string contains unmodified mime word with unknown charset";
+ is $input => $valid_mime . " " . $invalid_mime, "decode with coderef check: input string is not modified";
+}
+{
+ my $input = $valid_mime . " " . $invalid_mime_unicode;
+ my $output = Encode::decode("MIME-Header", $input, sub { sprintf("!0x%X!", $_[0]) });
+ is $output => Encode::decode("MIME-Header", $valid_mime) . '(!0xC3!)', "decode with coderef check: output string contains output from coderef for invalid unicode character";
+ is $input => $valid_mime . " " . $invalid_mime_unicode, "decode with coderef check: input string is not modified";
+}
-my $uheader =<<'EOS';
-From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>
-To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>
-CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
-Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
- =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
-EOS
-
-is(Encode::decode('MIME-Header', $uheader), $dheader, "decode UTF-8 (RFC2047)");
-
-my $lheader =<<'EOS';
-From: =?US-ASCII*en-US?Q?Keith_Moore?= <moore@cs.utk.edu>
-To: =?ISO-8859-1*da-DK?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>
-CC: =?ISO-8859-1*fr-BE?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
-Subject: =?ISO-8859-1*en?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
- =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
-EOS
-
-is(Encode::decode('MIME-Header', $lheader), $dheader, "decode language tag (RFC2231)");
-
-
-$dheader=<<'EOS';
-From: 小飼 弾 <dankogai@dan.co.jp>
-To: dankogai@dan.co.jp (小飼=Kogai, 弾=Dan)
-Subject: 漢字、カタカナ、ひらがなを含む、非常に長いタイトル行が一体全体どのようにしてEncodeされるのか?
-EOS
-
-my $bheader =<<'EOS';
-From: =?UTF-8?B?5bCP6aO8IOW8viA8ZGFua29nYWlAZGFuLmNvLmpwPg==?=
-To: =?UTF-8?B?ZGFua29nYWlAZGFuLmNvLmpwICjlsI/po7w9S29nYWksIOW8vj1EYW4p?=
-Subject:
- =?UTF-8?B?5ryi5a2X44CB44Kr44K/44Kr44OK44CB44Gy44KJ44GM44Gq44KS5ZCr44KA?=
- =?UTF-8?B?44CB6Z2e5bi444Gr6ZW344GE44K/44Kk44OI44Or6KGM44GM5LiA5L2T5YWo?=
- =?UTF-8?B?5L2T44Gp44Gu44KI44GG44Gr44GX44GmRW5jb2Rl44GV44KM44KL44Gu44GL?=
- =?UTF-8?B?77yf?=
-EOS
-
-my $qheader=<<'EOS';
-From: =?UTF-8?Q?=E5=B0=8F=E9=A3=BC=20=E5=BC=BE=20=3Cdankogai=40?=
- =?UTF-8?Q?dan=2Eco=2Ejp=3E?=
-To: =?UTF-8?Q?dankogai=40dan=2Eco=2Ejp=20=28?=
- =?UTF-8?Q?=E5=B0=8F=E9=A3=BC=3DKogai=2C=20=E5=BC=BE=3DDan?= =?UTF-8?Q?=29?=
-Subject:
- =?UTF-8?Q?=E6=BC=A2=E5=AD=97=E3=80=81=E3=82=AB=E3=82=BF=E3=82=AB=E3=83=8A?=
- =?UTF-8?Q?=E3=80=81=E3=81=B2=E3=82=89=E3=81=8C=E3=81=AA=E3=82=92=E5=90=AB?=
- =?UTF-8?Q?=E3=82=80=E3=80=81=E9=9D=9E=E5=B8=B8=E3=81=AB=E9=95=B7=E3=81=84?=
- =?UTF-8?Q?=E3=82=BF=E3=82=A4=E3=83=88=E3=83=AB=E8=A1=8C=E3=81=8C=E4=B8=80?=
- =?UTF-8?Q?=E4=BD=93=E5=85=A8=E4=BD=93=E3=81=A9=E3=81=AE=E3=82=88=E3=81=86?=
- =?UTF-8?Q?=E3=81=AB=E3=81=97=E3=81=A6Encode=E3=81=95=E3=82=8C?=
- =?UTF-8?Q?=E3=82=8B=E3=81=AE=E3=81=8B=EF=BC=9F?=
-EOS
-
-is(Encode::decode('MIME-Header', $bheader), $dheader, "decode B");
-is(Encode::decode('MIME-Header', $qheader), $dheader, "decode Q");
-is(Encode::encode('MIME-B', $dheader)."\n", $bheader, "encode B");
-is(Encode::encode('MIME-Q', $dheader)."\n", $qheader, "encode Q");
-
-$dheader = "What is =?UTF-8?B?w4RwZmVs?= ?";
-$bheader = "=?UTF-8?B?V2hhdCBpcyA9P1VURi04P0I/dzRSd1ptVnM/PSA/?=";
-$qheader = "=?UTF-8?Q?What=20is=20=3D=3FUTF=2D8=3FB=3Fw4R?="
- . "\n " . "=?UTF-8?Q?wZmVs=3F=3D=20=3F?=";
-is(Encode::encode('MIME-B', $dheader), $bheader, "Double decode B");
-is(Encode::encode('MIME-Q', $dheader), $qheader, "Double decode Q");
-{
- # From: Dave Evans <dave@rudolf.org.uk>
- # Subject: Bug in Encode::MIME::Header
- # Message-Id: <3F43440B.7060606@rudolf.org.uk>
- use charnames ":full";
- my $pound_1024 = "\N{POUND SIGN}1024";
- is(Encode::encode('MIME-Q' => $pound_1024), '=?UTF-8?Q?=C2=A31024?=',
- 'pound 1024');
-}
-
-is(Encode::encode('MIME-Q', "\x{fc}"), '=?UTF-8?Q?=C3=BC?=', 'Encode latin1 characters');
-
-# RT42627
-
-my $rt42627 = Encode::decode_utf8("\x{c2}\x{a3}xxxxxxxxxxxxxxxxxxx0");
-is(Encode::encode('MIME-Q', $rt42627),
- '=?UTF-8?Q?=C2=A3xxxxxxxxxxxxxxxxxxx?= =?UTF-8?Q?0?=',
- 'MIME-Q encoding does not truncate trailing zeros');
-
-# RT87831
-is(Encode::encode('MIME-Header', '0'), '=?UTF-8?B?MA==?=', 'RT87831');
-__END__;
+__END__
diff --git a/gnu/usr.bin/perl/cpan/Encode/t/mime-name.t b/gnu/usr.bin/perl/cpan/Encode/t/mime-name.t
index 02ff49053a9..cec86c0362a 100755
--- a/gnu/usr.bin/perl/cpan/Encode/t/mime-name.t
+++ b/gnu/usr.bin/perl/cpan/Encode/t/mime-name.t
@@ -1,5 +1,5 @@
#
-# $Id: mime-name.t,v 1.1 2007/05/12 06:42:19 dankogai Exp $
+# $Id: mime-name.t,v 1.3 2017/10/06 22:21:53 dankogai Exp $
# This script is written in utf8
#
BEGIN {
@@ -23,14 +23,40 @@ use strict;
use warnings;
use Encode;
#use Test::More qw(no_plan);
-use Test::More tests => 68;
+use Test::More tests => 281;
+
+BEGIN {
+ use_ok("Encode::MIME::Name");
+}
-use_ok("Encode::MIME::Name");
for my $canon ( sort keys %Encode::MIME::Name::MIME_NAME_OF ) {
my $enc = find_encoding($canon);
my $mime_name = $Encode::MIME::Name::MIME_NAME_OF{$canon};
is $enc->mime_name, $mime_name,
- qq(\$enc->mime_name("$canon") eq $mime_name);
+ qq(find_encoding($canon)->mime_name eq $mime_name);
+ is $enc->name, $canon,
+ qq(find_encoding($canon)->name eq $canon);
+}
+for my $mime_name ( sort keys %Encode::MIME::Name::ENCODE_NAME_OF ) {
+ my $enc = find_mime_encoding($mime_name);
+ my $canon = $Encode::MIME::Name::ENCODE_NAME_OF{$mime_name};
+ my $mime_name = $Encode::MIME::Name::MIME_NAME_OF{$canon};
+ is $enc->mime_name, $mime_name,
+ qq(find_mime_encoding($mime_name)->mime_name eq $mime_name);
+ is $enc->name, $canon,
+ qq(find_mime_encoding($mime_name)->name eq $canon);
}
+ok find_encoding("utf8");
+ok find_encoding("UTF8");
+ok find_encoding("utf-8-strict");
+ok find_encoding("utf-8");
+ok find_encoding("UTF-8");
+
+ok not find_mime_encoding("utf8");
+ok not find_mime_encoding("UTF8");
+ok not find_mime_encoding("utf-8-strict");
+ok find_mime_encoding("utf-8");
+ok find_mime_encoding("UTF-8");
+
__END__;
diff --git a/gnu/usr.bin/perl/cpan/Encode/t/taint.t b/gnu/usr.bin/perl/cpan/Encode/t/taint.t
index 2446dd76d37..6fa46bd957e 100644
--- a/gnu/usr.bin/perl/cpan/Encode/t/taint.t
+++ b/gnu/usr.bin/perl/cpan/Encode/t/taint.t
@@ -1,13 +1,17 @@
#!/usr/bin/perl -T
use strict;
use Encode qw(encode decode);
+local %Encode::ExtModule = %Encode::Config::ExtModule;
use Scalar::Util qw(tainted);
use Test::More;
my $taint = substr($ENV{PATH},0,0);
my $str = "dan\x{5f3e}" . $taint; # tainted string to encode
my $bin = encode('UTF-8', $str); # tainted binary to decode
+my $notaint = "";
+my $notaint_str = "dan\x{5f3e}" . $notaint;
+my $notaint_bin = encode('UTF-8', $notaint_str);
my @names = Encode->encodings(':all');
-plan tests => 2 * @names;
+plan tests => 4 * @names + 2;
for my $name (@names) {
my ($d, $e, $s);
eval {
@@ -26,3 +30,25 @@ for my $name (@names) {
ok tainted($d), "decode $name";
}
}
+for my $name (@names) {
+ my ($d, $e, $s);
+ eval {
+ $e = encode($name, $notaint_str);
+ };
+ SKIP: {
+ skip $@, 1 if $@;
+ ok ! tainted($e), "encode $name";
+ }
+ $notaint_bin = $e.$notaint if $e;
+ eval {
+ $d = decode($name, $notaint_bin);
+ };
+ SKIP: {
+ skip $@, 1 if $@;
+ ok ! tainted($d), "decode $name";
+ }
+}
+Encode::_utf8_on($bin);
+ok(!Encode::is_utf8($bin), "Encode::_utf8_on does not work on tainted values");
+Encode::_utf8_off($str);
+ok(Encode::is_utf8($str), "Encode::_utf8_off does not work on tainted values");