summaryrefslogtreecommitdiffstats
path: root/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/XHTML.pm
diff options
context:
space:
mode:
authorsthen <sthen@openbsd.org>2013-03-25 20:06:16 +0000
committersthen <sthen@openbsd.org>2013-03-25 20:06:16 +0000
commit898184e3e61f9129feb5978fad5a8c6865f00b92 (patch)
tree56f32aefc1eed60b534611007c7856f82697a205 /gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/XHTML.pm
parentPGSHIFT -> PAGE_SHIFT (diff)
downloadwireguard-openbsd-898184e3e61f9129feb5978fad5a8c6865f00b92.tar.xz
wireguard-openbsd-898184e3e61f9129feb5978fad5a8c6865f00b92.zip
import perl 5.16.3 from CPAN - worked on by Andrew Fresh and myself
Diffstat (limited to 'gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/XHTML.pm')
-rw-r--r--gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/XHTML.pm173
1 files changed, 130 insertions, 43 deletions
diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/XHTML.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/XHTML.pm
index 6a8fb7e97e4..9d31db0badd 100644
--- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/XHTML.pm
+++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/XHTML.pm
@@ -23,13 +23,29 @@ This is a subclass of L<Pod::Simple::Methody> and inherits all its
methods. The implementation is entirely different than
L<Pod::Simple::HTML>, but it largely preserves the same interface.
+=head2 Minimal code
+
+ use Pod::Simple::XHTML;
+ my $psx = Pod::Simple::XHTML->new;
+ $psx->output_string(\my $html);
+ $psx->parse_file('path/to/Module/Name.pm');
+ open my $out, '>', 'out.html' or die "Cannot open 'out.html': $!\n";
+ print $out $html;
+
+You can also control the character encoding and entities. For example, if
+you're sure that the POD is properly encoded (using the C<=encoding> command),
+you can prevent high-bit characters from being encoded as HTML entities and
+declare the output character set as UTF-8 before parsing, like so:
+
+ $psx->html_charset('UTF-8');
+ $psx->html_encode_chars('&<>">');
+
=cut
package Pod::Simple::XHTML;
use strict;
use vars qw( $VERSION @ISA $HAS_HTML_ENTITIES );
-$VERSION = '3.14';
-use Carp ();
+$VERSION = '3.20';
use Pod::Simple::Methody ();
@ISA = ('Pod::Simple::Methody');
@@ -46,10 +62,17 @@ my %entities = (
);
sub encode_entities {
- return HTML::Entities::encode_entities( $_[0] ) if $HAS_HTML_ENTITIES;
+ my $self = shift;
+ my $ents = $self->html_encode_chars;
+ return HTML::Entities::encode_entities( $_[0], $ents ) if $HAS_HTML_ENTITIES;
+ if (defined $ents) {
+ $ents =~ s,(?<!\\)([]/]),\\$1,g;
+ $ents =~ s,(?<!\\)\\\z,\\\\,;
+ } else {
+ $ents = join '', keys %entities;
+ }
my $str = $_[0];
- my $ents = join '', keys %entities;
- $str =~ s/([$ents])/'&' . $entities{$1} . ';'/ge;
+ $str =~ s/([$ents])/'&' . ($entities{$1} || sprintf '#x%X', ord $1) . ';'/ge;
return $str;
}
@@ -107,6 +130,12 @@ not set by default.
A document type tag for the file. This option is not set by default.
+=head2 html_charset
+
+The charater set to declare in the Content-Type meta tag created by default
+for C<html_header_tags>. Note that this option will be ignored if the value of
+C<html_header_tags> is changed. Defaults to "ISO-8859-1".
+
=head2 html_header_tags
Additional arbitrary HTML tags for the header of the document. The
@@ -117,6 +146,15 @@ default value is just a content type header tag:
Add additional meta tags here, or blocks of inline CSS or JavaScript
(wrapped in the appropriate tags).
+=head3 html_encode_chars
+
+A string containing all characters that should be encoded as HTML entities,
+specified using the regular expression character class syntax (what you find
+within brackets in regular expressions). This value will be passed as the
+second argument to the C<encode_entities> fuction of L<HTML::Entities>. IF
+L<HTML::Entities> is not installed, then any characters other than C<&<>"'>
+will be encoded numerically.
+
=head2 html_h_level
This is the level of HTML "Hn" element to which a Pod "head1" corresponds. For
@@ -156,6 +194,16 @@ to the empty string.
Whether to add a table-of-contents at the top of each page (called an
index for the sake of tradition).
+=head2 anchor_items
+
+Whether to anchor every definition C<=item> directive. This needs to be
+enabled if you want to be able to link to specific C<=item> directives, which
+are output as C<< <dt> >> elements. Disabled by default.
+
+=head2 backlink
+
+Whether to turn every =head1 directive into a link pointing to the top
+of the page (specifically, the opening body tag).
=cut
@@ -165,10 +213,11 @@ __PACKAGE__->_accessorize(
'man_url_prefix',
'man_url_postfix',
'title_prefix', 'title_postfix',
- 'html_css',
+ 'html_css',
'html_javascript',
'html_doctype',
- 'html_header_tags',
+ 'html_charset',
+ 'html_encode_chars',
'html_h_level',
'title', # Used internally for the title extracted from the content
'default_title',
@@ -176,6 +225,8 @@ __PACKAGE__->_accessorize(
'html_header',
'html_footer',
'index',
+ 'anchor_items',
+ 'backlink',
'batch_mode', # whether we're in batch mode
'batch_mode_current_level',
# When in batch mode, how deep the current module is: 1 for "LWP",
@@ -198,14 +249,15 @@ sub new {
$new->{'output_fh'} ||= *STDOUT{IO};
$new->perldoc_url_prefix('http://search.cpan.org/perldoc?');
$new->man_url_prefix('http://man.he.net/man');
- $new->html_header_tags('<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" />');
+ $new->html_charset('ISO-8859-1');
$new->nix_X_codes(1);
$new->codes_in_verbatim(1);
$new->{'scratch'} = '';
$new->{'to_index'} = [];
$new->{'output'} = [];
$new->{'saved'} = [];
- $new->{'ids'} = {};
+ $new->{'ids'} = { '_podtop_' => 1 }; # used in <body>
+ $new->{'in_li'} = [];
$new->{'__region_targets'} = [];
$new->{'__literal_targets'} = {};
@@ -214,6 +266,14 @@ sub new {
return $new;
}
+sub html_header_tags {
+ my $self = shift;
+ return $self->{html_header_tags} = shift if @_;
+ return $self->{html_header_tags}
+ ||= '<meta http-equiv="Content-Type" content="text/html; charset='
+ . $self->html_charset . '" />';
+}
+
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
=head2 handle_text
@@ -225,7 +285,7 @@ want to override this if you are adding a custom element type that does
more than just display formatted text. Perhaps adding a way to generate
HTML tables from an extended version of POD.
-So, let's say you want add a custom element called 'foo'. In your
+So, let's say you want to add a custom element called 'foo'. In your
subclass's C<new> method, after calling C<SUPER::new> you'd call:
$new->accept_targets_as_text( 'foo' );
@@ -270,7 +330,7 @@ sub handle_text {
# escape special characters in HTML (<, >, &, etc)
$_[0]{'scratch'} .= $_[0]->__in_literal_xhtml_region
? $_[1]
- : encode_entities( $_[1] );
+ : $_[0]->encode_entities( $_[1] );
}
sub start_Para { $_[0]{'scratch'} = '<p>' }
@@ -282,28 +342,24 @@ sub start_head3 { $_[0]{'in_head'} = 3 }
sub start_head4 { $_[0]{'in_head'} = 4 }
sub start_item_number {
- $_[0]{'scratch'} = "</li>\n" if $_[0]{'in_li'};
+ $_[0]{'scratch'} = "</li>\n" if ($_[0]{'in_li'}->[-1] && pop @{$_[0]{'in_li'}});
$_[0]{'scratch'} .= '<li><p>';
- $_[0]{'in_li'} = 1
+ push @{$_[0]{'in_li'}}, 1;
}
sub start_item_bullet {
- $_[0]{'scratch'} = "</li>\n" if $_[0]{'in_li'};
+ $_[0]{'scratch'} = "</li>\n" if ($_[0]{'in_li'}->[-1] && pop @{$_[0]{'in_li'}});
$_[0]{'scratch'} .= '<li><p>';
- $_[0]{'in_li'} = 1
+ push @{$_[0]{'in_li'}}, 1;
}
sub start_item_text {
- if ($_[0]{'in_dd'}[ $_[0]{'dl_level'} ]) {
- $_[0]{'scratch'} = "</dd>\n";
- $_[0]{'in_dd'}[ $_[0]{'dl_level'} ] = 0;
- }
- $_[0]{'scratch'} .= '<dt>';
+ # see end_item_text
}
-sub start_over_bullet { $_[0]{'scratch'} = '<ul>'; $_[0]->emit }
+sub start_over_bullet { $_[0]{'scratch'} = '<ul>'; push @{$_[0]{'in_li'}}, 0; $_[0]->emit }
sub start_over_block { $_[0]{'scratch'} = '<ul>'; $_[0]->emit }
-sub start_over_number { $_[0]{'scratch'} = '<ol>'; $_[0]->emit }
+sub start_over_number { $_[0]{'scratch'} = '<ol>'; push @{$_[0]{'in_li'}}, 0; $_[0]->emit }
sub start_over_text {
$_[0]{'scratch'} = '<dl>';
$_[0]{'dl_level'}++;
@@ -314,14 +370,16 @@ sub start_over_text {
sub end_over_block { $_[0]{'scratch'} .= '</ul>'; $_[0]->emit }
sub end_over_number {
- $_[0]{'scratch'} = "</li>\n" if delete $_[0]{'in_li'};
+ $_[0]{'scratch'} = "</li>\n" if ( pop @{$_[0]{'in_li'}} );
$_[0]{'scratch'} .= '</ol>';
+ pop @{$_[0]{'in_li'}};
$_[0]->emit;
}
sub end_over_bullet {
- $_[0]{'scratch'} = "</li>\n" if delete $_[0]{'in_li'};
+ $_[0]{'scratch'} = "</li>\n" if ( pop @{$_[0]{'in_li'}} );
$_[0]{'scratch'} .= '</ul>';
+ pop @{$_[0]{'in_li'}};
$_[0]->emit;
}
@@ -352,7 +410,10 @@ sub _end_head {
my $id = $_[0]->idify($_[0]{scratch});
my $text = $_[0]{scratch};
- $_[0]{'scratch'} = qq{<h$h id="$id">$text</h$h>};
+ $_[0]{'scratch'} = $_[0]->backlink && ($h - $add == 0)
+ # backlinks enabled && =head1
+ ? qq{<a href="#_podtop_"><h$h id="$id">$text</h$h></a>}
+ : qq{<h$h id="$id">$text</h$h>};
$_[0]->emit;
push @{ $_[0]{'to_index'} }, [$h, $id, $text];
}
@@ -366,13 +427,27 @@ sub end_item_bullet { $_[0]{'scratch'} .= '</p>'; $_[0]->emit }
sub end_item_number { $_[0]{'scratch'} .= '</p>'; $_[0]->emit }
sub end_item_text {
- $_[0]{'scratch'} .= "</dt>\n<dd>";
+ # idify and anchor =item content if wanted
+ my $dt_id = $_[0]{'anchor_items'}
+ ? ' id="'. $_[0]->idify($_[0]{'scratch'}) .'"'
+ : '';
+
+ # reset scratch
+ my $text = $_[0]{scratch};
+ $_[0]{'scratch'} = '';
+
+ if ($_[0]{'in_dd'}[ $_[0]{'dl_level'} ]) {
+ $_[0]{'scratch'} = "</dd>\n";
+ $_[0]{'in_dd'}[ $_[0]{'dl_level'} ] = 0;
+ }
+
+ $_[0]{'scratch'} .= qq{<dt$dt_id>$text</dt>\n<dd>};
$_[0]{'in_dd'}[ $_[0]{'dl_level'} ] = 1;
$_[0]->emit;
}
# This handles =begin and =for blocks of all kinds.
-sub start_for {
+sub start_for {
my ($self, $flags) = @_;
push @{ $self->{__region_targets} }, $flags->{target_matching};
@@ -386,7 +461,7 @@ sub start_for {
$self->emit;
}
-sub end_for {
+sub end_for {
my ($self) = @_;
$self->{'scratch'} .= '</div>' unless $self->__in_literal_xhtml_region;
@@ -395,24 +470,29 @@ sub end_for {
$self->emit;
}
-sub start_Document {
+sub start_Document {
my ($self) = @_;
if (defined $self->html_header) {
$self->{'scratch'} .= $self->html_header;
$self->emit unless $self->html_header eq "";
} else {
- my ($doctype, $title, $metatags);
+ my ($doctype, $title, $metatags, $bodyid);
$doctype = $self->html_doctype || '';
$title = $self->force_title || $self->title || $self->default_title || '';
$metatags = $self->html_header_tags || '';
- if ($self->html_css) {
- $metatags .= "\n<link rel='stylesheet' href='" .
- $self->html_css . "' type='text/css'>";
+ if (my $css = $self->html_css) {
+ $metatags .= $css;
+ if ($css !~ /<link/) {
+ # this is required to be compatible with Pod::Simple::BatchHTML
+ $metatags .= '<link rel="stylesheet" href="'
+ . $self->encode_entities($css) . '" type="text/css" />';
+ }
}
if ($self->html_javascript) {
- $metatags .= "\n<script type='text/javascript' src='" .
+ $metatags .= qq{\n<script type="text/javascript" src="} .
$self->html_javascript . "'></script>";
}
+ $bodyid = $self->backlink ? ' id="_podtop_"' : '';
$self->{'scratch'} .= <<"HTML";
$doctype
<html>
@@ -420,7 +500,7 @@ $doctype
<title>$title</title>
$metatags
</head>
-<body>
+<body$bodyid>
HTML
$self->emit;
}
@@ -500,10 +580,12 @@ sub end_I { $_[0]{'scratch'} .= '</i>' }
sub start_L {
my ($self, $flags) = @_;
my ($type, $to, $section) = @{$flags}{'type', 'to', 'section'};
- my $url = $type eq 'url' ? $to
+ my $url = $self->encode_entities(
+ $type eq 'url' ? $to
: $type eq 'pod' ? $self->resolve_pod_page_link($to, $section)
: $type eq 'man' ? $self->resolve_man_page_link($to, $section)
- : undef;
+ : undef
+ );
# If it's an unknown type, use an attribute-less <a> like HTML.pm.
$self->{'scratch'} .= '<a' . ($url ? ' href="'. $url . '">' : '>');
@@ -511,8 +593,8 @@ sub start_L {
sub end_L { $_[0]{'scratch'} .= '</a>' }
-sub start_S { $_[0]{'scratch'} .= '<nobr>' }
-sub end_S { $_[0]{'scratch'} .= '</nobr>' }
+sub start_S { $_[0]{'scratch'} .= '<span style="white-space: nowrap;">' }
+sub end_S { $_[0]{'scratch'} .= '</span>' }
sub emit {
my($self) = @_;
@@ -547,14 +629,14 @@ sub resolve_pod_page_link {
my ($self, $to, $section) = @_;
return undef unless defined $to || defined $section;
if (defined $section) {
- $section = '#' . $self->idify($section, 1);
+ $section = '#' . $self->idify($self->encode_entities($section), 1);
return $section unless defined $to;
} else {
$section = ''
}
return ($self->perldoc_url_prefix || '')
- . encode_entities($to) . $section
+ . $self->encode_entities($to) . $section
. ($self->perldoc_url_postfix || '');
}
@@ -583,7 +665,7 @@ sub resolve_man_page_link {
my ($page, $part) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/;
return undef unless $page;
return ($self->man_url_prefix || '')
- . ($part || 1) . "/" . encode_entities($page)
+ . ($part || 1) . "/" . $self->encode_entities($page)
. ($self->man_url_postfix || '');
}
@@ -627,6 +709,7 @@ sub idify {
for ($t) {
s/<[^>]+>//g; # Strip HTML.
s/&[^;]+;//g; # Strip entities.
+ s/^\s+//; s/\s+$//; # Strip white space.
s/^([^a-zA-Z]+)$/pod$1/; # Prepend "pod" if no valid chars.
s/^[^a-zA-Z]+//; # First char must be a letter.
s/[^-a-zA-Z0-9_:.]+/-/g; # All other chars must be valid.
@@ -655,6 +738,10 @@ sub batch_mode_page_object_init {
return $self;
}
+sub html_header_after_title {
+}
+
+
1;
__END__
@@ -689,7 +776,7 @@ merchantability or fitness for a particular purpose.
=head1 ACKNOWLEDGEMENTS
-Thanks to L<Hurricane Electrict|http://he.net/> for permission to use its
+Thanks to L<Hurricane Electric|http://he.net/> for permission to use its
L<Linux man pages online|http://man.he.net/> site for man page links.
Thanks to L<search.cpan.org|http://search.cpan.org/> for permission to use the