376 lines
6.7 KiB
Perl
376 lines
6.7 KiB
Perl
package HTML::FormatMarkdown;
|
|
|
|
# ABSTRACT: Format HTML as Markdown
|
|
|
|
|
|
use 5.006_001;
|
|
use strict;
|
|
use warnings;
|
|
|
|
use parent 'HTML::Formatter';
|
|
|
|
our $VERSION = '2.12'; # VERSION
|
|
our $AUTHORITY = 'cpan:NIGELM'; # AUTHORITY
|
|
|
|
sub default_values {
|
|
( shift->SUPER::default_values(),
|
|
lm => 0,
|
|
rm => 70,
|
|
);
|
|
}
|
|
|
|
sub configure {
|
|
my ( $self, $hash ) = @_;
|
|
|
|
my $lm = $self->{lm};
|
|
my $rm = $self->{rm};
|
|
|
|
$lm = delete $hash->{lm} if exists $hash->{lm};
|
|
$lm = delete $hash->{leftmargin} if exists $hash->{leftmargin};
|
|
$rm = delete $hash->{rm} if exists $hash->{rm};
|
|
$rm = delete $hash->{rightmargin} if exists $hash->{rightmargin};
|
|
|
|
my $width = $rm - $lm;
|
|
if ( $width < 1 ) {
|
|
warn "Bad margins, ignored" if $^W;
|
|
return;
|
|
}
|
|
if ( $width < 20 ) {
|
|
warn "Page probably too narrow" if $^W;
|
|
}
|
|
|
|
for ( keys %$hash ) {
|
|
warn "Unknown configure option '$_'" if $^W;
|
|
}
|
|
|
|
$self->{lm} = $lm;
|
|
$self->{rm} = $rm;
|
|
$self;
|
|
}
|
|
|
|
sub begin {
|
|
my $self = shift;
|
|
|
|
$self->SUPER::begin();
|
|
$self->{maxpos} = 0;
|
|
$self->{curpos} = 0; # current output position.
|
|
}
|
|
|
|
sub end {
|
|
shift->collect("\n");
|
|
}
|
|
|
|
sub header_start {
|
|
my ( $self, $level ) = @_;
|
|
|
|
$self->vspace(1);
|
|
$self->out( '#' x $level . ' ' );
|
|
1;
|
|
}
|
|
|
|
sub header_end {
|
|
my ( $self, $level ) = @_;
|
|
|
|
$self->out( ' ' . '#' x $level );
|
|
$self->vspace(1);
|
|
}
|
|
|
|
sub bullet {
|
|
my $self = shift;
|
|
|
|
$self->SUPER::bullet( $_[0] . ' ' );
|
|
|
|
}
|
|
|
|
sub hr_start {
|
|
my $self = shift;
|
|
|
|
$self->vspace(1);
|
|
$self->out('- - -');
|
|
$self->vspace(1);
|
|
}
|
|
|
|
sub img_start {
|
|
my ( $self, $node ) = @_;
|
|
|
|
my $alt = $node->attr('alt');
|
|
my $src = $node->attr('src');
|
|
|
|
$self->out("");
|
|
}
|
|
|
|
sub a_start {
|
|
my ( $self, $node ) = @_;
|
|
|
|
# ignore named anchors
|
|
if ( $node->attr('name') ) {
|
|
1;
|
|
}
|
|
elsif ( $node->attr('href') =~ /^#/ ) {
|
|
1;
|
|
}
|
|
else {
|
|
$self->out("[");
|
|
}
|
|
|
|
}
|
|
|
|
sub a_end {
|
|
my ( $self, $node ) = @_;
|
|
|
|
if ( $node->attr('name') ) {
|
|
return;
|
|
}
|
|
elsif ( my $href = $node->attr('href') ) {
|
|
if ( $href =~ /^#/ ) {
|
|
return;
|
|
}
|
|
$self->out("]($href)");
|
|
}
|
|
}
|
|
|
|
sub b_start { shift->out("**") }
|
|
sub b_end { shift->out("**") }
|
|
sub i_start { shift->out("*") }
|
|
sub i_end { shift->out("*") }
|
|
|
|
sub tt_start {
|
|
my $self = shift;
|
|
|
|
if ( $self->{pre} ) {
|
|
return 1;
|
|
}
|
|
else {
|
|
$self->out("`");
|
|
}
|
|
}
|
|
|
|
sub tt_end {
|
|
my $self = shift;
|
|
|
|
if ( $self->{pre} ) {
|
|
return;
|
|
}
|
|
else {
|
|
$self->out("`");
|
|
}
|
|
}
|
|
|
|
sub blockquote_start {
|
|
my $self = shift;
|
|
|
|
$self->{blockquote}++;
|
|
$self->vspace(1);
|
|
$self->adjust_rm(-4);
|
|
|
|
1;
|
|
}
|
|
|
|
sub blockquote_end {
|
|
my $self = shift;
|
|
|
|
$self->{blockquote}--;
|
|
$self->vspace(1);
|
|
$self->adjust_rm(+4);
|
|
|
|
}
|
|
|
|
sub blockquote_out {
|
|
my ( $self, $text ) = @_;
|
|
|
|
$self->nl;
|
|
$self->goto_lm;
|
|
|
|
my $line = "> ";
|
|
$self->{curpos} += 2;
|
|
|
|
foreach my $word ( split /\s/, $text ) {
|
|
$line .= "$word ";
|
|
if ( ( $self->{curpos} + length($line) ) > $self->{rm} ) {
|
|
$self->collect($line);
|
|
$self->nl;
|
|
$self->goto_lm;
|
|
$line = "> ";
|
|
$self->{curpos} += 2;
|
|
}
|
|
}
|
|
|
|
$self->collect($line);
|
|
$self->nl;
|
|
|
|
}
|
|
|
|
# Quoted from HTML::FormatText
|
|
sub pre_out {
|
|
my $self = shift;
|
|
|
|
if ( defined $self->{vspace} ) {
|
|
if ( $self->{out} ) {
|
|
$self->nl() while $self->{vspace}-- >= 0;
|
|
$self->{vspace} = undef;
|
|
}
|
|
}
|
|
|
|
my $indent = ' ' x $self->{lm};
|
|
$indent .= ' ' x 4;
|
|
my $pre = shift;
|
|
$pre =~ s/^/$indent/mg;
|
|
$self->collect($pre);
|
|
$self->{out}++;
|
|
}
|
|
|
|
sub out {
|
|
my $self = shift;
|
|
my $text = shift;
|
|
|
|
$text =~ tr/\xA0\xAD/ /d;
|
|
|
|
if ( $text =~ /^\s*$/ ) {
|
|
$self->{hspace} = 1;
|
|
return;
|
|
}
|
|
|
|
if ( defined $self->{vspace} ) {
|
|
if ( $self->{out} ) {
|
|
$self->nl while $self->{vspace}-- >= 0;
|
|
}
|
|
$self->goto_lm;
|
|
$self->{vspace} = undef;
|
|
$self->{hspace} = 0;
|
|
}
|
|
|
|
if ( $self->{hspace} ) {
|
|
if ( $self->{curpos} + length($text) > $self->{rm} ) {
|
|
|
|
# word will not fit on line; do a line break
|
|
$self->nl;
|
|
$self->goto_lm;
|
|
}
|
|
else {
|
|
|
|
# word fits on line; use a space
|
|
$self->collect(' ');
|
|
++$self->{curpos};
|
|
}
|
|
$self->{hspace} = 0;
|
|
}
|
|
|
|
$self->collect($text);
|
|
my $pos = $self->{curpos} += length $text;
|
|
$self->{maxpos} = $pos if $self->{maxpos} < $pos;
|
|
$self->{'out'}++;
|
|
}
|
|
|
|
sub goto_lm {
|
|
my $self = shift;
|
|
|
|
my $pos = $self->{curpos};
|
|
my $lm = $self->{lm};
|
|
if ( $pos < $lm ) {
|
|
$self->{curpos} = $lm;
|
|
$self->collect( " " x ( $lm - $pos ) );
|
|
}
|
|
}
|
|
|
|
sub nl {
|
|
my $self = shift;
|
|
|
|
$self->{'out'}++;
|
|
$self->{curpos} = 0;
|
|
$self->collect("\n");
|
|
}
|
|
|
|
sub adjust_lm {
|
|
my $self = shift;
|
|
|
|
$self->{lm} += $_[0];
|
|
$self->goto_lm;
|
|
}
|
|
|
|
sub adjust_rm {
|
|
shift->{rm} += $_[0];
|
|
}
|
|
|
|
1;
|
|
|
|
__END__
|
|
|
|
=pod
|
|
|
|
=for stopwords CPAN Markdown homepage
|
|
|
|
=for test_synopsis 1;
|
|
__END__
|
|
|
|
=head1 NAME
|
|
|
|
HTML::FormatMarkdown - Format HTML as Markdown
|
|
|
|
=head1 VERSION
|
|
|
|
version 2.12
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
use HTML::FormatMarkdown;
|
|
|
|
my $string = HTML::FormatMarkdown->format_file(
|
|
'test.html'
|
|
);
|
|
|
|
open my $fh, ">", "test.md" or die "$!\n";
|
|
print $fh $string;
|
|
close $fh;
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
HTML::FormatMarkdown is a formatter that outputs Markdown.
|
|
|
|
HTML::FormatMarkdown is built on L<HTML::Formatter> and documentation for that
|
|
module applies to this - especially L<HTML::Formatter/new>,
|
|
L<HTML::Formatter/format_file> and L<HTML::Formatter/format_string>.
|
|
|
|
=head1 INSTALLATION
|
|
|
|
See perlmodinstall for information and options on installing Perl modules.
|
|
|
|
=head1 BUGS AND LIMITATIONS
|
|
|
|
You can make new bug reports, and view existing ones, through the
|
|
web interface at L<http://rt.cpan.org/Public/Dist/Display.html?Name=HTML-Format>.
|
|
|
|
=head1 AVAILABILITY
|
|
|
|
The project homepage is L<https://metacpan.org/release/HTML-Format>.
|
|
|
|
The latest version of this module is available from the Comprehensive Perl
|
|
Archive Network (CPAN). Visit L<http://www.perl.com/CPAN/> to find a CPAN
|
|
site near you, or see L<https://metacpan.org/module/HTML::Format/>.
|
|
|
|
=head1 AUTHORS
|
|
|
|
=over 4
|
|
|
|
=item *
|
|
|
|
Nigel Metheringham <nigelm@cpan.org>
|
|
|
|
=item *
|
|
|
|
Sean M Burke <sburke@cpan.org>
|
|
|
|
=item *
|
|
|
|
Gisle Aas <gisle@ActiveState.com>
|
|
|
|
=back
|
|
|
|
=head1 COPYRIGHT AND LICENSE
|
|
|
|
This software is copyright (c) 2015 by Nigel Metheringham, 2002-2005 Sean M Burke, 1999-2002 Gisle Aas.
|
|
|
|
This is free software; you can redistribute it and/or modify it under
|
|
the same terms as the Perl 5 programming language system itself.
|
|
|
|
=cut
|