#! /usr/bin/perl -w # The AFE FAQ HTMLizer, faq2html.pl # Perl version written by Kamion; based loosely on a sed script by Aquarius. # I don't claim this to be any kind of general program. It's a special-case # hack - actually, a big pile of special-case hacks - but it works well for # our application. # # Copyright (c) 2000, 2001 Colin Watson # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # A copy of the GNU General Public License is available on the World Wide # Web at http://www.gnu.org/copyleft/gpl.html. # # faq2html.pl 1.0.x: # * Lower-case HTML tag names. # * Fix html_header invocation not to pass a previous link for the first # section. # # faq2html.pl 1.0.2 (8 July 2001): # * Tolerate the changes coming after the Archive-name: block. # * Minor cleanups. # * Explicitly licensed under the GPL, and added this changelog. # # faq2html.pl 1.0.1 (30 April 2001): # * Moved the "Sephrenia the Styric" Easter-egg link to groups.google.com. # # faq2html.pl 1.0.0 (dawn of time): # * Initial release. use strict; require 5.000; use lib qw(.); use Style qw(html_header html_footer); sub usage () { print STDERR <<"EOF"; Usage: $0 index-filename [up-URL] < faq-document URLs may be absolute or relative. EOF exit 1; } sub html_escape (\$) { my $line = shift; return unless defined $$line; $$line =~ s/\&/&/g; $$line =~ s//>/g; $$line =~ s/"/"/g; $$line =~ s/ć/æ/g; return $$line; } #my $sts_link = "http://riva.ucam.org/~kamion/archive-bin/" . # "article.pl?msgid=5n0qq5\$d9p\@mercury.dur.ac.uk"; my $sts_link = 'http://groups.google.com/groups?' . 'q=Easter+Egg&ic=1&selm=5n0qq5%24d9p%40mercury.dur.ac.uk'; # Paragraph to format and undefine, followed optionally by open and close tags. sub print_paragraph (\$;$$) { my $para = shift; return unless defined $$para && $$para !~ /^\s*$/; my $open_tags = shift; my $close_tags = shift; # Make URLs links $$para =~ s#(http:(?:[^\ \&<]|\&(?!gt;))*) #$1#gx; # Make all text within pairs of underscores outside URLs emphasized $$para =~ s[_((?:[^_.-])+\.?)_ (?![^"]*">|[^<]*)][$1]gx; # Similarly, strengthen bold text within asterisks $$para =~ s[\*((?:[^*.])+\.?)\* (?![^"]*">|[^<]*)][$1]gx; # Easter egg $$para =~ s|(Sephrenia the Styric)|$1|g; $$para = "$open_tags$$para" if defined $open_tags; $$para .= $close_tags if defined $close_tags; print "$$para\n"; undef $$para; } my $index_file = shift or usage; my $up_url = shift; open CONTROL, ">control.html" or die "Can't write to control.html: $!"; select CONTROL; html_header "Revision information", $index_file; print "\n"; my $changes_done = 0; print "

\n";
while (<>)
{
    $changes_done = 1 if /^Archive-name:/;
    next unless $changes_done;
    chomp;
    html_escape $_;
    last if /^\s*$/;
    unless (/^([^:]+:)\s*(.+)$/)
    {
	warn "Non-header line found in control section: $_";
	next;
    }
    print "$_\n";
}
print "

\n\n"; html_footer $index_file; open INDEX, ">$index_file" or die "Can't write to $index_file: $!"; select INDEX; html_header "Contents", $up_url; print "\n"; print qq(

\n); while (<>) { if (/^Changes:/) { # Oh, the changes are after the Archive-name: block this time, are # they? OK ... while (<>) { last if /^\s$/; } next; } last if /^\s$/; html_escape $_; print; } print "

\n\n"; print qq(

Revision information

\n); my $paragraph; my $listentry = 0; my $level = 0; my @lastvalues = (); while (<>) { chomp; s/^\s*//; html_escape $_; # Stop at TOC-body separator if (/--------/) { print_paragraph $paragraph, "", $listentry ? "" : ""; last; } # Convert headings to links my $line = $_; if ($line =~ /([0-9]+)((?:.[0-9]+)*)[\)\s]/) { my $firstcomp = $1; my $heading = "$1$2"; my @components = split /\./, $heading; if (@components > 0) { $line =~ /\s+(.*)$/g; my $linetail = $1; print_paragraph $paragraph, "", $listentry ? "" : ""; # Change level of ordered list if necessary if (@components > $level) { print "

\n" x (@components - $level); } elsif (@components < $level) { print "\n" x ($level - @components); } #print "

\n\n" if @components == 1 && $level > 0; print "\n
\n\n" if @components == 1 && $level > 0; $level = @components; $#lastvalues = $level; #print "

\n" if $level == 1; # Reformat the current line my $value = $components[$#components]; if (defined $lastvalues[$level] && $lastvalues[$level] + 1 != $value) { warn "Missing item in contents before $heading"; print qq(

); } elsif ($value != 1) { print qq(

); } else { print "

"; } $lastvalues[$level] = $value; print qq(\n); $paragraph = $linetail; $listentry = 1; next; } } elsif (/^[A-Z][^a-z]+$/) { print_paragraph $paragraph, "", $listentry ? "" : ""; if ($level == 0) { print "\n

$_

\n\n"; } else { #print "

\n\n"; print "\n\n" x ($level - 1); print "\n" if $level == 1; print "

$_

\n\n"; print "

\n"; $#lastvalues = 0; } $listentry = 0; next; } elsif (/^$/) { print_paragraph $paragraph, "", $listentry ? "" : ""; $listentry = 0; next; } print_paragraph $paragraph; $paragraph = $_; } # Do all the necessary end tags #print "

\n\n" x $level; html_footer $up_url; select STDOUT; close INDEX; $paragraph = <>; my $oldsection; my $section; my $section_open = 0; my $num_sections = $lastvalues[1]; my $in_heading = 0; my $in_h2 = 0; $level = 0; $#lastvalues = -1; my $heading_pat = qr/^\s*([0-9]+(?:.[0-9]+)*)\)\s+/; while (<>) { s/\s+\n/\n/; html_escape $_; if (/^\s*([0-9]+)\)\s/) { chomp $paragraph if defined $paragraph; if ($section_open) { if ($in_heading) { print_paragraph $paragraph, "", ""; } else { print_paragraph $paragraph, "

\n", "\n

"; } } $in_heading = $in_h2 = 0; $oldsection = $section; $section = $1; print "\n" if $section_open; print "\n\n" x $level if $section_open; html_footer $index_file, ($oldsection > 1) ? ("part" . ($oldsection - 1) . ".html") : undef, "part" . ($oldsection + 1) . ".html" if $section_open; open SECTION, ">part$section.html" or die "Couldn't write to part$section.html: $!"; select SECTION; html_header "section $section", $index_file, ($section > 1) ? ("part" . ($section - 1) . ".html") : undef, ($section < $num_sections) ? ("part" . ($section + 1) . ".html") : undef; $section_open = 1; $level = 0; $#lastvalues = -1; } # Convert headings to anchors in list items if (/$heading_pat/) { my $heading = $1; my @components = split /\./, $heading; chomp $paragraph if defined $paragraph; if ($in_heading) { print_paragraph $paragraph, "", "\n"; } else { print_paragraph $paragraph, "

\n", "\n

\n"; } $in_h2 = 0; # Change level of ordered list if necessary if (@components > $level) { print "

\n\n" x (@components - $level); } elsif (@components < $level) { print "\n\n" x ($level - @components); } $level = @components; $#lastvalues = $level; my $value = $components[$#components]; if (@components == 1) { chomp; s|$heading_pat|

|; $in_h2 = 1; } elsif (defined $lastvalues[$level] && $lastvalues[$level] + 1 != $value) { warn "Missing item in main document before $heading"; s|$heading_pat|
|; } elsif ($value != 1) { s|$heading_pat|
|; } else { s|$heading_pat|
|; } $lastvalues[$level] = $value; warn "Heading $heading encountered outside a section" unless defined $section; warn "Heading $heading encountered in section $section" if defined $section && $heading !~ /^$section(?:\.|$)/; $in_heading = 1; } # Mark unnumbered headings if (/^([A-Z][^a-z][A-Z])\n/) { chomp; $paragraph = $_; print "\n\n" x ($level - 1); print "\n" if $level != 1; print_paragraph $paragraph, "
", "
"; $in_heading = $in_h2 = 0; print "\n
\n" x ($level - 1); $#lastvalues = 0; $_ = ""; } # If this is a paragraph delimiter of some kind (usually a blank line), # print the preceding paragraph, handling things we recognize as tables. if (defined $paragraph && (/^\n/ || ($in_heading && $paragraph =~ /]>.{0,40}$/))) { if (($paragraph =~ /ISBN +[0-9]-/ && $paragraph =~ /$(?:hc|pb|trade pb)$/) || $paragraph =~ /c\/o Del Rey Books/ || $paragraph =~ /esper\.net$/m) { $paragraph =~ s/\n/
\n/g; } else { # We have to split up the paragraph and work line-by-line here; # multi-line regexps are really hard to get right. my @parlines = split /\n/, $paragraph; my $prevline; foreach my $parline (@parlines) { unless (defined $prevline) { $prevline = \$parline; next; } if ($parline =~ /^(\s)((?:\w\)| \[\w\]| \w+(?:\s+\w+)?:\shttp).| \<http(?:[^&]|\&(?!gt;))\>)$/x) { $parline = "$1$2
"; $$prevline .= "
" if $$prevline !~ /
$/; } $prevline = \$parline; } $paragraph = (join "\n", @parlines) . "\n"; $paragraph =~ s/:$/:
/gm; } chomp $paragraph; $paragraph =~ s/(\S) +/$1 /g unless /
/; if ($in_h2) { print_paragraph $paragraph, "", "

\n"; $in_h2 = 0; } else { if ($in_heading) { print_paragraph $paragraph, "", ""; } else { print_paragraph $paragraph, "
\n", "\n
"; } } $in_heading = 0; $paragraph = "$_" unless /^\n?$/; } elsif (defined $paragraph) { $paragraph .= "$_"; } else { $paragraph = "$_" unless /^\n?$/; } } if ($section_open) { chomp $paragraph if defined $paragraph; if ($in_heading) { print_paragraph $paragraph, "", ""; } else { print_paragraph $paragraph, "
\n", "\n
"; } print "\n"; print "\n\n" x $level; html_footer $index_file, ($section > 1) ? ("part" . ($section - 1) . ".html") : undef, undef; close SECTION if $section_open; } else { warn "No sections encountered"; }

\n); while (<>) { if (/^Changes:/) { # Oh, the changes are after the Archive-name: block this time, are # they? OK ... while (<>) { last if /^\s*$/; } next; } last if /^\s*$/; html_escape $_; print; } print "

$_

$_

", "

\n); while (<>) { if (/^Changes:/) { # Oh, the changes are after the Archive-name: block this time, are # they? OK ... while (<>) { last if /^\s$/; } next; } last if /^\s$/; html_escape $_; print; } print "