chiark / gitweb /
transform source xml into HTML
author Thomas Thurman <tthurman@gnome.org>
Mon, 15 Mar 2010 00:04:33 +0000 (20:04 -0400)
committer Thomas Thurman <tthurman@gnome.org>
Mon, 15 Mar 2010 00:04:33 +0000 (20:04 -0400)
htmlfury.pl [new file with mode: 0644]

diff --git a/htmlfury.pl b/htmlfury.pl
new file mode 100644 (file)
index 0000000..dd4bd0c
--- /dev/null
@@ -0,0 +1,139 @@
+# htmlfury.pl - render source.xml as alice.html.
+#
+# Fury::Parse is handed source.xml together with a callback, which it calls
+# with an event type followed by type-specific arguments.  Text is passed
+# through Lingua::EN::Alphabet::Shaw's mapping() before being written out.
+# The callback returns 1, except that it returns 0 once more than $maxchaps
+# CHAP events have been seen (presumably asking the parser to stop - confirm
+# against Fury::Parse).
+use strict;
+use warnings;
+use lib 'lib';
+use Fury::Parse;
+use Lingua::EN::Alphabet::Shaw;
+
+my $outfile = 'alice.html';
+
+# Three-argument open on a lexical handle; include the OS error on failure.
+open my $alice, '>', $outfile or die "Cannot open $outfile for writing: $!";
+binmode $alice, ":utf8";
+
+my $furyparse = Fury::Parse->new();
+my $shaw = Lingua::EN::Alphabet::Shaw->new();
+
+# Values from SETT events; stored here but not read anywhere in this script.
+my %settings;
+
+# True while a <p class="poetry"> opened by a POET event is still open.
+my $in_stanza = 0;
+
+# Number of CHAP events seen so far, and how many chapters to render.
+my $chapcount = 0;
+my $maxchaps = 12;
+
+print {$alice} <<"EOT";
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en-GB" xml:lang="en-GB">
+<head><title>Alice</title>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<style type="text/css">
+    .poetry { font-style: italic; }
+</style>
+</head>
+<body>
+EOT
+
+$furyparse->parse('source.xml',
+                 sub {
+                     my ($type, @args) = @_;
+
+                     if ($type eq 'CHAP') {
+                         # Chapter heading.  Only $args[1] is rendered;
+                         # $args[0] is not used here.
+                         my $shawtitle = $shaw->mapping($args[1]);
+
+                         $chapcount++;
+                         return 0 if $chapcount > $maxchaps;
+
+                         print {$alice} '<h1>'.$shawtitle."</h1>\n\n";
+
+                     } elsif ($type eq 'PARA') {
+                         # Prose paragraph.  $args[0] refers to a list of
+                         # words; each word is an array ref with its text in
+                         # element 1 and flags in element 2:
+                         #   flags & 1 - emphasised, wrapped in <em>
+                         #   flags & 2 - printed as-is, without mapping()
+                         print {$alice} '<p>';
+                         my $emphasis = 0;
+                         my $emphasis_was = 0;
+                         my $first = 1;
+                         for my $word (@{$args[0]}) {
+                             $emphasis = $word->[2] & 1;
+                             # Close a finished run of emphasis before the
+                             # separating space; open a new run after it.
+                             print {$alice} '</em>' if $emphasis_was && !$emphasis;
+                             print {$alice} ' ' unless $first;
+                             print {$alice} '<em>' if $emphasis && !$emphasis_was;
+
+                             my $shawword = ($word->[2] & 2)
+                                 ? $word->[1]
+                                 : $shaw->mapping($word->[1]);
+                             print {$alice} $shawword;
+                             $emphasis_was = $emphasis;
+                             $first = 0;
+                         }
+                         print {$alice} '</em>' if $emphasis;
+                         # Close the paragraph opened above.
+                         print {$alice} "</p>\n\n";
+                     } elsif ($type eq 'POET') {
+                         # One line of verse.  The first line of a stanza
+                         # opens the poetry paragraph; later lines are
+                         # separated by <br/>.  The paragraph is closed by
+                         # the STAN event.
+                         if ($in_stanza) {
+                             print {$alice} "<br/>\n";
+                         } else {
+                             print {$alice} '<p class="poetry">';
+                         }
+                         $in_stanza = 1;
+                         my $first = 1;
+                         for my $word (@{$args[0]}) {
+                             print {$alice} ' ' unless $first;
+                             my $shawword = $shaw->mapping($word->[1]);
+                             print {$alice} $shawword;
+                             $first = 0;
+                         }
+                     } elsif ($type eq 'STAN') {
+                         # End of stanza: close the poetry paragraph.
+                         print {$alice} "</p>\n\n";
+                         $in_stanza = 0;
+                     } elsif ($type eq 'SETT') {
+                         $settings{$args[0]} = $args[1];
+                     } elsif ($type eq 'IMAG') {
+                         # (nothing)
+                     } elsif ($type eq 'BOOK') {
+                         # (nothing)
+                     } elsif ($type eq 'PGBK') {
+                         # (nothing)
+                     } elsif ($type eq 'MAGI') {
+                         # Specially formatted block.  $args[0] is appended to
+                         # the class attribute and $args[2] is the text;
+                         # $args[1] is not used here.
+                         my $magic = $args[2];
+                         $magic =~ s/\+//g; # for "Fury"
+                         $magic = $shaw->mapping($magic);
+                         $magic =~ s!\*(.)!<em>$1</em>!; # for "Fury"
+                         $magic =~ s!_[a-z]*\b!!g; # for "Fury"
+                         $magic =~ s!\n!<br/>\n!g; # for "Fury"
+
+                         print {$alice} '<p class="special '.$args[0].'">';
+                         print {$alice} $magic;
+                         print {$alice} "</p>\n\n";
+                     }
+                     return 1;
+                 });
+
+print {$alice} "</body></html>\n";
+close $alice or die "Cannot close $outfile: $!";
+
+$furyparse->printUnknownWords();