chiark / gitweb /
Cope with some features of the Cobalt WP chart
author Ian Jackson <ijackson@chiark.greenend.org.uk>
Tue, 1 Sep 2009 15:31:17 +0000 (16:31 +0100)
committer Ian Jackson <Ian.Jackson@eu.citrix.com>
Tue, 1 Sep 2009 15:31:17 +0000 (16:31 +0100)
yarrg/yppedia-chart-parser
yarrg/yppedia-ocean-scraper

index a093a0c..b35d816 100755 (executable)
@@ -118,7 +118,7 @@ sub yppedia_chart_parse () {
        s/\<--.*--\>//g;
        s/^\s*//; chomp; s/\s+$//; s/\s+/ /g;
        s/\<\/?(?:b|em)\>//g;
-       s/\{\{Chart\ style\|[^{}]*\}\}//g;
+       s/\{\{chart\ style\|[^{}]*\}\}//gi;
        next unless m/\{\{/; # only interested in chart template stuff
 
        my ($x,$y, $arch,$island,$solid,$dirn);
@@ -126,9 +126,9 @@ sub yppedia_chart_parse () {
     
        if (($x,$y,$arch) =
            m/^\{\{ chart\ label \|(\d+)\|(\d+)\| .*
-                   (?: \<big\>)? \'+
+                   (?: \<(?: big|center )\>)* \'+
                    \[\[ [^][\']* \| ([^][\'|]+)\ archipelago \]\]
-                   \'+ (?: \<\/big\>)? \}\}$/xi) {
+                   \'+ (?: \<\/(?: big|center )\>)* \}\}$/xi) {
            printf DEBUG "%2d,%-2d arch %s\n", $x,$y,$arch;
            push @wiarchlabels, [ $x,$y,$arch ];
        } elsif (m/^\{\{ chart\ label \|\d+\|\d+\|
index 30d0c4a..ad35c4d 100755 (executable)
@@ -89,6 +89,7 @@ def parse_chart():
        debug('s',s)
        s = regexp.sub(r'\&lt\;', '<', s)
        s = regexp.sub(r'\&gt\;', '>', s)
+       s = regexp.sub(r'\&quot\;', '"', s)
        s = regexp.sub(r'\&amp\;', '&', s)
        debug('s',s)
        return s