X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.db-test.git;a=blobdiff_plain;f=yarrg%2Fyppedia-chart-parser;h=41ef985e6b590801041dfbdcc2635f6d8d15e631;hp=92b609cf3268a97a72a0c921e5e98128a09ed4f2;hb=d3e7c3bd4fdd22952861782197f02115da645bc6;hpb=d28e2807c293c638df1956d2dc65f3245d62d096 diff --git a/yarrg/yppedia-chart-parser b/yarrg/yppedia-chart-parser index 92b609c..41ef985 100755 --- a/yarrg/yppedia-chart-parser +++ b/yarrg/yppedia-chart-parser @@ -1,4 +1,33 @@ #!/usr/bin/perl +# +# Normally run from +# update-master-info +# +# usage: ./yppedia-chart-parser +# updates OCEAN-Oceanname.db and _ocean-.txt +# from YPPedia (chart and ocean page) and source-info.txt + +# This is part of ypp-sc-tools, a set of third-party tools for assisting +# players of Yohoho Puzzle Pirates. +# +# Copyright (C) 2009 Ian Jackson +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# Yohoho and Puzzle Pirates are probably trademarks of Three Rings and +# are used without permission. This program is not endorsed or +# sponsored by Three Rings. use strict (qw(vars)); use warnings; @@ -7,9 +36,6 @@ use Graph::Undirected; use Commods; use CommodsDatabase; -my $ocean= 'Midnight'; - - my $widists= Graph::Undirected->new(); my $wiarchs= Graph::Undirected->new(); my $wispr; @@ -20,31 +46,59 @@ my %winode2island; my %winode2lines; my %wiccix2arch; my $wialldists; +my %wtisland2arch; -my $dbdists= Graph::Undirected->new(); +my $dbdists; my %dbisland2arch; +my @msgkinds= qw(change warning error); my %msgs; -sub pmsg ($$) { push @{ $msgs{$_[0]} }, "$_[0]: $_[1]\n"; } +my %msgprinted; +my %msgkindprinted; +sub pmsg ($$) { + my $m= "$_[0]: $_[1]\n"; + print DEBUG "D $m"; + push @{ $msgs{$_[0]} }, $m; +} sub warning ($) { pmsg("warning",$_[0]); } sub error ($) { pmsg("error", $_[0]); } sub change ($) { pmsg("change", $_[0]); } sub print_messages () { - foreach my $k (qw(change warning error)) { - my $m= $msgs{$k}; - next unless $m; - print sort @$m or die $!; + foreach my $k (@msgkinds) { + my $ms= $msgs{$k}; + next unless $ms; + foreach my $m (sort @$ms) { + next if $msgprinted{$m}; + print $m or die $!; + $msgprinted{$m}++; + $msgkindprinted{$k}++; + } } } - -if (@ARGV && $ARGV[0] eq '--debug') { - shift @ARGV; - open DEBUG, ">&STDOUT" or die $!; - select(DEBUG); $|=1; -} else { - open DEBUG, ">/dev/null" or die $!; +sub progress ($) { print "($_[0])\n"; } + +my $stdin_chart=0; + +open DEBUG, ">/dev/null" or die $!; + +while (@ARGV) { + last unless $ARGV[0] =~ m/^-/; + $_= shift @ARGV; + last if m/^--$/; + if ($_ eq '--debug') { + open DEBUG, ">&STDOUT" or die $!; + select(DEBUG); $|=1; select(STDOUT); + } elsif ($_ eq '--stdin-chart') { + $stdin_chart=1; + } else { + die; + } } -select(STDOUT); $|=1; +$|=1; + +@ARGV==1 or die; +my $ocean= shift @ARGV; + my $parity; sub nn_xy ($$) { @@ -60,11 +114,11 @@ sub nn_xy ($$) { sub yppedia_chart_parse () { # We don't even bother with tag soup; instead we do line-oriented parsing. - while (<>) { + while () { s/\<--.*--\>//g; s/^\s*//; chomp; s/\s+$//; s/\s+/ /g; s/\<\/?(?:b|em)\>//g; - s/\{\{Chart\ style\|[^{}]*\}\}//g; + s/\{\{chart\ style\|[^{}]*\}\}//gi; next unless m/\{\{/; # only interested in chart template stuff my ($x,$y, $arch,$island,$solid,$dirn); @@ -72,9 +126,15 @@ sub yppedia_chart_parse () { if (($x,$y,$arch) = m/^\{\{ chart\ label \|(\d+)\|(\d+)\| .* - \'\[\[ [^][\']* \| (\S+)\ archipelago \]\]\'*\}\}$/xi) { + (?: \<(?: big|center )\>)* \'+ + (?: \[\[ | \{\{ ) + [^][\']* \| ([^][\'|]+)\ archipelago + (?: \]\] | \}\} ) + \'+ (?: \<\/(?: big|center )\>)* \}\}$/xi) { printf DEBUG "%2d,%-2d arch %s\n", $x,$y,$arch; push @wiarchlabels, [ $x,$y,$arch ]; + } elsif (m/^\{\{ chart\ label \|\d+\|\d+\| + \ \'+ \[\[ .* \b ocean \]\]/xi) { } elsif (($x,$y,$island) = m/^\{\{ chart\ island\ icon \|(\d+)\|(\d+)\| ([^| ][^|]*[^| ]) \| .*\}\}$/xi) { @@ -112,29 +172,6 @@ sub yppedia_chart_parse () { } } -sub database_fetch_ocean () { - my ($row,$sth); - $sth= $dbh->prepare('SELECT islandname, archipelago FROM islands'); - $sth->execute(); - while ($row= $sth->fetchrow_hashref) { - print DEBUG "database-island $row->{'islandname'}". - " $row->{'archipelago'}\n"; - $dbisland2arch{$row->{'islandname'}}= $row->{'archipelago'}; - } - $sth= $dbh->prepare('SELECT dist, a.islandname a, b.islandname b - FROM dists - JOIN islands AS a ON dists.aiid==a.islandid - JOIN islands AS b ON dists.biid==b.islandid'); - $sth->execute(); - while ($row= $sth->fetchrow_hashref) { - $dbdists->add_weighted_edge($row->{'a'}, $row->{'b'}, $row->{'dist'}); - } -} - -sub database_graph_spr () { - $dbspr= shortest_path_reduction('db',$dbdists); -} - sub yppedia_graphs_add_shortcuts () { # We add edges between LPs we know about, as you can chart # between them. Yppedia often lacks these edges. @@ -186,12 +223,14 @@ sub yppedia_archs_sourceinfo () { foreach my $arch (sort keys %{ $oceans{$ocean} }) { foreach my $islename (sort keys %{ $oceans{$ocean}{$arch} }) { my $islenode= $wiisland2node{$islename}; - defined $islenode or + if (!defined $islenode) { error("island $islename in source-info but not in WP map"); + next; + } my $ccix= $wiarchs->connected_component_by_vertex($islenode); my $oldarch= $wiccix2arch{$ccix}; - error("island $islename in $arch in source-info". - " connected to $oldarch as well") + error("island in $arch in source-info". + " connected to $oldarch as well: $islename") if defined $oldarch && $oldarch ne $arch; printf DEBUG "%-5s force-island-arch cc%-2d %-10s %s\n", $islenode, $ccix, $arch, $islename; @@ -205,8 +244,7 @@ sub yppedia_archs_chart_labels () { # foreach my $label (@wiarchlabels) { my ($ax,$ay,$arch) = @$label; - my $best_ccmulti= -1; - my $best_d2= 0; + my $best_d2= 9999999; my $best_n; # print DEBUG "$ax,$ay arch-island-search $arch\n"; $ay += 1; $ax += 2; # coords are rather to the top left of label @@ -214,19 +252,16 @@ sub yppedia_archs_chart_labels () { next unless exists $winode2island{$vertex}; my $ccix= $wiarchs->connected_component_by_vertex($vertex); my @cc= $wiarchs->connected_component_by_index($ccix); - my $ccmulti= @cc > 1; my ($vx,$vy) = split /,/, $vertex; my $d2= ($vx-$ax)*($vx-$ax) + ($vy-$ay)*($vy-$ay); - my $cmp= $ccmulti <=> $best_ccmulti - || $best_d2 <=> $d2; + my $cmp= $best_d2 <=> $d2; printf DEBUG "%2d,%-2d arch-island-search %5s d2=%4d cc%-2d". - " #cc=%2d ccmulti=%d cmp=%2d %s\n", - $ax,$ay, $vertex, $d2, $ccix, scalar(@cc), $ccmulti, $cmp, + " #cc=%2d cmp=%2d %s\n", + $ax,$ay, $vertex, $d2, $ccix, scalar(@cc), $cmp, $winode2island{$vertex}; next unless $cmp > 0; $best_n= $vertex; $best_d2= $d2; - $best_ccmulti= $ccmulti; } die 'no island vertices?!' unless defined $best_n; my $ccix= $wiarchs->connected_component_by_vertex($best_n); @@ -354,11 +389,22 @@ sub compare_island_lists () { } my $dbarch= $dbisland2arch{$island}; if ($wiarch ne $dbarch) { - change("change archipelago from $dbarch to $wiarch". + change("archipelago change from $dbarch to $wiarch". " for island $island"); } } foreach my $island (sort keys %wiisland2node) { + my $wtarch= $wtisland2arch{$island}; + my $wiarch= wiisland2arch($island); + if (!$stdin_chart) { + if (!defined $wtarch) { + error("island from chart not found on ocean page: $island"); + } elsif (defined $wiarch and $wtarch ne $wiarch) { + error("island in $wtarch on ocean page but". + " concluded $wiarch from chart: $island"); + } + } + my $dbarch= $dbisland2arch{$island}; if (!defined $dbarch) { my $wiarch= wiisland2arch($island); @@ -367,17 +413,24 @@ sub compare_island_lists () { next; # We check arches of non-new islands above } - change("new island in $wiarch: $island"); + change("island new in $wiarch: $island"); + } + } + if (!$stdin_chart) { + foreach my $island (sort keys %wtisland2arch) { + my $node= $wiisland2node{$island}; + next if defined $node; + error("island on ocean page but not in chart: $island"); } } } sub shortest_path_reduction ($$) { - my ($what,$base) = @_; + my ($what,$g) = @_; # - # Takes a graph $base (and a string for messages $what) and returns + # Takes a graph $g (and a string for messages $what) and returns # a new graph which is the miminal shortest path transient reduction - # of $base. + # of $g. # # We also check that the shortest path closure of the intended result # is the same graph as the input. Thus the input must itself be @@ -387,15 +440,17 @@ sub shortest_path_reduction ($$) { Premises and definitions: - 1. F is a connected undirected weighted graph with positive edge - weights. + 1. F is an undirected weighted graph with positive edge weights. - 2. All graphs we will consider have the same vertices as F. + 2. All graphs we will consider have the same vertices as F + and none have self-edges. - 3. G = Closure(F) is the complete graph whose edge weights - are the shortest paths in F. (G is the input graph $base.) + 3. G = Closure(F) is the graph of cliques whose edge weights + are the shortest paths in F, one clique for each connected + component in F. 3a. |XY| for vertices X, Y is the weight of the edge XY in G. + If XY is not in G, |XY| is infinite. 4. A `reduction' of G is a subgraph K of G such that Closure(K) = G. The reduction is `minimal' if there is no strict subgraph K' @@ -441,20 +496,22 @@ sub shortest_path_reduction ($$) { END - printf DEBUG "spr %s before %d\n", $what, scalar($base->edges()); + printf DEBUG "spr %s before %d\n", $what, scalar($g->edges()); my $result= Graph::Undirected->new(); - foreach my $edge_ac ($base->edges()) { - my $edgename_ac= join '..', @$edge_ac; + foreach my $edge_ac ($g->edges()) { + $result->add_vertex($edge_ac->[0]); # just in case + next if $edge_ac->[0] eq $edge_ac->[1]; + my $edgename_ac= join ' .. ', @$edge_ac; printf DEBUG "spr %s edge %s\n", $what, $edgename_ac; - my $w_ac= $base->get_edge_weight(@$edge_ac); + my $w_ac= $g->get_edge_weight(@$edge_ac); my $needed= 1; - foreach my $vertex_b ($base->vertices()) { + foreach my $vertex_b ($g->vertices()) { next if grep { $_ eq $vertex_b } @$edge_ac; - my $w_ab= $base->get_edge_weight($edge_ac->[0], $vertex_b); + my $w_ab= $g->get_edge_weight($edge_ac->[0], $vertex_b); next unless defined $w_ab; next if $w_ab >= $w_ac; - my $w_bc= $base->get_edge_weight($vertex_b, $edge_ac->[1]); + my $w_bc= $g->get_edge_weight($vertex_b, $edge_ac->[1]); next unless defined $w_ac; next if $w_ab + $w_bc > $w_ac; # found path @@ -471,13 +528,14 @@ END printf DEBUG "spr %s result %d\n", $what, scalar($result->edges()); my $apsp= $result->APSP_Floyd_Warshall(); - foreach my $ia (sort $base->vertices()) { - foreach my $ib (sort $base->vertices()) { - my $din= $base->get_edge_weight($ia,$ib); + foreach my $ia (sort $g->vertices()) { + foreach my $ib (sort $g->vertices()) { + my $din= $g->get_edge_weight($ia,$ib); my $dout= $apsp->path_length($ia,$ib); $din= defined($din) ? $din : 'infinity'; $dout= defined($dout) ? $dout : 'infinity'; - error("$what spr apsp discrepancy in=$din out=$dout for $ia..$ib") + error("$what spr apsp discrepancy in=$din out=$dout". + " for $ia .. $ib") if $din != $dout; } } @@ -496,6 +554,47 @@ sub yppedia_graph_spr () { $wispr= shortest_path_reduction('wi',$base); } +sub yppedia_ocean_fetch_start ($) { + my ($chart) = @_; + my @args= (); + push @args, '--chart' if $chart; + push @args, $ocean; + open OCEAN, '-|', "./yppedia-ocean-scraper", @args or die $!; +} +sub yppedia_ocean_fetch_done () { + $?=0; $!=0; close OCEAN; $? and die $?; $! and die $!; +} + +sub yppedia_ocean_fetch_chart () { + if ($stdin_chart) { + open OCEAN, "<& STDIN" or die $!; + yppedia_chart_parse(); + } else { + yppedia_ocean_fetch_start(1); + yppedia_chart_parse(); + yppedia_ocean_fetch_done(); + } +} + +sub yppedia_ocean_fetch_text () { + yppedia_ocean_fetch_start(0); + my $arch; + while () { + chomp; + if (m/^ocean /) { + $' eq $ocean or die; + } elsif (m/^ /) { + die unless defined $arch; + $wtisland2arch{$'}= $arch; + } elsif (m/^ /) { + $arch= $'; + } else { + die; + } + } + yppedia_ocean_fetch_done(); +} + sub compare_distances () { foreach my $ia (sort keys %dbisland2arch) { my $na= $wiisland2node{$ia}; @@ -507,42 +606,199 @@ sub compare_distances () { next unless defined $dbdist || defined $widist; if (!defined $widist) { - warning(sprintf "route delete %2d for %s..%s", + warning(sprintf "route delete %2d for %s .. %s", $dbdist, $ia,$ib); } elsif (!defined $dbdist) { - change(sprintf "route create %2d for %s..%s", + change(sprintf "route new %2d for %s .. %s", $widist, $ia,$ib); } elsif ($dbdist != $widist) { - change(sprintf "route change %2d to %2d for %s..%s", + change(sprintf "route change %2d to %2d for %s .. %s", $dbdist, $widist, $ia,$ib); } } } } +#========== database handling ========== + +sub database_fetch_ocean () { + my ($row,$sth); + $sth= $dbh->prepare('SELECT islandname, archipelago FROM islands'); + $sth->execute(); + undef %dbisland2arch; + $dbdists= Graph::Undirected->new(); + while ($row= $sth->fetchrow_hashref) { + print DEBUG "database-island $row->{'islandname'}". + " $row->{'archipelago'}\n"; + $dbisland2arch{$row->{'islandname'}}= $row->{'archipelago'}; + } + $sth= $dbh->prepare('SELECT dist, a.islandname a, b.islandname b + FROM dists + JOIN islands AS a ON dists.aiid==a.islandid + JOIN islands AS b ON dists.biid==b.islandid'); + $sth->execute(); + while ($row= $sth->fetchrow_hashref) { + $dbdists->add_weighted_edge($row->{'a'}, $row->{'b'}, $row->{'dist'}); + } +} + +sub database_graph_spr () { + $dbspr= shortest_path_reduction('db',$dbdists); +} + +sub database_do_updates () { + my $addisland= $dbh->prepare(<<'END') + INSERT OR IGNORE INTO islands (islandname, archipelago) VALUES (?, ?); +END + ; + foreach my $island (sort keys %wiisland2node) { + my $wiarch= wiisland2arch($island); + $addisland->execute($island, $wiarch); + } + + db_doall(<prepare(<<'END') + INSERT INTO dists VALUES + ((SELECT islandid FROM islands WHERE islandname == ?), + (SELECT islandid FROM islands WHERE islandname == ?), + ?); +END + ; + my $addroute= $dbh->prepare(<<'END') + INSERT INTO routes VALUES + ((SELECT islandid FROM islands WHERE islandname == ?), + (SELECT islandid FROM islands WHERE islandname == ?), + ?); +END + ; + foreach my $ia (sort keys %wiisland2node) { + my $na= $wiisland2node{$ia}; + foreach my $ib (sort keys %wiisland2node) { + my $nb= $wiisland2node{$ib}; + my $apdist= $ia eq $ib ? 0 : widist($na,$nb); + die "$ia $ib" unless defined $apdist; + my $sprdist= $wispr->get_edge_weight($ia,$ib); + die "$ia $ib $apdist $sprdist" if + defined($sprdist) && $sprdist != $apdist; + + $adddist->execute($ia,$ib,$apdist); + $addroute->execute($ia,$ib,$sprdist) if defined $sprdist; + } + } + + # select ia.islandname, ib.islandname, d.dist from dists as d, islands as ia on d.aiid = ia.islandid, islands as ib on d.biid = ib.islandid order by ia.islandname, ib.islandname; + +} + +#========== update _ocean-*.txt ========== + +our $localtopo_path; + +sub localtopo_rewrite () { + $localtopo_path= '_ocean-'.(lc $ocean).'.txt'; + my $fh= new IO::File "$localtopo_path.tmp", 'w'; + print $fh "# autogenerated - do not edit\n" or die $!; + print $fh "ocean $ocean\n" or die $!; + my %arches; + foreach my $isle (sort keys %wtisland2arch) { + my $arch= $wtisland2arch{$isle}; + push @{ $arches{$arch} }, $isle; + } + foreach my $arch (sort keys %arches) { + print $fh " $arch\n" or die $!; + foreach my $isle (@{ $arches{$arch} }) { + print $fh " $isle\n" or die $!; + } + } + print $fh "\n" or die $!; + close $fh or die $!; +} + +sub localtopo_commit () { + rename "$localtopo_path.tmp", $localtopo_path or die $!; +} + +#========== main program ========== + parse_info_serverside(); -print "reading database\n"; +progress("fetching yppedia chart"); yppedia_ocean_fetch_chart(); +progress("adding shortcuts"); yppedia_graphs_add_shortcuts(); +progress("pruning boring vertices"); yppedia_graphs_prune_boring(); +progress("checking yppedia graphs"); yppedia_graphs_check(); +progress("setting archs from source-info"); yppedia_archs_sourceinfo(); +progress("computing shortest paths"); yppedia_graph_shortest_paths(); +progress("setting archs from labels"); yppedia_archs_chart_labels(); +progress("setting archs from nearby"); yppedia_archs_fillbynearest(); +progress("computing yppedia spr"); yppedia_graph_spr(); + +if (!$stdin_chart) { + progress("fetching yppedia ocean text"); yppedia_ocean_fetch_text(); +} db_setocean($ocean); db_connect(); -database_fetch_ocean(); +my $iteration=0; +for (;;) { + progress("reading database"); + database_fetch_ocean(); + progress("computing database spr"); database_graph_spr(); + + progress("comparing islands"); compare_island_lists(); + progress("comparing distances"); compare_distances(); + + print "\n"; + print_messages(); + + foreach my $k (@msgkinds) { + my $n= $msgkindprinted{$k}; + next unless $n; + printf STDERR "*** %d%s %ss\n", $n, $iteration?' additional':'', $k; + } + + if ($msgs{'error'}) { + print STDERR "*** errors, aborting update\n"; + exit 1; + } -print "computing database spr\n"; database_graph_spr(); + if (!%msgkindprinted) { + progress("updating database"); database_do_updates(); + progress("updating _ocean-*.txt"); localtopo_rewrite(); + if ($stdin_chart) { + print STDERR "*** --stdin-chart, aborting!\n"; + exit 1; + } + progress("committing database"); $dbh->commit(); + progress("committing _ocean-*.txt"); localtopo_commit(); + exit 0; + } + $dbh->rollback(); + + my $default= !$msgkindprinted{'warning'}; + printf STDERR "*** confirm update %s ? ", $default?'(y/n)':'(n/y)'; + + if ($stdin_chart) { + printf STDERR "[--stdin-chart]\n"; + exit 1; + } -print "reading yppedia chart\n"; yppedia_chart_parse(); -print "adding shortcuts\n"; yppedia_graphs_add_shortcuts(); -print "pruning boring vertices\n"; yppedia_graphs_prune_boring(); -print "checking yppedia graphs\n"; yppedia_graphs_check(); -print "setting archs from source-info\n"; yppedia_archs_sourceinfo(); -print "computing shortest paths\n"; yppedia_graph_shortest_paths(); -print "setting archs from labels\n"; yppedia_archs_chart_labels(); -print "setting archs from nearby\n"; yppedia_archs_fillbynearest(); -print "computing yppedia spr\n"; yppedia_graph_spr(); + $!=0; my $result= ; defined $result or die $!; + $result =~ s/\s//g; + $result= $default?'y':'n' if !length $result; + $result= $result =~ m/^y/i; -print "comparing\n"; + if (!$result) { + printf STDERR "*** updated abandoned at your request\n"; + exit 1; + } -compare_island_lists(); -compare_distances(); + print "\n"; + undef %msgkindprinted; + $iteration++; +} print_messages();