#!/usr/bin/perl # # Normally run from # update-master-info # # usage: ./yppedia-chart-parser # updates OCEAN-Oceanname.db and _ocean-.txt # from YPPedia (chart and ocean page) and source-info.txt # This is part of ypp-sc-tools, a set of third-party tools for assisting # players of Yohoho Puzzle Pirates. # # Copyright (C) 2009 Ian Jackson # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # # Yohoho and Puzzle Pirates are probably trademarks of Three Rings and # are used without permission. This program is not endorsed or # sponsored by Three Rings. use strict (qw(vars)); use warnings; use Graph::Undirected; use Commods; use CommodsDatabase; use CommodsScrape; my $widists= Graph::Undirected->new(); my $wiarchs= Graph::Undirected->new(); my $wispr; my $dbspr; my @wiarchlabels; my %wiisland2node; my %winode2island; my %winode2lines; my %wiccix2arch; my $wialldists; my %wtisland2arch; my $dbdists; my %dbisland2arch; my $debugfh; my @msgkinds= qw(change warning error); my %msgs; my %msgprinted; my %msgkindprinted; sub pmsg ($$) { my $m= "$_[0]: $_[1]\n"; print $debugfh "D $m"; push @{ $msgs{$_[0]} }, $m; } sub warning ($) { pmsg("warning",$_[0]); } sub error ($) { pmsg("error", $_[0]); } sub change ($) { pmsg("change", $_[0]); } sub print_messages () { foreach my $k (@msgkinds) { my $ms= $msgs{$k}; next unless $ms; foreach my $m (sort @$ms) { next if $msgprinted{$m}; print $m or die $!; $msgprinted{$m}++; $msgkindprinted{$k}++; } } } sub progress ($) { print "($_[0])\n"; } my $stdin_chart=0; $debugfh= new IO::File ">/dev/null" or die $!; while (@ARGV) { last unless $ARGV[0] =~ m/^-/; $_= shift @ARGV; last if m/^--$/; if ($_ eq '--debug') { $debugfh= new IO::File ">&STDOUT" or die $!; select($debugfh); $|=1; select(STDOUT); } elsif ($_ eq '--stdin-chart') { $stdin_chart=1; } else { die; } } $|=1; @ARGV==1 or die; my $ocean= shift @ARGV; my $parity; sub nn_xy ($$) { my ($x,$y) = @_; my $tp= (0+$x ^ 0+$y) & 1; defined $parity or $parity=$tp; $tp==$parity or warning("line $.: parity error $x,$y is $tp not $parity"); my $n= "$_[0],$_[1]"; $winode2lines{$n}{$.}++; return $n; } sub run_yppedia_chart_parse ($) { my ($oceanfh) = @_; yppedia_chart_parse($oceanfh, $debugfh, \&nn_xy, sub { my ($x,$y,$arch) = @_; push @wiarchlabels, [ $x,$y,$arch ]; }, sub { my ($n, $island) = @_; $wiisland2node{$island}= $n; $winode2island{$n}= $island; $widists->add_vertex($n); $wiarchs->add_vertex($n); }, sub { my ($na, $nb, $solid) = @_; $widists->add_weighted_edge($na, $nb, 1); $wiarchs->add_edge($na, $nb) if $solid; $wiarchs->add_edge($na, $nb) if $solid; }, sub { my ($lno,$l) = @_; warning("line $l: ignoring incomprehensible: $l"); }); } sub yppedia_graphs_add_shortcuts () { # We add edges between LPs we know about, as you can chart # between them. Yppedia often lacks these edges. # foreach my $p ($widists->vertices) { my ($ax,$ay) = $p =~ m/^(\d+)\,(\d+)$/ or die; my $add_shortcut= sub { my $q= sprintf "%d,%d", $ax+$_[0], $ay+$_[1]; return unless $widists->has_vertex($q); return if $widists->has_edge($p,$q); printf $debugfh "%-5s league-shortcut %-5s\n", $p, $q; $widists->add_weighted_edge($p,$q,1); }; $add_shortcut->( 2,0); $add_shortcut->(+1,1); $add_shortcut->(-1,1); } } sub yppedia_graphs_prune_boring () { # Prune the LP database by eliminating boring intermediate vertices foreach my $delete ($widists->vertices()) { next if exists $winode2island{$delete}; my @neigh= $widists->neighbours($delete); next unless @neigh==2; my $weight= 0; map { $weight += $widists->get_edge_weight($delete, $_) } @neigh; $widists->add_weighted_edge(@neigh, $weight); $widists->delete_vertex($delete); printf $debugfh "%-5s elide %5s %-5s %2d\n", $delete, @neigh, $weight; } } sub yppedia_graphs_check () { # Check that it's connected. foreach my $cc ($widists->connected_components()) { next if 2*@$cc > $widists->vertices(); my $m= "disconnected league point(s):"; foreach my $n (@$cc) { $m .= "\n LP $n, def. yppedia line(s): ". join(',', sort keys %{ $winode2lines{$n} }); } warning($m); } } sub yppedia_archs_sourceinfo () { # Assign archipelagoes according to the source-info file foreach my $arch (sort keys %{ $oceans{$ocean} }) { foreach my $islename (sort keys %{ $oceans{$ocean}{$arch} }) { my $islenode= $wiisland2node{$islename}; if (!defined $islenode) { error("island $islename in source-info but not in WP map"); next; } my $ccix= $wiarchs->connected_component_by_vertex($islenode); my $oldarch= $wiccix2arch{$ccix}; error("island in $arch in source-info". " connected to $oldarch as well: $islename") if defined $oldarch && $oldarch ne $arch; printf $debugfh "%-5s force-island-arch cc%-2d %-10s %s\n", $islenode, $ccix, $arch, $islename; $wiccix2arch{$ccix}= $arch; } } } sub yppedia_archs_chart_labels () { # Assign archipelago labels to groups of islands # foreach my $label (@wiarchlabels) { my ($ax,$ay,$arch) = @$label; my $best_d2= 9999999; my $best_n; # print $debugfh "$ax,$ay arch-island-search $arch\n"; $ay += 1; $ax += 2; # coords are rather to the top left of label foreach my $vertex ($wiarchs->vertices()) { next unless exists $winode2island{$vertex}; my $ccix= $wiarchs->connected_component_by_vertex($vertex); my @cc= $wiarchs->connected_component_by_index($ccix); my ($vx,$vy) = split /,/, $vertex; # / my $d2= ($vx-$ax)*($vx-$ax) + ($vy-$ay)*($vy-$ay); my $cmp= $best_d2 <=> $d2; printf $debugfh "%2d,%-2d arch-island-search %5s d2=%4d cc%-2d". " #cc=%2d cmp=%2d %s\n", $ax,$ay, $vertex, $d2, $ccix, scalar(@cc), $cmp, $winode2island{$vertex}; next unless $cmp > 0; $best_n= $vertex; $best_d2= $d2; } die 'no island vertices?!' unless defined $best_n; my $ccix= $wiarchs->connected_component_by_vertex($best_n); printf $debugfh "%2d,%-2d arch-island-select %-5s d2=%4d cc%-2d %-10s %s\n", $ax,$ay, $best_n, $ccix, $best_d2, $arch, $winode2island{$best_n}; my $desc= join "\n", map { my $in= $winode2island{$_}; " LP $_". (defined $in ? ", $in" : ""); } sort $wiarchs->connected_component_by_index($ccix); if (exists $wiccix2arch{$ccix} and $wiccix2arch{$ccix} ne $arch) { error("archipelago determination failed, wrongly merged:\n". " archipelago $arch\n". " archipelago $wiccix2arch{$ccix}\n". $desc); next; } $wiccix2arch{$ccix}= $arch; # print "$ccix $arch ::\n$desc\n"; } } sub yppedia_archs_fillbynearest() { # Assign islands not labelled above to archipelagoes. # # We do this by, for each connected component (set of islands # linked by purchaseable charts), searching for the nearest other # connected component which has already been assigned an arch. # `Nearest' means shortest distance of unpurchaseable charts, in # leagues. # # we need only consider vertices which weren't `boring intermediate # vertices' (removed during optimisation as being of order 2) my @ccs_useful= map { [ grep { $widists->has_vertex($_) } @$_ ] } $wiarchs->connected_components(); my @assignments; foreach my $sourceccix (0..$#ccs_useful) { next if defined $wiccix2arch{$sourceccix}; next unless $ccs_useful[$sourceccix]; my @sourcecc= $wiarchs->connected_component_by_index($sourceccix); my @islandnodes= grep { $winode2island{$_} } @sourcecc; next unless @islandnodes; # don't care, then foreach my $islandnode (@islandnodes) { printf $debugfh "%-5s arch-join-need cc%-2d %s\n", $islandnode, $sourceccix, $winode2island{$islandnode}; } my $best_dist= 9999999; my ($best_target, $best_targetccix, $best_source); foreach my $targetccix (0..$#ccs_useful) { next unless defined $wiccix2arch{$targetccix}; # not helpful next unless $ccs_useful[$targetccix]; foreach my $target ($wiarchs-> connected_component_by_index($targetccix)) { next unless $widists->has_vertex($target); foreach my $source (@sourcecc) { my $target_dist= widist($target,$source); next unless defined $target_dist; next if $target_dist >= $best_dist; $best_dist= $target_dist; $best_source= $source; $best_target= $target; $best_targetccix= $targetccix; } } } die "no possible target ?!" unless defined $best_target; my $arch= $wiccix2arch{$best_targetccix}; my $best_island= $winode2island{$best_target}; printf $debugfh "%-5s arch-join-to %-5s dist=%2d cc%-2d %-10s %s\n", $best_source, $best_target, $best_dist, $best_targetccix, $arch, defined($best_island) ? $best_island : "-"; push @assignments, [ $sourceccix, $arch ]; } foreach my $assign (@assignments) { $wiccix2arch{$assign->[0]}= $assign->[1]; } } sub yppedia_graph_shortest_paths () { $wialldists= $widists->APSP_Floyd_Warshall(); } sub widist ($$) { my ($p,$q) = @_; my $pl= $wialldists->path_length($p,$q); # die "$p $q" unless defined $pl; # my @pv= $wialldists->path_vertices($p,$q); # if (@pv == $pl) { return $pl; } # printf $debugfh "%-5s PATHLENGTH %-5s pl=%s pv=%s\n", $p,$q,$pl,join('|',@pv); return $pl; } sub winode2arch ($) { my ($node) = @_; my $ccix= $wiarchs->connected_component_by_vertex($node); return $wiccix2arch{$ccix}; } sub wiisland2arch ($) { my ($island) = @_; my $node= $wiisland2node{$island}; die "$island ?" unless defined $node; return winode2arch($node); } sub compare_island_lists () { foreach my $island (sort keys %dbisland2arch) { my $node= $wiisland2node{$island}; if (!defined $node) { error("would delete island: $island"); next; } my $wiarch= winode2arch($node); if (!defined $wiarch) { error("island has no arch: $island"); next; } my $dbarch= $dbisland2arch{$island}; if ($wiarch ne $dbarch) { change("archipelago change from $dbarch to $wiarch". " for island $island"); } } foreach my $island (sort keys %wiisland2node) { my $wtarch= $wtisland2arch{$island}; my $wiarch= wiisland2arch($island); if (!$stdin_chart) { if (!defined $wtarch) { error("island from chart not found on ocean page: $island"); } elsif (defined $wiarch and $wtarch ne $wiarch) { error("island in $wtarch on ocean page but". " concluded $wiarch from chart: $island"); } } my $dbarch= $dbisland2arch{$island}; if (!defined $dbarch) { my $wiarch= wiisland2arch($island); if (!defined $wiarch) { error("new island has no arch: $island"); next; # We check arches of non-new islands above } change("island new in $wiarch: $island"); } } if (!$stdin_chart) { foreach my $island (sort keys %wtisland2arch) { my $node= $wiisland2node{$island}; next if defined $node; error("island on ocean page but not in chart: $island"); } } } sub shortest_path_reduction ($$) { my ($what,$g) = @_; # # Takes a graph $g (and a string for messages $what) and returns # a new graph which is the miminal shortest path transient reduction # of $g. # # We also check that the shortest path closure of the intended result # is the same graph as the input. Thus the input must itself be # a shortest path closure; if it isn't, we die. my $proof=<<'END'; # way to make a big comment Premises and definitions: 1. F is an undirected weighted graph with positive edge weights. 2. All graphs we will consider have the same vertices as F and none have self-edges. 3. G = Closure(F) is the graph of cliques whose edge weights are the shortest paths in F, one clique for each connected component in F. 3a. |XY| for vertices X, Y is the weight of the edge XY in G. If XY is not in G, |XY| is infinite. 4. A `reduction' of G is a subgraph K of G such that Closure(K) = G. The reduction is `minimal' if there is no strict subgraph K' of K such that Closure(K') = G. 5. Now each edge of G may be: - `unnecessary': included in no minimal reductions of G. - `essential': included in all minimal reductions of G. - `contingent': included in some but not all. 6. Consider for any edge AC between the vertices A and C, whether there is any B such that |AB|+|BC| = |AC| ? (There can be no B such that the sum < |AC| since that would mean that |AC| wasn't equal to the shortest path length.) 6a. No such B: AC is therefore the only shortest path from A to C (since G is not a multigraph). AC is thus an essential edge. 6b. Some such B: Call all such edges AC `questionable'. 6c. Thus all edges are essential or questionable. 7. Suppose AC is a shortest contingent edge. AC must be questionable since it is not essential. Suppose it is made questionable by the existence of B such that |AB|+|BC| = |AC|. Consider AB and BC. Since |AB| and |BC| are positive, |BC| and |AB| must be < |AC| ie AB and BC are shorter than AC. Since AC is a shortest contingent edge, there must be shortest paths in G for AB and BC consisting entirely of essential edges. 8. Therefore it is always safe to remove AC since the paths A..B and B..C will definitely still remain and provide a path A..B..C with length |AB|+|BC| = |AC|. 9. Thus AC is unnecessary, contradicting the assumption in 7. There are therefore no shortest contingent edges, and thus no contingent edges. 10. We can construct a minimal reduction directly: for each edge AC in G, search for a vertex B such that |AB|+|BC| = |AC|. If we find none, AC is essential. If we find one then AC is not essential and is therefore unnecessary. END # ` printf $debugfh "spr %s before %d\n", $what, scalar($g->edges()); my $result= Graph::Undirected->new(); foreach my $edge_ac ($g->edges()) { $result->add_vertex($edge_ac->[0]); # just in case next if $edge_ac->[0] eq $edge_ac->[1]; my $edgename_ac= join ' .. ', @$edge_ac; printf $debugfh "spr %s edge %s\n", $what, $edgename_ac; my $w_ac= $g->get_edge_weight(@$edge_ac); my $needed= 1; foreach my $vertex_b ($g->vertices()) { next if grep { $_ eq $vertex_b } @$edge_ac; my $w_ab= $g->get_edge_weight($edge_ac->[0], $vertex_b); next unless defined $w_ab; next if $w_ab >= $w_ac; my $w_bc= $g->get_edge_weight($vertex_b, $edge_ac->[1]); next unless defined $w_ac; next if $w_ab + $w_bc > $w_ac; # found path printf $debugfh "spr %s edge %s unnecessary %s\n", $what, $edgename_ac, $vertex_b; $needed= 0; last; } if ($needed) { printf $debugfh "spr %s edge %s essential\n", $what, $edgename_ac; $result->add_weighted_edge(@$edge_ac,$w_ac); } } printf $debugfh "spr %s result %d\n", $what, scalar($result->edges()); my $apsp= $result->APSP_Floyd_Warshall(); foreach my $ia (sort $g->vertices()) { foreach my $ib (sort $g->vertices()) { my $din= $g->get_edge_weight($ia,$ib); my $dout= $apsp->path_length($ia,$ib); $din= defined($din) ? $din : 'infinity'; $dout= defined($dout) ? $dout : 'infinity'; error("$what spr apsp discrepancy in=$din out=$dout". " for $ia .. $ib") if $din != $dout; } } return $result; } sub yppedia_graph_spr () { my $base= Graph::Undirected->new(); foreach my $na (sort keys %winode2island) { my $ia= $winode2island{$na}; foreach my $nb (sort keys %winode2island) { my $ib= $winode2island{$nb}; $base->add_weighted_edge($ia,$ib, widist($na,$nb)); } } $wispr= shortest_path_reduction('wi',$base); } sub yppedia_ocean_fetch_start ($) { my ($chart) = @_; my @args= (); push @args, '--chart' if $chart; push @args, $ocean; open OCEAN, '-|', "./yppedia-ocean-scraper", @args or die $!; } sub yppedia_ocean_fetch_done () { $?=0; $!=0; close OCEAN; $? and die $?; $! and die $!; } sub yppedia_ocean_fetch_chart () { if ($stdin_chart) { run_yppedia_chart_parse('::STDIN'); } else { yppedia_ocean_fetch_start(1); run_yppedia_chart_parse('::OCEAN'); yppedia_ocean_fetch_done(); } } sub yppedia_ocean_fetch_text () { yppedia_ocean_fetch_start(0); my $arch; while () { chomp; if (m/^ocean /) { $' eq $ocean or die; } elsif (m/^ /) { die unless defined $arch; $wtisland2arch{$'}= $arch; } elsif (m/^ /) { $arch= $'; # ' } else { die; } } yppedia_ocean_fetch_done(); } sub compare_distances () { foreach my $ia (sort keys %dbisland2arch) { my $na= $wiisland2node{$ia}; next unless defined $na; foreach my $ib (sort keys %dbisland2arch) { next unless $ia le $ib; # do every pair only once my $dbdist= $dbspr->get_edge_weight($ia,$ib); my $widist= $wispr->get_edge_weight($ia,$ib); next unless defined $dbdist || defined $widist; if (!defined $widist) { warning(sprintf "route delete %2d for %s .. %s", $dbdist, $ia,$ib); } elsif (!defined $dbdist) { change(sprintf "route new %2d for %s .. %s", $widist, $ia,$ib); } elsif ($dbdist != $widist) { change(sprintf "route change %2d to %2d for %s .. %s", $dbdist, $widist, $ia,$ib); } } } } #========== database handling ========== sub database_fetch_ocean () { my ($row,$sth); $sth= $dbh->prepare('SELECT islandname, archipelago FROM islands'); $sth->execute(); undef %dbisland2arch; $dbdists= Graph::Undirected->new(); while ($row= $sth->fetchrow_hashref) { print $debugfh "database-island $row->{'islandname'}". " $row->{'archipelago'}\n"; $dbisland2arch{$row->{'islandname'}}= $row->{'archipelago'}; } $sth= $dbh->prepare('SELECT dist, a.islandname a, b.islandname b FROM dists JOIN islands AS a ON dists.aiid==a.islandid JOIN islands AS b ON dists.biid==b.islandid'); $sth->execute(); while ($row= $sth->fetchrow_hashref) { $dbdists->add_weighted_edge($row->{'a'}, $row->{'b'}, $row->{'dist'}); } } sub database_graph_spr () { $dbspr= shortest_path_reduction('db',$dbdists); } sub database_do_updates () { my $addisland= $dbh->prepare(<<'END') INSERT OR IGNORE INTO islands (islandname, archipelago) VALUES (?, ?); END ; foreach my $island (sort keys %wiisland2node) { my $wiarch= wiisland2arch($island); $addisland->execute($island, $wiarch); } db_doall(<prepare(<<'END') INSERT INTO dists VALUES ((SELECT islandid FROM islands WHERE islandname == ?), (SELECT islandid FROM islands WHERE islandname == ?), ?); END ; my $addroute= $dbh->prepare(<<'END') INSERT INTO routes VALUES ((SELECT islandid FROM islands WHERE islandname == ?), (SELECT islandid FROM islands WHERE islandname == ?), ?); END ; foreach my $ia (sort keys %wiisland2node) { my $na= $wiisland2node{$ia}; foreach my $ib (sort keys %wiisland2node) { my $nb= $wiisland2node{$ib}; my $apdist= $ia eq $ib ? 0 : widist($na,$nb); die "$ia $ib" unless defined $apdist; my $sprdist= $wispr->get_edge_weight($ia,$ib); die "$ia $ib $apdist $sprdist" if defined($sprdist) && $sprdist != $apdist; $adddist->execute($ia,$ib,$apdist); $addroute->execute($ia,$ib,$sprdist) if defined $sprdist; } } # select ia.islandname, ib.islandname, d.dist from dists as d, islands as ia on d.aiid = ia.islandid, islands as ib on d.biid = ib.islandid order by ia.islandname, ib.islandname; } #========== update _ocean-*.txt ========== our $localtopo_path; sub localtopo_rewrite () { $localtopo_path= '_ocean-'.(lc $ocean).'.txt'; my $fh= new IO::File "$localtopo_path.tmp", 'w'; print $fh "# autogenerated - do not edit\n" or die $!; print $fh "ocean $ocean\n" or die $!; my %arches; foreach my $isle (sort keys %wtisland2arch) { my $arch= $wtisland2arch{$isle}; push @{ $arches{$arch} }, $isle; } foreach my $arch (sort keys %arches) { print $fh " $arch\n" or die $!; foreach my $isle (@{ $arches{$arch} }) { print $fh " $isle\n" or die $!; } } print $fh "\n" or die $!; close $fh or die $!; } sub localtopo_commit () { rename "$localtopo_path.tmp", $localtopo_path or die $!; } #========== main program ========== parse_info_serverside(); progress("fetching yppedia chart"); yppedia_ocean_fetch_chart(); progress("adding shortcuts"); yppedia_graphs_add_shortcuts(); progress("pruning boring vertices"); yppedia_graphs_prune_boring(); progress("checking yppedia graphs"); yppedia_graphs_check(); progress("setting archs from source-info"); yppedia_archs_sourceinfo(); progress("computing shortest paths"); yppedia_graph_shortest_paths(); progress("setting archs from labels"); yppedia_archs_chart_labels(); progress("setting archs from nearby"); yppedia_archs_fillbynearest(); progress("computing yppedia spr"); yppedia_graph_spr(); if (!$stdin_chart) { progress("fetching yppedia ocean text"); yppedia_ocean_fetch_text(); } db_setocean($ocean); db_connect(); my $iteration=0; for (;;) { progress("reading database"); database_fetch_ocean(); progress("computing database spr"); database_graph_spr(); progress("comparing islands"); compare_island_lists(); progress("comparing distances"); compare_distances(); print "\n"; print_messages(); foreach my $k (@msgkinds) { my $n= $msgkindprinted{$k}; next unless $n; printf STDERR "*** %d%s %ss\n", $n, $iteration?' additional':'', $k; } if ($msgs{'error'}) { print STDERR "*** errors, aborting update\n"; exit 1; } if (!%msgkindprinted) { progress("updating database"); database_do_updates(); progress("updating _ocean-*.txt"); localtopo_rewrite(); if ($stdin_chart) { print STDERR "*** --stdin-chart, aborting!\n"; exit 1; } progress("checking database"); db_check_referential_integrity(1); progress("committing database"); $dbh->commit(); progress("committing _ocean-*.txt"); localtopo_commit(); exit 0; } $dbh->rollback(); my $default= !$msgkindprinted{'warning'}; printf STDERR "*** confirm update %s ? ", $default?'(y/n)':'(n/y)'; if ($stdin_chart) { printf STDERR "[--stdin-chart]\n"; exit 1; } $!=0; my $result= ; defined $result or die $!; $result =~ s/\s//g; $result= $default?'y':'n' if !length $result; $result= $result =~ m/^y/i; if (!$result) { printf STDERR "*** updated abandoned at your request\n"; exit 1; } print "\n"; undef %msgkindprinted; $iteration++; } print_messages();