6 # usage: ./yppedia-chart-parser <Oceanname>
7 # updates OCEAN-Oceanname.db and _ocean-<oceanname>.txt
8 # from YPPedia (chart and ocean page) and source-info.txt
10 # This is part of ypp-sc-tools, a set of third-party tools for assisting
11 # players of Yohoho Puzzle Pirates.
13 # Copyright (C) 2009 Ian Jackson <ijackson@chiark.greenend.org.uk>
15 # This program is free software: you can redistribute it and/or modify
16 # it under the terms of the GNU General Public License as published by
17 # the Free Software Foundation, either version 3 of the License, or
18 # (at your option) any later version.
20 # This program is distributed in the hope that it will be useful,
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 # GNU General Public License for more details.
25 # You should have received a copy of the GNU General Public License
26 # along with this program. If not, see <http://www.gnu.org/licenses/>.
28 # Yohoho and Puzzle Pirates are probably trademarks of Three Rings and
29 # are used without permission. This program is not endorsed or
30 # sponsored by Three Rings.
32 use strict (qw(vars));
35 use Graph::Undirected;
40 my $widists= Graph::Undirected->new();
41 my $wiarchs= Graph::Undirected->new();
57 my @msgkinds= qw(change warning error);
62 my $m= "$_[0]: $_[1]\n";
63 print $debugfh "D $m";
64 push @{ $msgs{$_[0]} }, $m;
66 sub warning ($) { pmsg("warning",$_[0]); }
67 sub error ($) { pmsg("error", $_[0]); }
68 sub change ($) { pmsg("change", $_[0]); }
69 sub print_messages () {
70 foreach my $k (@msgkinds) {
73 foreach my $m (sort @$ms) {
74 next if $msgprinted{$m};
77 $msgkindprinted{$k}++;
81 sub progress ($) { print "($_[0])\n"; }
85 $debugfh= new IO::File ">/dev/null" or die $!;
88 last unless $ARGV[0] =~ m/^-/;
91 if ($_ eq '--debug') {
92 $debugfh= new IO::File ">&STDOUT" or die $!;
93 select($debugfh); $|=1; select(STDOUT);
94 } elsif ($_ eq '--stdin-chart') {
103 my $ocean= shift @ARGV;
109 my $tp= (0+$x ^ 0+$y) & 1;
110 defined $parity or $parity=$tp;
111 $tp==$parity or warning("line $.: parity error $x,$y is $tp not $parity");
112 my $n= "$_[0],$_[1]";
113 $winode2lines{$n}{$.}++;
117 sub run_yppedia_chart_parse ($) {
119 yppedia_chart_parse($oceanfh, $debugfh,
122 my ($x,$y,$arch) = @_;
123 push @wiarchlabels, [ $x,$y,$arch ];
126 my ($n, $island) = @_;
127 $wiisland2node{$island}= $n;
128 $winode2island{$n}= $island;
129 $widists->add_vertex($n);
130 $wiarchs->add_vertex($n);
133 my ($na, $nb, $solid) = @_;
134 $widists->add_weighted_edge($na, $nb, 1);
135 $wiarchs->add_edge($na, $nb) if $solid;
136 $wiarchs->add_edge($na, $nb) if $solid;
140 warning("line $l: ignoring incomprehensible: $l");
144 sub yppedia_graphs_add_shortcuts () {
145 # We add edges between LPs we know about, as you can chart
146 # between them. Yppedia often lacks these edges.
148 foreach my $p ($widists->vertices) {
149 my ($ax,$ay) = $p =~ m/^(\d+)\,(\d+)$/ or die;
150 my $add_shortcut= sub {
151 my $q= sprintf "%d,%d", $ax+$_[0], $ay+$_[1];
152 return unless $widists->has_vertex($q);
153 return if $widists->has_edge($p,$q);
154 printf $debugfh "%-5s league-shortcut %-5s\n", $p, $q;
155 $widists->add_weighted_edge($p,$q,1);
157 $add_shortcut->( 2,0);
158 $add_shortcut->(+1,1);
159 $add_shortcut->(-1,1);
163 sub yppedia_graphs_prune_boring () {
164 # Prune the LP database by eliminating boring intermediate vertices
165 foreach my $delete ($widists->vertices()) {
166 next if exists $winode2island{$delete};
167 my @neigh= $widists->neighbours($delete);
168 next unless @neigh==2;
170 map { $weight += $widists->get_edge_weight($delete, $_) } @neigh;
171 $widists->add_weighted_edge(@neigh, $weight);
172 $widists->delete_vertex($delete);
173 printf $debugfh "%-5s elide %5s %-5s %2d\n", $delete, @neigh, $weight;
177 sub yppedia_graphs_check () {
178 # Check that it's connected.
179 foreach my $cc ($widists->connected_components()) {
180 next if 2*@$cc > $widists->vertices();
181 my $m= "disconnected league point(s):";
182 foreach my $n (@$cc) {
183 $m .= "\n LP $n, def. yppedia line(s): ".
184 join(',', sort keys %{ $winode2lines{$n} });
190 sub yppedia_archs_sourceinfo () {
191 # Assign archipelagoes according to the source-info file
192 foreach my $arch (sort keys %{ $oceans{$ocean} }) {
193 foreach my $islename (sort keys %{ $oceans{$ocean}{$arch} }) {
194 my $islenode= $wiisland2node{$islename};
195 if (!defined $islenode) {
196 error("island $islename in source-info but not in WP map");
199 my $ccix= $wiarchs->connected_component_by_vertex($islenode);
200 my $oldarch= $wiccix2arch{$ccix};
201 error("island in $arch in source-info".
202 " connected to $oldarch as well: $islename")
203 if defined $oldarch && $oldarch ne $arch;
204 printf $debugfh "%-5s force-island-arch cc%-2d %-10s %s\n",
205 $islenode, $ccix, $arch, $islename;
206 $wiccix2arch{$ccix}= $arch;
211 sub yppedia_archs_chart_labels () {
212 # Assign archipelago labels to groups of islands
214 foreach my $label (@wiarchlabels) {
215 my ($ax,$ay,$arch) = @$label;
216 my $best_d2= 9999999;
218 # print $debugfh "$ax,$ay arch-island-search $arch\n";
219 $ay += 1; $ax += 2; # coords are rather to the top left of label
220 foreach my $vertex ($wiarchs->vertices()) {
221 next unless exists $winode2island{$vertex};
222 my $ccix= $wiarchs->connected_component_by_vertex($vertex);
223 my @cc= $wiarchs->connected_component_by_index($ccix);
224 my ($vx,$vy) = split /,/, $vertex; # /
225 my $d2= ($vx-$ax)*($vx-$ax) + ($vy-$ay)*($vy-$ay);
226 my $cmp= $best_d2 <=> $d2;
227 printf $debugfh "%2d,%-2d arch-island-search %5s d2=%4d cc%-2d".
228 " #cc=%2d cmp=%2d %s\n",
229 $ax,$ay, $vertex, $d2, $ccix, scalar(@cc), $cmp,
230 $winode2island{$vertex};
231 next unless $cmp > 0;
235 die 'no island vertices?!' unless defined $best_n;
236 my $ccix= $wiarchs->connected_component_by_vertex($best_n);
238 "%2d,%-2d arch-island-select %-5s d2=%4d cc%-2d %-10s %s\n",
239 $ax,$ay, $best_n, $ccix, $best_d2, $arch, $winode2island{$best_n};
240 my $desc= join "\n", map {
241 my $in= $winode2island{$_};
242 " LP $_". (defined $in ? ", $in" : "");
243 } sort $wiarchs->connected_component_by_index($ccix);
245 if (exists $wiccix2arch{$ccix} and $wiccix2arch{$ccix} ne $arch) {
246 error("archipelago determination failed, wrongly merged:\n".
247 " archipelago $arch\n".
248 " archipelago $wiccix2arch{$ccix}\n".
252 $wiccix2arch{$ccix}= $arch;
253 # print "$ccix $arch ::\n$desc\n";
257 sub yppedia_archs_fillbynearest() {
258 # Assign islands not labelled above to archipelagoes.
260 # We do this by, for each connected component (set of islands
261 # linked by purchaseable charts), searching for the nearest other
262 # connected component which has already been assigned an arch.
263 # `Nearest' means shortest distance of unpurchaseable charts, in
266 # we need only consider vertices which weren't `boring intermediate
267 # vertices' (removed during optimisation as being of order 2)
268 my @ccs_useful= map {
269 [ grep { $widists->has_vertex($_) } @$_ ]
270 } $wiarchs->connected_components();
274 foreach my $sourceccix (0..$#ccs_useful) {
275 next if defined $wiccix2arch{$sourceccix};
276 next unless $ccs_useful[$sourceccix];
278 my @sourcecc= $wiarchs->connected_component_by_index($sourceccix);
279 my @islandnodes= grep { $winode2island{$_} } @sourcecc;
280 next unless @islandnodes; # don't care, then
282 foreach my $islandnode (@islandnodes) {
283 printf $debugfh "%-5s arch-join-need cc%-2d %s\n",
284 $islandnode, $sourceccix, $winode2island{$islandnode};
286 my $best_dist= 9999999;
287 my ($best_target, $best_targetccix, $best_source);
288 foreach my $targetccix (0..$#ccs_useful) {
289 next unless defined $wiccix2arch{$targetccix}; # not helpful
290 next unless $ccs_useful[$targetccix];
291 foreach my $target ($wiarchs->
292 connected_component_by_index($targetccix)) {
293 next unless $widists->has_vertex($target);
294 foreach my $source (@sourcecc) {
295 my $target_dist= widist($target,$source);
296 next unless defined $target_dist;
297 next if $target_dist >= $best_dist;
298 $best_dist= $target_dist;
299 $best_source= $source;
300 $best_target= $target;
301 $best_targetccix= $targetccix;
305 die "no possible target ?!" unless defined $best_target;
307 my $arch= $wiccix2arch{$best_targetccix};
308 my $best_island= $winode2island{$best_target};
309 printf $debugfh "%-5s arch-join-to %-5s dist=%2d cc%-2d %-10s %s\n",
310 $best_source, $best_target, $best_dist,
311 $best_targetccix, $arch,
312 defined($best_island) ? $best_island : "-";
314 push @assignments, [ $sourceccix, $arch ];
316 foreach my $assign (@assignments) {
317 $wiccix2arch{$assign->[0]}= $assign->[1];
321 sub yppedia_graph_shortest_paths () {
322 $wialldists= $widists->APSP_Floyd_Warshall();
327 my $pl= $wialldists->path_length($p,$q);
328 # die "$p $q" unless defined $pl;
329 # my @pv= $wialldists->path_vertices($p,$q);
330 # if (@pv == $pl) { return $pl; }
331 # printf $debugfh "%-5s PATHLENGTH %-5s pl=%s pv=%s\n", $p,$q,$pl,join('|',@pv);
335 sub winode2arch ($) {
337 my $ccix= $wiarchs->connected_component_by_vertex($node);
338 return $wiccix2arch{$ccix};
340 sub wiisland2arch ($) {
342 my $node= $wiisland2node{$island};
343 die "$island ?" unless defined $node;
344 return winode2arch($node);
347 sub compare_island_lists () {
348 foreach my $island (sort keys %dbisland2arch) {
349 my $node= $wiisland2node{$island};
350 if (!defined $node) {
351 error("would delete island: $island");
354 my $wiarch= winode2arch($node);
355 if (!defined $wiarch) {
356 error("island has no arch: $island");
359 my $dbarch= $dbisland2arch{$island};
360 if ($wiarch ne $dbarch) {
361 change("archipelago change from $dbarch to $wiarch".
362 " for island $island");
365 foreach my $island (sort keys %wiisland2node) {
366 my $wtarch= $wtisland2arch{$island};
367 my $wiarch= wiisland2arch($island);
369 if (!defined $wtarch) {
370 error("island from chart not found on ocean page: $island");
371 } elsif (defined $wiarch and $wtarch ne $wiarch) {
372 # error("island in $wtarch on ocean page but".
373 warning("island in $wtarch on ocean page but".
374 " concluded $wiarch from chart: $island");
378 my $dbarch= $dbisland2arch{$island};
379 if (!defined $dbarch) {
380 my $wiarch= wiisland2arch($island);
381 if (!defined $wiarch) {
382 error("new island has no arch: $island");
384 # We check arches of non-new islands above
386 change("island new in $wiarch: $island");
390 foreach my $island (sort keys %wtisland2arch) {
391 my $node= $wiisland2node{$island};
392 next if defined $node;
393 error("island on ocean page but not in chart: $island");
398 sub shortest_path_reduction ($$) {
401 # Takes a graph $g (and a string for messages $what) and returns
402 # a new graph which is the miminal shortest path transient reduction
405 # We also check that the shortest path closure of the intended result
406 # is the same graph as the input. Thus the input must itself be
407 # a shortest path closure; if it isn't, we die.
409 my $proof=<<'END'; # way to make a big comment
411 Premises and definitions:
413 1. F is an undirected weighted graph with positive edge weights.
415 2. All graphs we will consider have the same vertices as F
416 and none have self-edges.
418 3. G = Closure(F) is the graph of cliques whose edge weights
419 are the shortest paths in F, one clique for each connected
422 3a. |XY| for vertices X, Y is the weight of the edge XY in G.
423 If XY is not in G, |XY| is infinite.
425 4. A `reduction' of G is a subgraph K of G such that Closure(K) = G.
426 The reduction is `minimal' if there is no strict subgraph K'
427 of K such that Closure(K') = G.
429 5. Now each edge of G may be:
430 - `unnecessary': included in no minimal reductions of G.
431 - `essential': included in all minimal reductions of G.
432 - `contingent': included in some but not all.
434 6. Consider for any edge AC between the vertices A and C,
435 whether there is any B such that |AB|+|BC| = |AC| ?
436 (There can be no B such that the sum < |AC| since that would
437 mean that |AC| wasn't equal to the shortest path length.)
439 6a. No such B: AC is therefore the only shortest path from A to C
440 (since G is not a multigraph). AC is thus an essential edge.
442 6b. Some such B: Call all such edges AC `questionable'.
444 6c. Thus all edges are essential or questionable.
446 7. Suppose AC is a shortest contingent edge. AC must be
447 questionable since it is not essential. Suppose it is
448 made questionable by the existence of B such that |AB|+|BC| =
449 |AC|. Consider AB and BC. Since |AB| and |BC| are positive,
450 |BC| and |AB| must be < |AC| ie AB and BC are shorter than AC.
451 Since AC is a shortest contingent edge, there must be shortest
452 paths in G for AB and BC consisting entirely of essential edges.
454 8. Therefore it is always safe to remove AC since the paths
455 A..B and B..C will definitely still remain and provide a path
456 A..B..C with length |AB|+|BC| = |AC|.
458 9. Thus AC is unnecessary, contradicting the assumption in 7.
459 There are therefore no shortest contingent edges, and
460 thus no contingent edges.
462 10. We can construct a minimal reduction directly: for each edge
463 AC in G, search for a vertex B such that |AB|+|BC| = |AC|.
464 If we find none, AC is essential. If we find one then AC is
465 not essential and is therefore unnecessary.
470 printf $debugfh "spr %s before %d\n", $what, scalar($g->edges());
472 my $result= Graph::Undirected->new();
473 foreach my $edge_ac ($g->edges()) {
474 $result->add_vertex($edge_ac->[0]); # just in case
475 next if $edge_ac->[0] eq $edge_ac->[1];
476 my $edgename_ac= join ' .. ', @$edge_ac;
477 printf $debugfh "spr %s edge %s\n", $what, $edgename_ac;
478 my $w_ac= $g->get_edge_weight(@$edge_ac);
480 foreach my $vertex_b ($g->vertices()) {
481 next if grep { $_ eq $vertex_b } @$edge_ac;
482 my $w_ab= $g->get_edge_weight($edge_ac->[0], $vertex_b);
483 next unless defined $w_ab;
484 next if $w_ab >= $w_ac;
485 my $w_bc= $g->get_edge_weight($vertex_b, $edge_ac->[1]);
486 next unless defined $w_ac;
487 next if $w_ab + $w_bc > $w_ac;
489 printf $debugfh "spr %s edge %s unnecessary %s\n",
490 $what, $edgename_ac, $vertex_b;
495 printf $debugfh "spr %s edge %s essential\n", $what, $edgename_ac;
496 $result->add_weighted_edge(@$edge_ac,$w_ac);
499 printf $debugfh "spr %s result %d\n", $what, scalar($result->edges());
501 my $apsp= $result->APSP_Floyd_Warshall();
502 foreach my $ia (sort $g->vertices()) {
503 foreach my $ib (sort $g->vertices()) {
504 my $din= $g->get_edge_weight($ia,$ib);
505 my $dout= $apsp->path_length($ia,$ib);
506 $din= defined($din) ? $din : 'infinity';
507 $dout= defined($dout) ? $dout : 'infinity';
508 error("$what spr apsp discrepancy in=$din out=$dout".
516 sub yppedia_graph_spr () {
517 my $base= Graph::Undirected->new();
518 foreach my $na (sort keys %winode2island) {
519 my $ia= $winode2island{$na};
520 foreach my $nb (sort keys %winode2island) {
521 my $ib= $winode2island{$nb};
522 $base->add_weighted_edge($ia,$ib, widist($na,$nb));
525 $wispr= shortest_path_reduction('wi',$base);
528 sub yppedia_ocean_fetch_start ($) {
531 push @args, '--chart' if $chart;
533 open OCEAN, '-|', "./yppedia-ocean-scraper", @args or die $!;
535 sub yppedia_ocean_fetch_done () {
536 $?=0; $!=0; close OCEAN; $? and die $?; $! and die $!;
539 sub yppedia_ocean_fetch_chart () {
541 run_yppedia_chart_parse('::STDIN');
543 yppedia_ocean_fetch_start(1);
544 run_yppedia_chart_parse('::OCEAN');
545 yppedia_ocean_fetch_done();
549 sub yppedia_ocean_fetch_text () {
550 yppedia_ocean_fetch_start(0);
557 die unless defined $arch;
558 $wtisland2arch{$'}= $arch;
565 yppedia_ocean_fetch_done();
568 sub compare_distances () {
569 foreach my $ia (sort keys %dbisland2arch) {
570 my $na= $wiisland2node{$ia};
571 next unless defined $na;
572 foreach my $ib (sort keys %dbisland2arch) {
573 next unless $ia le $ib; # do every pair only once
574 my $dbdist= $dbspr->get_edge_weight($ia,$ib);
575 my $widist= $wispr->get_edge_weight($ia,$ib);
576 next unless defined $dbdist || defined $widist;
578 if (!defined $widist) {
579 warning(sprintf "route delete %2d for %s .. %s",
581 } elsif (!defined $dbdist) {
582 change(sprintf "route new %2d for %s .. %s",
584 } elsif ($dbdist != $widist) {
585 change(sprintf "route change %2d to %2d for %s .. %s",
586 $dbdist, $widist, $ia,$ib);
592 #========== database handling ==========
594 sub database_fetch_ocean () {
596 $sth= $dbh->prepare('SELECT islandname, archipelago FROM islands');
598 undef %dbisland2arch;
599 $dbdists= Graph::Undirected->new();
600 while ($row= $sth->fetchrow_hashref) {
601 print $debugfh "database-island $row->{'islandname'}".
602 " $row->{'archipelago'}\n";
603 $dbisland2arch{$row->{'islandname'}}= $row->{'archipelago'};
605 $sth= $dbh->prepare('SELECT dist, a.islandname a, b.islandname b
607 JOIN islands AS a ON dists.aiid==a.islandid
608 JOIN islands AS b ON dists.biid==b.islandid');
610 while ($row= $sth->fetchrow_hashref) {
611 $dbdists->add_weighted_edge($row->{'a'}, $row->{'b'}, $row->{'dist'});
615 sub database_graph_spr () {
616 $dbspr= shortest_path_reduction('db',$dbdists);
619 sub database_do_updates () {
620 my $addisland= $dbh->prepare(<<'END')
621 INSERT OR IGNORE INTO islands (islandname, archipelago) VALUES (?, ?);
624 foreach my $island (sort keys %wiisland2node) {
625 my $wiarch= wiisland2arch($island);
626 $addisland->execute($island, $wiarch);
634 my $adddist= $dbh->prepare(<<'END')
635 INSERT INTO dists VALUES
636 ((SELECT islandid FROM islands WHERE islandname == ?),
637 (SELECT islandid FROM islands WHERE islandname == ?),
641 my $addroute= $dbh->prepare(<<'END')
642 INSERT INTO routes VALUES
643 ((SELECT islandid FROM islands WHERE islandname == ?),
644 (SELECT islandid FROM islands WHERE islandname == ?),
648 foreach my $ia (sort keys %wiisland2node) {
649 my $na= $wiisland2node{$ia};
650 foreach my $ib (sort keys %wiisland2node) {
651 my $nb= $wiisland2node{$ib};
652 my $apdist= $ia eq $ib ? 0 : widist($na,$nb);
653 die "$ia $ib" unless defined $apdist;
654 my $sprdist= $wispr->get_edge_weight($ia,$ib);
655 die "$ia $ib $apdist $sprdist" if
656 defined($sprdist) && $sprdist != $apdist;
658 $adddist->execute($ia,$ib,$apdist);
659 $addroute->execute($ia,$ib,$sprdist) if defined $sprdist;
663 # select ia.islandname, ib.islandname, d.dist from dists as d, islands as ia on d.aiid = ia.islandid, islands as ib on d.biid = ib.islandid order by ia.islandname, ib.islandname;
667 #========== update _ocean-*.txt ==========
671 sub localtopo_rewrite () {
672 $localtopo_path= '_ocean-'.(lc $ocean).'.txt';
673 my $fh= new IO::File "$localtopo_path.tmp", 'w';
674 print $fh "# autogenerated - do not edit\n" or die $!;
675 print $fh "ocean $ocean\n" or die $!;
677 foreach my $isle (sort keys %wtisland2arch) {
678 my $arch= $wtisland2arch{$isle};
679 push @{ $arches{$arch} }, $isle;
681 foreach my $arch (sort keys %arches) {
682 print $fh " $arch\n" or die $!;
683 foreach my $isle (@{ $arches{$arch} }) {
684 print $fh " $isle\n" or die $!;
687 print $fh "\n" or die $!;
691 sub localtopo_commit () {
692 rename "$localtopo_path.tmp", $localtopo_path or die $!;
695 #========== main program ==========
697 parse_info_serverside();
699 progress("fetching yppedia chart"); yppedia_ocean_fetch_chart();
700 progress("adding shortcuts"); yppedia_graphs_add_shortcuts();
701 progress("pruning boring vertices"); yppedia_graphs_prune_boring();
702 progress("checking yppedia graphs"); yppedia_graphs_check();
703 progress("setting archs from source-info"); yppedia_archs_sourceinfo();
704 progress("computing shortest paths"); yppedia_graph_shortest_paths();
705 progress("setting archs from labels"); yppedia_archs_chart_labels();
706 progress("setting archs from nearby"); yppedia_archs_fillbynearest();
707 progress("computing yppedia spr"); yppedia_graph_spr();
710 progress("fetching yppedia ocean text"); yppedia_ocean_fetch_text();
717 progress("reading database");
718 database_fetch_ocean();
719 progress("computing database spr"); database_graph_spr();
721 progress("comparing islands"); compare_island_lists();
722 progress("comparing distances"); compare_distances();
727 foreach my $k (@msgkinds) {
728 my $n= $msgkindprinted{$k};
730 printf STDERR "*** %d%s %ss\n", $n, $iteration?' additional':'', $k;
733 if ($msgs{'error'}) {
734 print STDERR "*** errors, aborting update\n";
738 if (!%msgkindprinted) {
739 progress("updating database"); database_do_updates();
740 progress("updating _ocean-*.txt"); localtopo_rewrite();
742 print STDERR "*** --stdin-chart, aborting!\n";
745 progress("checking database"); db_check_referential_integrity(1);
746 progress("committing database"); $dbh->commit();
747 progress("committing _ocean-*.txt"); localtopo_commit();
752 my $default= !$msgkindprinted{'warning'};
753 printf STDERR "*** confirm update %s ? ", $default?'(y/n)':'(n/y)';
756 printf STDERR "[--stdin-chart]\n";
760 $!=0; my $result= <STDIN>; defined $result or die $!;
762 $result= $default?'y':'n' if !length $result;
763 $result= $result =~ m/^y/i;
766 printf STDERR "*** updated abandoned at your request\n";
771 undef %msgkindprinted;