6 # usage: ./yppedia-chart-parser <Oceanname>
7 # updates OCEAN-Oceanname.db and _ocean-<oceanname>.txt
8 # from YPPedia (chart and ocean page) and source-info.txt
10 # This is part of ypp-sc-tools, a set of third-party tools for assisting
11 # players of Yohoho Puzzle Pirates.
13 # Copyright (C) 2009 Ian Jackson <ijackson@chiark.greenend.org.uk>
15 # This program is free software: you can redistribute it and/or modify
16 # it under the terms of the GNU General Public License as published by
17 # the Free Software Foundation, either version 3 of the License, or
18 # (at your option) any later version.
20 # This program is distributed in the hope that it will be useful,
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 # GNU General Public License for more details.
25 # You should have received a copy of the GNU General Public License
26 # along with this program. If not, see <http://www.gnu.org/licenses/>.
28 # Yohoho and Puzzle Pirates are probably trademarks of Three Rings and
29 # are used without permission. This program is not endorsed or
30 # sponsored by Three Rings.
32 use strict (qw(vars));
35 use Graph::Undirected;
40 my $widists= Graph::Undirected->new();
41 my $wiarchs= Graph::Undirected->new();
57 my @msgkinds= qw(change warning error);
62 my $m= "$_[0]: $_[1]\n";
63 print $debugfh "D $m";
64 push @{ $msgs{$_[0]} }, $m;
66 sub warning ($) { pmsg("warning",$_[0]); }
67 sub error ($) { pmsg("error", $_[0]); }
68 sub change ($) { pmsg("change", $_[0]); }
69 sub print_messages () {
70 foreach my $k (@msgkinds) {
73 foreach my $m (sort @$ms) {
74 next if $msgprinted{$m};
77 $msgkindprinted{$k}++;
81 sub progress ($) { print "($_[0])\n"; }
85 $debugfh= new IO::File ">/dev/null" or die $!;
88 last unless $ARGV[0] =~ m/^-/;
91 if ($_ eq '--debug') {
92 $debugfh= new IO::File ">&STDOUT" or die $!;
93 select($debugfh); $|=1; select(STDOUT);
94 } elsif ($_ eq '--stdin-chart') {
103 my $ocean= shift @ARGV;
109 my $tp= (0+$x ^ 0+$y) & 1;
110 defined $parity or $parity=$tp;
111 $tp==$parity or warning("line $.: parity error $x,$y is $tp not $parity");
112 my $n= "$_[0],$_[1]";
113 $winode2lines{$n}{$.}++;
117 sub run_yppedia_chart_parse ($) {
119 yppedia_chart_parse($oceanfh, $debugfh,
122 my ($x,$y,$arch) = @_;
123 push @wiarchlabels, [ $x,$y,$arch ];
126 my ($n, $island) = @_;
127 $wiisland2node{$island}= $n;
128 $winode2island{$n}= $island;
129 $widists->add_vertex($n);
130 $wiarchs->add_vertex($n);
133 my ($na, $nb, $solid) = @_;
134 $widists->add_weighted_edge($na, $nb, 1);
135 $wiarchs->add_edge($na, $nb) if $solid;
136 $wiarchs->add_edge($na, $nb) if $solid;
140 warning("line $l: ignoring incomprehensible: $l");
144 sub yppedia_graphs_add_shortcuts () {
145 # We add edges between LPs we know about, as you can chart
146 # between them. Yppedia often lacks these edges.
148 foreach my $p ($widists->vertices) {
149 my ($ax,$ay) = $p =~ m/^(\d+)\,(\d+)$/ or die;
150 my $add_shortcut= sub {
151 my $q= sprintf "%d,%d", $ax+$_[0], $ay+$_[1];
152 return unless $widists->has_vertex($q);
153 return if $widists->has_edge($p,$q);
154 printf $debugfh "%-5s league-shortcut %-5s\n", $p, $q;
155 $widists->add_weighted_edge($p,$q,1);
157 $add_shortcut->( 2,0);
158 $add_shortcut->(+1,1);
159 $add_shortcut->(-1,1);
163 sub yppedia_graphs_prune_boring () {
164 # Prune the LP database by eliminating boring intermediate vertices
165 foreach my $delete ($widists->vertices()) {
166 next if exists $winode2island{$delete};
167 my @neigh= $widists->neighbours($delete);
168 next unless @neigh==2;
170 map { $weight += $widists->get_edge_weight($delete, $_) } @neigh;
171 $widists->add_weighted_edge(@neigh, $weight);
172 $widists->delete_vertex($delete);
173 printf $debugfh "%-5s elide %5s %-5s %2d\n", $delete, @neigh, $weight;
177 sub yppedia_graphs_check () {
178 # Check that it's connected.
179 foreach my $cc ($widists->connected_components()) {
180 next if 2*@$cc > $widists->vertices();
181 my $m= "disconnected league point(s):";
182 foreach my $n (@$cc) {
183 $m .= "\n LP $n, def. yppedia line(s): ".
184 join(',', sort keys %{ $winode2lines{$n} });
190 sub yppedia_archs_sourceinfo () {
191 # Assign archipelagoes according to the source-info file
192 foreach my $arch (sort keys %{ $oceans{$ocean} }) {
193 foreach my $islename (sort keys %{ $oceans{$ocean}{$arch} }) {
194 my $islenode= $wiisland2node{$islename};
195 if (!defined $islenode) {
196 error("island $islename in source-info but not in WP map");
199 my $ccix= $wiarchs->connected_component_by_vertex($islenode);
200 my $oldarch= $wiccix2arch{$ccix};
201 error("island in $arch in source-info".
202 " connected to $oldarch as well: $islename")
203 if defined $oldarch && $oldarch ne $arch;
204 printf $debugfh "%-5s force-island-arch cc%-2d %-10s %s\n",
205 $islenode, $ccix, $arch, $islename;
206 $wiccix2arch{$ccix}= $arch;
211 sub yppedia_archs_chart_labels () {
212 # Assign archipelago labels to groups of islands
214 foreach my $label (@wiarchlabels) {
215 my ($ax,$ay,$arch) = @$label;
216 my $best_d2= 9999999;
218 # print $debugfh "$ax,$ay arch-island-search $arch\n";
219 $ay += 1; $ax += 2; # coords are rather to the top left of label
220 foreach my $vertex ($wiarchs->vertices()) {
221 next unless exists $winode2island{$vertex};
222 my $ccix= $wiarchs->connected_component_by_vertex($vertex);
223 my @cc= $wiarchs->connected_component_by_index($ccix);
224 my ($vx,$vy) = split /,/, $vertex; # /
225 my $d2= ($vx-$ax)*($vx-$ax) + ($vy-$ay)*($vy-$ay);
226 my $cmp= $best_d2 <=> $d2;
227 printf $debugfh "%2d,%-2d arch-island-search %5s d2=%4d cc%-2d".
228 " #cc=%2d cmp=%2d %s\n",
229 $ax,$ay, $vertex, $d2, $ccix, scalar(@cc), $cmp,
230 $winode2island{$vertex};
231 next unless $cmp > 0;
235 die 'no island vertices?!' unless defined $best_n;
236 my $ccix= $wiarchs->connected_component_by_vertex($best_n);
238 "%2d,%-2d arch-island-select %-5s d2=%4d cc%-2d %-10s %s\n",
239 $ax,$ay, $best_n, $ccix, $best_d2, $arch, $winode2island{$best_n};
240 my $desc= join "\n", map {
241 my $in= $winode2island{$_};
242 " LP $_". (defined $in ? ", $in" : "");
243 } sort $wiarchs->connected_component_by_index($ccix);
245 if (exists $wiccix2arch{$ccix} and $wiccix2arch{$ccix} ne $arch) {
246 error("archipelago determination failed, wrongly merged:\n".
247 " archipelago $arch\n".
248 " archipelago $wiccix2arch{$ccix}\n".
252 $wiccix2arch{$ccix}= $arch;
253 # print "$ccix $arch ::\n$desc\n";
257 sub yppedia_archs_fillbynearest() {
258 # Assign islands not labelled above to archipelagoes.
260 # We do this by, for each connected component (set of islands
261 # linked by purchaseable charts), searching for the nearest other
262 # connected component which has already been assigned an arch.
263 # `Nearest' means shortest distance of unpurchaseable charts, in
266 # we need only consider vertices which weren't `boring intermediate
267 # vertices' (removed during optimisation as being of order 2)
268 my @ccs_useful= map {
269 [ grep { $widists->has_vertex($_) } @$_ ]
270 } $wiarchs->connected_components();
274 foreach my $sourceccix (0..$#ccs_useful) {
275 next if defined $wiccix2arch{$sourceccix};
276 next unless $ccs_useful[$sourceccix];
278 my @sourcecc= $wiarchs->connected_component_by_index($sourceccix);
279 my @islandnodes= grep { $winode2island{$_} } @sourcecc;
280 next unless @islandnodes; # don't care, then
282 foreach my $islandnode (@islandnodes) {
283 printf $debugfh "%-5s arch-join-need cc%-2d %s\n",
284 $islandnode, $sourceccix, $winode2island{$islandnode};
286 my $best_dist= 9999999;
287 my ($best_target, $best_targetccix, $best_source);
288 foreach my $targetccix (0..$#ccs_useful) {
289 next unless defined $wiccix2arch{$targetccix}; # not helpful
290 next unless $ccs_useful[$targetccix];
291 foreach my $target ($wiarchs->
292 connected_component_by_index($targetccix)) {
293 next unless $widists->has_vertex($target);
294 foreach my $source (@sourcecc) {
295 my $target_dist= widist($target,$source);
296 next unless defined $target_dist;
297 next if $target_dist >= $best_dist;
298 $best_dist= $target_dist;
299 $best_source= $source;
300 $best_target= $target;
301 $best_targetccix= $targetccix;
305 die "no possible target ?!" unless defined $best_target;
307 my $arch= $wiccix2arch{$best_targetccix};
308 my $best_island= $winode2island{$best_target};
309 printf $debugfh "%-5s arch-join-to %-5s dist=%2d cc%-2d %-10s %s\n",
310 $best_source, $best_target, $best_dist,
311 $best_targetccix, $arch,
312 defined($best_island) ? $best_island : "-";
314 push @assignments, [ $sourceccix, $arch ];
316 foreach my $assign (@assignments) {
317 $wiccix2arch{$assign->[0]}= $assign->[1];
321 sub yppedia_graph_shortest_paths () {
322 $wialldists= $widists->APSP_Floyd_Warshall();
327 my $pl= $wialldists->path_length($p,$q);
328 # die "$p $q" unless defined $pl;
329 # my @pv= $wialldists->path_vertices($p,$q);
330 # if (@pv == $pl) { return $pl; }
331 # printf $debugfh "%-5s PATHLENGTH %-5s pl=%s pv=%s\n", $p,$q,$pl,join('|',@pv);
335 sub winode2arch ($) {
337 my $ccix= $wiarchs->connected_component_by_vertex($node);
338 return $wiccix2arch{$ccix};
340 sub wiisland2arch ($) {
342 my $node= $wiisland2node{$island};
343 die "$island ?" unless defined $node;
344 return winode2arch($node);
347 sub compare_island_lists () {
348 foreach my $island (sort keys %dbisland2arch) {
349 my $node= $wiisland2node{$island};
350 if (!defined $node) {
351 error("would delete island: $island");
354 my $wiarch= winode2arch($node);
355 if (!defined $wiarch) {
356 error("island has no arch: $island");
359 my $dbarch= $dbisland2arch{$island};
360 if ($wiarch ne $dbarch) {
361 change("archipelago change from $dbarch to $wiarch".
362 " for island $island");
365 foreach my $island (sort keys %wiisland2node) {
366 my $wtarch= $wtisland2arch{$island};
367 my $wiarch= wiisland2arch($island);
369 if (!defined $wtarch) {
370 error("island from chart not found on ocean page: $island");
371 } elsif (defined $wiarch and $wtarch ne $wiarch) {
372 error("island in $wtarch on ocean page but".
373 " concluded $wiarch from chart: $island");
377 my $dbarch= $dbisland2arch{$island};
378 if (!defined $dbarch) {
379 my $wiarch= wiisland2arch($island);
380 if (!defined $wiarch) {
381 error("new island has no arch: $island");
383 # We check arches of non-new islands above
385 change("island new in $wiarch: $island");
389 foreach my $island (sort keys %wtisland2arch) {
390 my $node= $wiisland2node{$island};
391 next if defined $node;
392 error("island on ocean page but not in chart: $island");
397 sub shortest_path_reduction ($$) {
400 # Takes a graph $g (and a string for messages $what) and returns
401 # a new graph which is the miminal shortest path transient reduction
404 # We also check that the shortest path closure of the intended result
405 # is the same graph as the input. Thus the input must itself be
406 # a shortest path closure; if it isn't, we die.
408 my $proof=<<'END'; # way to make a big comment
410 Premises and definitions:
412 1. F is an undirected weighted graph with positive edge weights.
414 2. All graphs we will consider have the same vertices as F
415 and none have self-edges.
417 3. G = Closure(F) is the graph of cliques whose edge weights
418 are the shortest paths in F, one clique for each connected
421 3a. |XY| for vertices X, Y is the weight of the edge XY in G.
422 If XY is not in G, |XY| is infinite.
424 4. A `reduction' of G is a subgraph K of G such that Closure(K) = G.
425 The reduction is `minimal' if there is no strict subgraph K'
426 of K such that Closure(K') = G.
428 5. Now each edge of G may be:
429 - `unnecessary': included in no minimal reductions of G.
430 - `essential': included in all minimal reductions of G.
431 - `contingent': included in some but not all.
433 6. Consider for any edge AC between the vertices A and C,
434 whether there is any B such that |AB|+|BC| = |AC| ?
435 (There can be no B such that the sum < |AC| since that would
436 mean that |AC| wasn't equal to the shortest path length.)
438 6a. No such B: AC is therefore the only shortest path from A to C
439 (since G is not a multigraph). AC is thus an essential edge.
441 6b. Some such B: Call all such edges AC `questionable'.
443 6c. Thus all edges are essential or questionable.
445 7. Suppose AC is a shortest contingent edge. AC must be
446 questionable since it is not essential. Suppose it is
447 made questionable by the existence of B such that |AB|+|BC| =
448 |AC|. Consider AB and BC. Since |AB| and |BC| are positive,
449 |BC| and |AB| must be < |AC| ie AB and BC are shorter than AC.
450 Since AC is a shortest contingent edge, there must be shortest
451 paths in G for AB and BC consisting entirely of essential edges.
453 8. Therefore it is always safe to remove AC since the paths
454 A..B and B..C will definitely still remain and provide a path
455 A..B..C with length |AB|+|BC| = |AC|.
457 9. Thus AC is unnecessary, contradicting the assumption in 7.
458 There are therefore no shortest contingent edges, and
459 thus no contingent edges.
461 10. We can construct a minimal reduction directly: for each edge
462 AC in G, search for a vertex B such that |AB|+|BC| = |AC|.
463 If we find none, AC is essential. If we find one then AC is
464 not essential and is therefore unnecessary.
469 printf $debugfh "spr %s before %d\n", $what, scalar($g->edges());
471 my $result= Graph::Undirected->new();
472 foreach my $edge_ac ($g->edges()) {
473 $result->add_vertex($edge_ac->[0]); # just in case
474 next if $edge_ac->[0] eq $edge_ac->[1];
475 my $edgename_ac= join ' .. ', @$edge_ac;
476 printf $debugfh "spr %s edge %s\n", $what, $edgename_ac;
477 my $w_ac= $g->get_edge_weight(@$edge_ac);
479 foreach my $vertex_b ($g->vertices()) {
480 next if grep { $_ eq $vertex_b } @$edge_ac;
481 my $w_ab= $g->get_edge_weight($edge_ac->[0], $vertex_b);
482 next unless defined $w_ab;
483 next if $w_ab >= $w_ac;
484 my $w_bc= $g->get_edge_weight($vertex_b, $edge_ac->[1]);
485 next unless defined $w_ac;
486 next if $w_ab + $w_bc > $w_ac;
488 printf $debugfh "spr %s edge %s unnecessary %s\n",
489 $what, $edgename_ac, $vertex_b;
494 printf $debugfh "spr %s edge %s essential\n", $what, $edgename_ac;
495 $result->add_weighted_edge(@$edge_ac,$w_ac);
498 printf $debugfh "spr %s result %d\n", $what, scalar($result->edges());
500 my $apsp= $result->APSP_Floyd_Warshall();
501 foreach my $ia (sort $g->vertices()) {
502 foreach my $ib (sort $g->vertices()) {
503 my $din= $g->get_edge_weight($ia,$ib);
504 my $dout= $apsp->path_length($ia,$ib);
505 $din= defined($din) ? $din : 'infinity';
506 $dout= defined($dout) ? $dout : 'infinity';
507 error("$what spr apsp discrepancy in=$din out=$dout".
515 sub yppedia_graph_spr () {
516 my $base= Graph::Undirected->new();
517 foreach my $na (sort keys %winode2island) {
518 my $ia= $winode2island{$na};
519 foreach my $nb (sort keys %winode2island) {
520 my $ib= $winode2island{$nb};
521 $base->add_weighted_edge($ia,$ib, widist($na,$nb));
524 $wispr= shortest_path_reduction('wi',$base);
527 sub yppedia_ocean_fetch_start ($) {
530 push @args, '--chart' if $chart;
532 open OCEAN, '-|', "./yppedia-ocean-scraper", @args or die $!;
534 sub yppedia_ocean_fetch_done () {
535 $?=0; $!=0; close OCEAN; $? and die $?; $! and die $!;
538 sub yppedia_ocean_fetch_chart () {
540 run_yppedia_chart_parse('::STDIN');
542 yppedia_ocean_fetch_start(1);
543 run_yppedia_chart_parse('::OCEAN');
544 yppedia_ocean_fetch_done();
548 sub yppedia_ocean_fetch_text () {
549 yppedia_ocean_fetch_start(0);
556 die unless defined $arch;
557 $wtisland2arch{$'}= $arch;
564 yppedia_ocean_fetch_done();
567 sub compare_distances () {
568 foreach my $ia (sort keys %dbisland2arch) {
569 my $na= $wiisland2node{$ia};
570 next unless defined $na;
571 foreach my $ib (sort keys %dbisland2arch) {
572 next unless $ia le $ib; # do every pair only once
573 my $dbdist= $dbspr->get_edge_weight($ia,$ib);
574 my $widist= $wispr->get_edge_weight($ia,$ib);
575 next unless defined $dbdist || defined $widist;
577 if (!defined $widist) {
578 warning(sprintf "route delete %2d for %s .. %s",
580 } elsif (!defined $dbdist) {
581 change(sprintf "route new %2d for %s .. %s",
583 } elsif ($dbdist != $widist) {
584 change(sprintf "route change %2d to %2d for %s .. %s",
585 $dbdist, $widist, $ia,$ib);
591 #========== database handling ==========
593 sub database_fetch_ocean () {
595 $sth= $dbh->prepare('SELECT islandname, archipelago FROM islands');
597 undef %dbisland2arch;
598 $dbdists= Graph::Undirected->new();
599 while ($row= $sth->fetchrow_hashref) {
600 print $debugfh "database-island $row->{'islandname'}".
601 " $row->{'archipelago'}\n";
602 $dbisland2arch{$row->{'islandname'}}= $row->{'archipelago'};
604 $sth= $dbh->prepare('SELECT dist, a.islandname a, b.islandname b
606 JOIN islands AS a ON dists.aiid==a.islandid
607 JOIN islands AS b ON dists.biid==b.islandid');
609 while ($row= $sth->fetchrow_hashref) {
610 $dbdists->add_weighted_edge($row->{'a'}, $row->{'b'}, $row->{'dist'});
614 sub database_graph_spr () {
615 $dbspr= shortest_path_reduction('db',$dbdists);
618 sub database_do_updates () {
619 my $addisland= $dbh->prepare(<<'END')
620 INSERT OR IGNORE INTO islands (islandname, archipelago) VALUES (?, ?);
623 foreach my $island (sort keys %wiisland2node) {
624 my $wiarch= wiisland2arch($island);
625 $addisland->execute($island, $wiarch);
633 my $adddist= $dbh->prepare(<<'END')
634 INSERT INTO dists VALUES
635 ((SELECT islandid FROM islands WHERE islandname == ?),
636 (SELECT islandid FROM islands WHERE islandname == ?),
640 my $addroute= $dbh->prepare(<<'END')
641 INSERT INTO routes VALUES
642 ((SELECT islandid FROM islands WHERE islandname == ?),
643 (SELECT islandid FROM islands WHERE islandname == ?),
647 foreach my $ia (sort keys %wiisland2node) {
648 my $na= $wiisland2node{$ia};
649 foreach my $ib (sort keys %wiisland2node) {
650 my $nb= $wiisland2node{$ib};
651 my $apdist= $ia eq $ib ? 0 : widist($na,$nb);
652 die "$ia $ib" unless defined $apdist;
653 my $sprdist= $wispr->get_edge_weight($ia,$ib);
654 die "$ia $ib $apdist $sprdist" if
655 defined($sprdist) && $sprdist != $apdist;
657 $adddist->execute($ia,$ib,$apdist);
658 $addroute->execute($ia,$ib,$sprdist) if defined $sprdist;
662 # select ia.islandname, ib.islandname, d.dist from dists as d, islands as ia on d.aiid = ia.islandid, islands as ib on d.biid = ib.islandid order by ia.islandname, ib.islandname;
666 #========== update _ocean-*.txt ==========
670 sub localtopo_rewrite () {
671 $localtopo_path= '_ocean-'.(lc $ocean).'.txt';
672 my $fh= new IO::File "$localtopo_path.tmp", 'w';
673 print $fh "# autogenerated - do not edit\n" or die $!;
674 print $fh "ocean $ocean\n" or die $!;
676 foreach my $isle (sort keys %wtisland2arch) {
677 my $arch= $wtisland2arch{$isle};
678 push @{ $arches{$arch} }, $isle;
680 foreach my $arch (sort keys %arches) {
681 print $fh " $arch\n" or die $!;
682 foreach my $isle (@{ $arches{$arch} }) {
683 print $fh " $isle\n" or die $!;
686 print $fh "\n" or die $!;
690 sub localtopo_commit () {
691 rename "$localtopo_path.tmp", $localtopo_path or die $!;
694 #========== main program ==========
696 parse_info_serverside();
698 progress("fetching yppedia chart"); yppedia_ocean_fetch_chart();
699 progress("adding shortcuts"); yppedia_graphs_add_shortcuts();
700 progress("pruning boring vertices"); yppedia_graphs_prune_boring();
701 progress("checking yppedia graphs"); yppedia_graphs_check();
702 progress("setting archs from source-info"); yppedia_archs_sourceinfo();
703 progress("computing shortest paths"); yppedia_graph_shortest_paths();
704 progress("setting archs from labels"); yppedia_archs_chart_labels();
705 progress("setting archs from nearby"); yppedia_archs_fillbynearest();
706 progress("computing yppedia spr"); yppedia_graph_spr();
709 progress("fetching yppedia ocean text"); yppedia_ocean_fetch_text();
716 progress("reading database");
717 database_fetch_ocean();
718 progress("computing database spr"); database_graph_spr();
720 progress("comparing islands"); compare_island_lists();
721 progress("comparing distances"); compare_distances();
726 foreach my $k (@msgkinds) {
727 my $n= $msgkindprinted{$k};
729 printf STDERR "*** %d%s %ss\n", $n, $iteration?' additional':'', $k;
732 if ($msgs{'error'}) {
733 print STDERR "*** errors, aborting update\n";
737 if (!%msgkindprinted) {
738 progress("updating database"); database_do_updates();
739 progress("updating _ocean-*.txt"); localtopo_rewrite();
741 print STDERR "*** --stdin-chart, aborting!\n";
744 progress("checking database"); db_check_referential_integrity(1);
745 progress("committing database"); $dbh->commit();
746 progress("committing _ocean-*.txt"); localtopo_commit();
751 my $default= !$msgkindprinted{'warning'};
752 printf STDERR "*** confirm update %s ? ", $default?'(y/n)':'(n/y)';
755 printf STDERR "[--stdin-chart]\n";
759 $!=0; my $result= <STDIN>; defined $result or die $!;
761 $result= $default?'y':'n' if !length $result;
762 $result= $result =~ m/^y/i;
765 printf STDERR "*** updated abandoned at your request\n";
770 undef %msgkindprinted;