6 # usage: ./yppedia-chart-parser <Oceanname>
7 # updates OCEAN-Oceanname.db and _ocean-<oceanname>.txt
8 # from YPPedia (chart and ocean page) and source-info.txt
10 # This is part of ypp-sc-tools, a set of third-party tools for assisting
11 # players of Yohoho Puzzle Pirates.
13 # Copyright (C) 2009 Ian Jackson <ijackson@chiark.greenend.org.uk>
15 # This program is free software: you can redistribute it and/or modify
16 # it under the terms of the GNU General Public License as published by
17 # the Free Software Foundation, either version 3 of the License, or
18 # (at your option) any later version.
20 # This program is distributed in the hope that it will be useful,
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 # GNU General Public License for more details.
25 # You should have received a copy of the GNU General Public License
26 # along with this program. If not, see <http://www.gnu.org/licenses/>.
28 # Yohoho and Puzzle Pirates are probably trademarks of Three Rings and
29 # are used without permission. This program is not endorsed or
30 # sponsored by Three Rings.
32 use strict (qw(vars));
35 use Graph::Undirected;
40 my $widists= Graph::Undirected->new();
41 my $wiarchs= Graph::Undirected->new();
57 my @msgkinds= qw(change warning error);
62 my $m= "$_[0]: $_[1]\n";
63 print $debugfh "D $m";
64 push @{ $msgs{$_[0]} }, $m;
# Hand TEXT to pmsg() tagged with the message kind "warning".
66 sub warning ($) { my ($text)= @_; pmsg("warning", $text); }
# Hand TEXT to pmsg() tagged with the message kind "error".
67 sub error ($) { my ($text)= @_; pmsg("error", $text); }
# Hand TEXT to pmsg() tagged with the message kind "change".
68 sub change ($) { my ($text)= @_; pmsg("change", $text); }
69 sub print_messages () {
70 foreach my $k (@msgkinds) {
73 foreach my $m (sort @$ms) {
74 next if $msgprinted{$m};
77 $msgkindprinted{$k}++;
# Print a progress note, wrapped in parentheses, to the currently
# selected output handle.
81 sub progress ($) { my ($stage)= @_; print "($stage)\n"; }
85 $debugfh= new IO::File ">/dev/null" or die $!;
88 last unless $ARGV[0] =~ m/^-/;
91 if ($_ eq '--debug') {
92 $debugfh= new IO::File ">&STDOUT" or die $!;
93 select($debugfh); $|=1; select(STDOUT);
94 } elsif ($_ eq '--stdin-chart') {
103 my $ocean= shift @ARGV;
109 my $tp= (0+$x ^ 0+$y) & 1;
110 defined $parity or $parity=$tp;
111 $tp==$parity or warning("line $.: parity error $x,$y is $tp not $parity");
112 my $n= "$_[0],$_[1]";
113 $winode2lines{$n}{$.}++;
117 sub run_yppedia_chart_parse ($) {
119 yppedia_chart_parse($oceanfh, $debugfh,
122 my ($x,$y,$arch) = @_;
123 push @wiarchlabels, [ $x,$y,$arch ];
126 my ($n, $island) = @_;
127 $wiisland2node{$island}= $n;
128 $winode2island{$n}= $island;
129 $widists->add_vertex($n);
130 $wiarchs->add_vertex($n);
133 my ($na, $nb, $solid) = @_;
# Every edge goes into the distance graph with weight 1.
134 $widists->add_weighted_edge($na, $nb, 1);
# The archipelago graph only receives edges flagged $solid.  (This
# call used to be issued twice in a row; once is enough.)
135 $wiarchs->add_edge($na, $nb) if $solid;
140 warning("line $l: ignoring incomprehensible: $l");
144 sub yppedia_graphs_add_shortcuts () {
145 # We add edges between LPs we know about, as you can chart
146 # between them. Yppedia often lacks these edges.
#
# For every vertex P of $widists, look at three fixed coordinate
# offsets and, where a vertex exists there and is not yet joined to
# P, add an edge of weight 1.  Takes no arguments; modifies $widists
# and writes one line per added edge to $debugfh.
148 foreach my $p ($widists->vertices) {
# Vertex names are "X,Y" with unsigned decimal coordinates; anything
# else is a fatal error.
149 my ($ax,$ay) = $p =~ m/^(\d+)\,(\d+)$/ or die;
# Closure: try to link $p to the vertex at offset ($_[0],$_[1]).
150 my $add_shortcut= sub {
151 my $q= sprintf "%d,%d", $ax+$_[0], $ay+$_[1];
# Nothing to do if there is no such vertex or the edge already exists.
152 return unless $widists->has_vertex($q);
153 return if $widists->has_edge($p,$q);
154 printf $debugfh "%-5s league-shortcut %-5s\n", $p, $q;
155 $widists->add_weighted_edge($p,$q,1);
# Only three offsets are tried.  NOTE(review): presumably the mirror
# offsets are covered when the loop reaches the other endpoint, since
# the graph is undirected - confirm.
157 $add_shortcut->( 2,0);
158 $add_shortcut->(+1,1);
159 $add_shortcut->(-1,1);
163 sub yppedia_graphs_prune_boring () {
164 # Prune the LP database by eliminating boring intermediate vertices
#
# A vertex is elided when it has no island attached and exactly two
# neighbours.  Those neighbours are then joined by one edge whose
# weight is the sum of the two edges that go away.  Modifies $widists
# and logs each elision to $debugfh.
165 foreach my $delete ($widists->vertices()) {
166 next if exists $winode2island{$delete};
167 my @neigh= $widists->neighbours($delete);
168 next unless @neigh==2;
170 map { $weight += $widists->get_edge_weight($delete, $_) } @neigh;
# The two neighbours may already be joined directly (for instance by
# a league shortcut).  add_weighted_edge would overwrite that edge's
# weight, so keep the existing edge unless the bypass is shorter.
171 $widists->add_weighted_edge(@neigh, $weight)
unless $widists->has_edge(@neigh)
&& $widists->get_edge_weight(@neigh) <= $weight;
172 $widists->delete_vertex($delete);
173 printf $debugfh "%-5s elide %5s %-5s %2d\n", $delete, @neigh, $weight;
177 sub yppedia_graphs_check () {
178 # Check that it's connected.
#
# Walks the connected components of $widists and builds a message
# for each component that is not the dominant one.
179 foreach my $cc ($widists->connected_components()) {
# Skip a component holding more than half of the graph.  vertices()
# is used in a numeric comparison here (presumably yielding the
# vertex count in scalar context).
180 next if 2*@$cc > $widists->vertices();
181 my $m= "disconnected league point(s):";
182 foreach my $n (@$cc) {
# List each stray LP with the input line numbers recorded for it in
# %winode2lines.  The default sort is a string sort, so the line
# numbers come out in lexical rather than numeric order.
183 $m .= "\n LP $n, def. yppedia line(s): ".
184 join(',', sort keys %{ $winode2lines{$n} });
190 sub yppedia_archs_sourceinfo () {
191 # Assign archipelagoes according to the source-info file
192 foreach my $arch (sort keys %{ $oceans{$ocean} }) {
193 foreach my $islename (sort keys %{ $oceans{$ocean}{$arch} }) {
194 my $islenode= $wiisland2node{$islename};
195 if (!defined $islenode) {
196 error("island $islename in source-info but not in WP map");
199 my $ccix= $wiarchs->connected_component_by_vertex($islenode);
200 my $oldarch= $wiccix2arch{$ccix};
201 error("island in $arch in source-info".
202 " connected to $oldarch as well: $islename")
203 if defined $oldarch && $oldarch ne $arch;
204 printf $debugfh "%-5s force-island-arch cc%-2d %-10s %s\n",
205 $islenode, $ccix, $arch, $islename;
206 $wiccix2arch{$ccix}= $arch;
211 sub yppedia_archs_chart_labels () {
212 # Assign archipelago labels to groups of islands
214 foreach my $label (@wiarchlabels) {
215 my ($ax,$ay,$arch) = @$label;
216 my $best_d2= 9999999;
218 # print $debugfh "$ax,$ay arch-island-search $arch\n";
219 $ay += 1; $ax += 2; # coords are rather to the top left of label
220 foreach my $vertex ($wiarchs->vertices()) {
221 next unless exists $winode2island{$vertex};
222 my $ccix= $wiarchs->connected_component_by_vertex($vertex);
223 my @cc= $wiarchs->connected_component_by_index($ccix);
224 my ($vx,$vy) = split /,/, $vertex;
225 my $d2= ($vx-$ax)*($vx-$ax) + ($vy-$ay)*($vy-$ay);
226 my $cmp= $best_d2 <=> $d2;
227 printf $debugfh "%2d,%-2d arch-island-search %5s d2=%4d cc%-2d".
228 " #cc=%2d cmp=%2d %s\n",
229 $ax,$ay, $vertex, $d2, $ccix, scalar(@cc), $cmp,
230 $winode2island{$vertex};
231 next unless $cmp > 0;
235 die 'no island vertices?!' unless defined $best_n;
236 my $ccix= $wiarchs->connected_component_by_vertex($best_n);
238 "%2d,%-2d arch-island-select %-5s d2=%4d cc%-2d %-10s %s\n",
# Arguments follow the format: d2= takes the squared distance and cc
# takes the component index (the two were previously swapped).
239 $ax,$ay, $best_n, $best_d2, $ccix, $arch, $winode2island{$best_n};
240 my $desc= join "\n", map {
241 my $in= $winode2island{$_};
242 " LP $_". (defined $in ? ", $in" : "");
243 } sort $wiarchs->connected_component_by_index($ccix);
245 if (exists $wiccix2arch{$ccix} and $wiccix2arch{$ccix} ne $arch) {
246 error("archipelago determination failed, wrongly merged:\n".
247 " archipelago $arch\n".
248 " archipelago $wiccix2arch{$ccix}\n".
252 $wiccix2arch{$ccix}= $arch;
253 # print "$ccix $arch ::\n$desc\n";
257 sub yppedia_archs_fillbynearest() {
258 # Assign islands not labelled above to archipelagoes.
260 # We do this by, for each connected component (set of islands
261 # linked by purchaseable charts), searching for the nearest other
262 # connected component which has already been assigned an arch.
263 # `Nearest' means shortest distance of unpurchaseable charts, in
266 # we need only consider vertices which weren't `boring intermediate
267 # vertices' (removed during optimisation as being of order 2)
268 my @ccs_useful= map {
269 [ grep { $widists->has_vertex($_) } @$_ ]
270 } $wiarchs->connected_components();
274 foreach my $sourceccix (0..$#ccs_useful) {
275 next if defined $wiccix2arch{$sourceccix};
276 next unless $ccs_useful[$sourceccix];
278 my @sourcecc= $wiarchs->connected_component_by_index($sourceccix);
279 my @islandnodes= grep { $winode2island{$_} } @sourcecc;
280 next unless @islandnodes; # don't care, then
282 foreach my $islandnode (@islandnodes) {
283 printf $debugfh "%-5s arch-join-need cc%-2d %s\n",
284 $islandnode, $sourceccix, $winode2island{$islandnode};
286 my $best_dist= 9999999;
287 my ($best_target, $best_targetccix, $best_source);
288 foreach my $targetccix (0..$#ccs_useful) {
289 next unless defined $wiccix2arch{$targetccix}; # not helpful
290 next unless $ccs_useful[$targetccix];
291 foreach my $target ($wiarchs->
292 connected_component_by_index($targetccix)) {
293 next unless $widists->has_vertex($target);
294 foreach my $source (@sourcecc) {
295 my $target_dist= widist($target,$source);
296 next unless defined $target_dist;
297 next if $target_dist >= $best_dist;
298 $best_dist= $target_dist;
299 $best_source= $source;
300 $best_target= $target;
301 $best_targetccix= $targetccix;
305 die "no possible target ?!" unless defined $best_target;
307 my $arch= $wiccix2arch{$best_targetccix};
308 my $best_island= $winode2island{$best_target};
309 printf $debugfh "%-5s arch-join-to %-5s dist=%2d cc%-2d %-10s %s\n",
310 $best_source, $best_target, $best_dist,
311 $best_targetccix, $arch,
312 defined($best_island) ? $best_island : "-";
314 push @assignments, [ $sourceccix, $arch ];
316 foreach my $assign (@assignments) {
317 $wiccix2arch{$assign->[0]}= $assign->[1];
321 sub yppedia_graph_shortest_paths () {
322 $wialldists= $widists->APSP_Floyd_Warshall();
327 my $pl= $wialldists->path_length($p,$q);
328 # die "$p $q" unless defined $pl;
329 # my @pv= $wialldists->path_vertices($p,$q);
330 # if (@pv == $pl) { return $pl; }
331 # printf $debugfh "%-5s PATHLENGTH %-5s pl=%s pv=%s\n", $p,$q,$pl,join('|',@pv);
335 sub winode2arch ($) {
337 my $ccix= $wiarchs->connected_component_by_vertex($node);
338 return $wiccix2arch{$ccix};
340 sub wiisland2arch ($) {
342 my $node= $wiisland2node{$island};
343 die "$island ?" unless defined $node;
344 return winode2arch($node);
347 sub compare_island_lists () {
348 foreach my $island (sort keys %dbisland2arch) {
349 my $node= $wiisland2node{$island};
350 if (!defined $node) {
351 error("would delete island: $island");
354 my $wiarch= winode2arch($node);
355 if (!defined $wiarch) {
356 error("island has no arch: $island");
359 my $dbarch= $dbisland2arch{$island};
360 if ($wiarch ne $dbarch) {
361 change("archipelago change from $dbarch to $wiarch".
362 " for island $island");
365 foreach my $island (sort keys %wiisland2node) {
366 my $wtarch= $wtisland2arch{$island};
367 my $wiarch= wiisland2arch($island);
369 if (!defined $wtarch) {
370 error("island from chart not found on ocean page: $island");
371 } elsif (defined $wiarch and $wtarch ne $wiarch) {
372 error("island in $wtarch on ocean page but".
373 " concluded $wiarch from chart: $island");
377 my $dbarch= $dbisland2arch{$island};
378 if (!defined $dbarch) {
379 my $wiarch= wiisland2arch($island);
380 if (!defined $wiarch) {
381 error("new island has no arch: $island");
383 # We check arches of non-new islands above
385 change("island new in $wiarch: $island");
389 foreach my $island (sort keys %wtisland2arch) {
390 my $node= $wiisland2node{$island};
391 next if defined $node;
392 error("island on ocean page but not in chart: $island");
397 sub shortest_path_reduction ($$) {
400 # Takes a graph $g (and a string for messages $what) and returns
401 # a new graph which is the minimal shortest path transitive reduction
404 # We also check that the shortest path closure of the intended result
405 # is the same graph as the input. Thus the input must itself be
406 # a shortest path closure; if it isn't, we die.
408 my $proof=<<'END'; # way to make a big comment
410 Premises and definitions:
412 1. F is an undirected weighted graph with positive edge weights.
414 2. All graphs we will consider have the same vertices as F
415 and none have self-edges.
417 3. G = Closure(F) is the graph of cliques whose edge weights
418 are the shortest paths in F, one clique for each connected
421 3a. |XY| for vertices X, Y is the weight of the edge XY in G.
422 If XY is not in G, |XY| is infinite.
424 4. A `reduction' of G is a subgraph K of G such that Closure(K) = G.
425 The reduction is `minimal' if there is no strict subgraph K'
426 of K such that Closure(K') = G.
428 5. Now each edge of G may be:
429 - `unnecessary': included in no minimal reductions of G.
430 - `essential': included in all minimal reductions of G.
431 - `contingent': included in some but not all.
433 6. Consider for any edge AC between the vertices A and C,
434 whether there is any B such that |AB|+|BC| = |AC| ?
435 (There can be no B such that the sum < |AC| since that would
436 mean that |AC| wasn't equal to the shortest path length.)
438 6a. No such B: AC is therefore the only shortest path from A to C
439 (since G is not a multigraph). AC is thus an essential edge.
441 6b. Some such B: Call all such edges AC `questionable'.
443 6c. Thus all edges are essential or questionable.
445 7. Suppose AC is a shortest contingent edge. AC must be
446 questionable since it is not essential. Suppose it is
447 made questionable by the existence of B such that |AB|+|BC| =
448 |AC|. Consider AB and BC. Since |AB| and |BC| are positive,
449 |BC| and |AB| must be < |AC| ie AB and BC are shorter than AC.
450 Since AC is a shortest contingent edge, there must be shortest
451 paths in G for AB and BC consisting entirely of essential edges.
453 8. Therefore it is always safe to remove AC since the paths
454 A..B and B..C will definitely still remain and provide a path
455 A..B..C with length |AB|+|BC| = |AC|.
457 9. Thus AC is unnecessary, contradicting the assumption in 7.
458 There are therefore no shortest contingent edges, and
459 thus no contingent edges.
461 10. We can construct a minimal reduction directly: for each edge
462 AC in G, search for a vertex B such that |AB|+|BC| = |AC|.
463 If we find none, AC is essential. If we find one then AC is
464 not essential and is therefore unnecessary.
468 printf $debugfh "spr %s before %d\n", $what, scalar($g->edges());
470 my $result= Graph::Undirected->new();
471 foreach my $edge_ac ($g->edges()) {
472 $result->add_vertex($edge_ac->[0]); # just in case
473 next if $edge_ac->[0] eq $edge_ac->[1];
474 my $edgename_ac= join ' .. ', @$edge_ac;
475 printf $debugfh "spr %s edge %s\n", $what, $edgename_ac;
476 my $w_ac= $g->get_edge_weight(@$edge_ac);
# Look for an intermediate vertex B with |AB| + |BC| <= |AC|; finding
# one means edge AC is not needed in the reduction.
478 foreach my $vertex_b ($g->vertices()) {
# B must differ from both endpoints of AC.
479 next if grep { $_ eq $vertex_b } @$edge_ac;
480 my $w_ab= $g->get_edge_weight($edge_ac->[0], $vertex_b);
481 next unless defined $w_ab;
# AB as long as AC cannot be part of an equally short detour.
482 next if $w_ab >= $w_ac;
483 my $w_bc= $g->get_edge_weight($vertex_b, $edge_ac->[1]);
# B has to be joined to C as well.  Test $w_bc here (not $w_ac): an
# undefined weight would otherwise count as 0 in the sum below and
# wrongly make AC look unnecessary.
484 next unless defined $w_bc;
485 next if $w_ab + $w_bc > $w_ac;
487 printf $debugfh "spr %s edge %s unnecessary %s\n",
488 $what, $edgename_ac, $vertex_b;
493 printf $debugfh "spr %s edge %s essential\n", $what, $edgename_ac;
494 $result->add_weighted_edge(@$edge_ac,$w_ac);
497 printf $debugfh "spr %s result %d\n", $what, scalar($result->edges());
499 my $apsp= $result->APSP_Floyd_Warshall();
500 foreach my $ia (sort $g->vertices()) {
501 foreach my $ib (sort $g->vertices()) {
502 my $din= $g->get_edge_weight($ia,$ib);
503 my $dout= $apsp->path_length($ia,$ib);
504 $din= defined($din) ? $din : 'infinity';
505 $dout= defined($dout) ? $dout : 'infinity';
506 error("$what spr apsp discrepancy in=$din out=$dout".
514 sub yppedia_graph_spr () {
515 my $base= Graph::Undirected->new();
516 foreach my $na (sort keys %winode2island) {
517 my $ia= $winode2island{$na};
518 foreach my $nb (sort keys %winode2island) {
519 my $ib= $winode2island{$nb};
520 $base->add_weighted_edge($ia,$ib, widist($na,$nb));
523 $wispr= shortest_path_reduction('wi',$base);
526 sub yppedia_ocean_fetch_start ($) {
529 push @args, '--chart' if $chart;
531 open OCEAN, '-|', "./yppedia-ocean-scraper", @args or die $!;
533 sub yppedia_ocean_fetch_done () {
534 $?=0; $!=0; close OCEAN; $? and die $?; $! and die $!;
537 sub yppedia_ocean_fetch_chart () {
539 run_yppedia_chart_parse('STDIN');
541 yppedia_ocean_fetch_start(1);
542 run_yppedia_chart_parse('OCEAN');
543 yppedia_ocean_fetch_done();
547 sub yppedia_ocean_fetch_text () {
548 yppedia_ocean_fetch_start(0);
555 die unless defined $arch;
556 $wtisland2arch{$'}= $arch;
563 yppedia_ocean_fetch_done();
566 sub compare_distances () {
567 foreach my $ia (sort keys %dbisland2arch) {
568 my $na= $wiisland2node{$ia};
569 next unless defined $na;
570 foreach my $ib (sort keys %dbisland2arch) {
571 next unless $ia le $ib; # do every pair only once
572 my $dbdist= $dbspr->get_edge_weight($ia,$ib);
573 my $widist= $wispr->get_edge_weight($ia,$ib);
574 next unless defined $dbdist || defined $widist;
576 if (!defined $widist) {
577 warning(sprintf "route delete %2d for %s .. %s",
579 } elsif (!defined $dbdist) {
580 change(sprintf "route new %2d for %s .. %s",
582 } elsif ($dbdist != $widist) {
583 change(sprintf "route change %2d to %2d for %s .. %s",
584 $dbdist, $widist, $ia,$ib);
590 #========== database handling ==========
592 sub database_fetch_ocean () {
594 $sth= $dbh->prepare('SELECT islandname, archipelago FROM islands');
596 undef %dbisland2arch;
597 $dbdists= Graph::Undirected->new();
598 while ($row= $sth->fetchrow_hashref) {
599 print $debugfh "database-island $row->{'islandname'}".
600 " $row->{'archipelago'}\n";
601 $dbisland2arch{$row->{'islandname'}}= $row->{'archipelago'};
603 $sth= $dbh->prepare('SELECT dist, a.islandname a, b.islandname b
605 JOIN islands AS a ON dists.aiid==a.islandid
606 JOIN islands AS b ON dists.biid==b.islandid');
608 while ($row= $sth->fetchrow_hashref) {
609 $dbdists->add_weighted_edge($row->{'a'}, $row->{'b'}, $row->{'dist'});
613 sub database_graph_spr () {
614 $dbspr= shortest_path_reduction('db',$dbdists);
617 sub database_do_updates () {
618 my $addisland= $dbh->prepare(<<'END')
619 INSERT OR IGNORE INTO islands (islandname, archipelago) VALUES (?, ?);
622 foreach my $island (sort keys %wiisland2node) {
623 my $wiarch= wiisland2arch($island);
624 $addisland->execute($island, $wiarch);
632 my $adddist= $dbh->prepare(<<'END')
633 INSERT INTO dists VALUES
634 ((SELECT islandid FROM islands WHERE islandname == ?),
635 (SELECT islandid FROM islands WHERE islandname == ?),
639 my $addroute= $dbh->prepare(<<'END')
640 INSERT INTO routes VALUES
641 ((SELECT islandid FROM islands WHERE islandname == ?),
642 (SELECT islandid FROM islands WHERE islandname == ?),
646 foreach my $ia (sort keys %wiisland2node) {
647 my $na= $wiisland2node{$ia};
648 foreach my $ib (sort keys %wiisland2node) {
649 my $nb= $wiisland2node{$ib};
650 my $apdist= $ia eq $ib ? 0 : widist($na,$nb);
651 die "$ia $ib" unless defined $apdist;
652 my $sprdist= $wispr->get_edge_weight($ia,$ib);
653 die "$ia $ib $apdist $sprdist" if
654 defined($sprdist) && $sprdist != $apdist;
656 $adddist->execute($ia,$ib,$apdist);
657 $addroute->execute($ia,$ib,$sprdist) if defined $sprdist;
661 # select ia.islandname, ib.islandname, d.dist from dists as d, islands as ia on d.aiid = ia.islandid, islands as ib on d.biid = ib.islandid order by ia.islandname, ib.islandname;
665 #========== update _ocean-*.txt ==========
669 sub localtopo_rewrite () {
670 $localtopo_path= '_ocean-'.(lc $ocean).'.txt';
# Open the temporary output file for writing; stop at once if that
# fails rather than printing to an undefined handle.
671 my $fh= new IO::File "$localtopo_path.tmp", 'w' or die "$localtopo_path.tmp: $!";
672 print $fh "# autogenerated - do not edit\n" or die $!;
673 print $fh "ocean $ocean\n" or die $!;
675 foreach my $isle (sort keys %wtisland2arch) {
676 my $arch= $wtisland2arch{$isle};
677 push @{ $arches{$arch} }, $isle;
679 foreach my $arch (sort keys %arches) {
680 print $fh " $arch\n" or die $!;
681 foreach my $isle (@{ $arches{$arch} }) {
682 print $fh " $isle\n" or die $!;
685 print $fh "\n" or die $!;
689 sub localtopo_commit () {
690 rename "$localtopo_path.tmp", $localtopo_path or die $!;
693 #========== main program ==========
695 parse_info_serverside();
697 progress("fetching yppedia chart"); yppedia_ocean_fetch_chart();
698 progress("adding shortcuts"); yppedia_graphs_add_shortcuts();
699 progress("pruning boring vertices"); yppedia_graphs_prune_boring();
700 progress("checking yppedia graphs"); yppedia_graphs_check();
701 progress("setting archs from source-info"); yppedia_archs_sourceinfo();
702 progress("computing shortest paths"); yppedia_graph_shortest_paths();
703 progress("setting archs from labels"); yppedia_archs_chart_labels();
704 progress("setting archs from nearby"); yppedia_archs_fillbynearest();
705 progress("computing yppedia spr"); yppedia_graph_spr();
708 progress("fetching yppedia ocean text"); yppedia_ocean_fetch_text();
715 progress("reading database");
716 database_fetch_ocean();
717 progress("computing database spr"); database_graph_spr();
719 progress("comparing islands"); compare_island_lists();
720 progress("comparing distances"); compare_distances();
725 foreach my $k (@msgkinds) {
726 my $n= $msgkindprinted{$k};
728 printf STDERR "*** %d%s %ss\n", $n, $iteration?' additional':'', $k;
731 if ($msgs{'error'}) {
732 print STDERR "*** errors, aborting update\n";
736 if (!%msgkindprinted) {
737 progress("updating database"); database_do_updates();
738 progress("updating _ocean-*.txt"); localtopo_rewrite();
740 print STDERR "*** --stdin-chart, aborting!\n";
743 progress("checking database"); db_check_referential_integrity(1);
744 progress("committing database"); $dbh->commit();
745 progress("committing _ocean-*.txt"); localtopo_commit();
750 my $default= !$msgkindprinted{'warning'};
751 printf STDERR "*** confirm update %s ? ", $default?'(y/n)':'(n/y)';
754 printf STDERR "[--stdin-chart]\n";
758 $!=0; my $result= <STDIN>; defined $result or die $!;
760 $result= $default?'y':'n' if !length $result;
761 $result= $result =~ m/^y/i;
764 printf STDERR "*** updated abandoned at your request\n";
769 undef %msgkindprinted;