6 # usage: ./yppedia-chart-parser <Oceanname>
7 # updates OCEAN-Oceanname.db and _ocean-<oceanname>.txt
8 # from YPPedia (chart and ocean page) and source-info.txt
10 # This is part of ypp-sc-tools, a set of third-party tools for assisting
11 # players of Yohoho Puzzle Pirates.
13 # Copyright (C) 2009 Ian Jackson <ijackson@chiark.greenend.org.uk>
15 # This program is free software: you can redistribute it and/or modify
16 # it under the terms of the GNU General Public License as published by
17 # the Free Software Foundation, either version 3 of the License, or
18 # (at your option) any later version.
20 # This program is distributed in the hope that it will be useful,
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 # GNU General Public License for more details.
25 # You should have received a copy of the GNU General Public License
26 # along with this program. If not, see <http://www.gnu.org/licenses/>.
28 # Yohoho and Puzzle Pirates are probably trademarks of Three Rings and
29 # are used without permission. This program is not endorsed or
30 # sponsored by Three Rings.
32 use strict (qw(vars));
35 use Graph::Undirected;
39 my $widists= Graph::Undirected->new();
40 my $wiarchs= Graph::Undirected->new();
54 my @msgkinds= qw(change warning error);
58 sub pmsg ($$) { push @{ $msgs{$_[0]} }, "$_[0]: $_[1]\n"; }
59 sub warning ($) { pmsg("warning",$_[0]); }
60 sub error ($) { pmsg("error", $_[0]); }
61 sub change ($) { pmsg("change", $_[0]); }
62 sub print_messages () {
63 foreach my $k (@msgkinds) {
66 foreach my $m (sort @$ms) {
67 next if $msgprinted{$m};
70 $msgkindprinted{$k}++;
74 sub progress ($) { print "($_[0])\n"; }
76 if (@ARGV && $ARGV[0] eq '--debug') {
78 open DEBUG, ">&STDOUT" or die $!;
81 open DEBUG, ">/dev/null" or die $!;
86 $ARGV[0] =~ m/^\-/ and die;
87 my $ocean= shift @ARGV;
93 my $tp= (0+$x ^ 0+$y) & 1;
94 defined $parity or $parity=$tp;
95 $tp==$parity or warning("line $.: parity error $x,$y is $tp not $parity");
97 $winode2lines{$n}{$.}++;
101 sub yppedia_chart_parse () {
102 # We don't even bother with tag soup; instead we do line-oriented parsing.
106 s/^\s*//; chomp; s/\s+$//; s/\s+/ /g;
107 s/\<\/?(?:b|em)\>//g;
108 s/\{\{Chart\ style\|[^{}]*\}\}//g;
109 next unless m/\{\{/; # only interested in chart template stuff
111 my ($x,$y, $arch,$island,$solid,$dirn);
112 my $nn= sub { return nn_xy($x,$y) };
115 m/^\{\{ chart\ label \|(\d+)\|(\d+)\| .*
116 \'\[\[ [^][\']* \| (\S+)\ archipelago \]\]\'*\}\}$/xi) {
117 printf DEBUG "%2d,%-2d arch %s\n", $x,$y,$arch;
118 push @wiarchlabels, [ $x,$y,$arch ];
119 } elsif (($x,$y,$island) =
120 m/^\{\{ chart\ island\ icon \|(\d+)\|(\d+)\|
121 ([^| ][^|]*[^| ]) \| .*\}\}$/xi) {
123 $wiisland2node{$island}= $n;
124 $winode2island{$n}= $island;
125 $widists->add_vertex($n);
126 $wiarchs->add_vertex($n);
127 printf DEBUG "%2d,%-2d island %s\n", $x,$y,$island;
128 } elsif (($solid,$x,$y,$dirn) =
129 m/^\{\{ chart\ league((?:\ solid)?) \|(\d+)\|(\d+)\|
130 ([-\/\\o]) \| .*\}\}$/xi) {
131 next if $dirn eq 'o';
133 my ($bx,$by) = ($x,$y);
134 if ($dirn eq '-') { $bx+=2; }
135 elsif ($dirn eq '\\') { $bx++; $by++; }
136 elsif ($dirn eq '/') { $x++; $by++; }
139 my $nb= nn_xy($bx,$by);
140 $widists->add_weighted_edge($nn->(), $nb, 1);
141 $wiarchs->add_edge($nn->(), $nb) if $solid;
142 $wiarchs->add_edge($nn->(), $nb) if $solid;
144 printf DEBUG "%2d,%-2d league %-6s %s %s\n", $x,$y,
145 $solid?'solid':'dotted', $dirn, $nb;
147 m/^\{\{ chart\ head \}\}$/xi
151 warning("line $.: ignoring incomprehensible: $_");
156 sub yppedia_graphs_add_shortcuts () {
157 # We add edges between LPs we know about, as you can chart
158 # between them. Yppedia often lacks these edges.
160 foreach my $p ($widists->vertices) {
161 my ($ax,$ay) = $p =~ m/^(\d+)\,(\d+)$/ or die;
162 my $add_shortcut= sub {
163 my $q= sprintf "%d,%d", $ax+$_[0], $ay+$_[1];
164 return unless $widists->has_vertex($q);
165 return if $widists->has_edge($p,$q);
166 printf DEBUG "%-5s league-shortcut %-5s\n", $p, $q;
167 $widists->add_weighted_edge($p,$q,1);
169 $add_shortcut->( 2,0);
170 $add_shortcut->(+1,1);
171 $add_shortcut->(-1,1);
175 sub yppedia_graphs_prune_boring () {
176 # Prune the LP database by eliminating boring intermediate vertices
177 foreach my $delete ($widists->vertices()) {
178 next if exists $winode2island{$delete};
179 my @neigh= $widists->neighbours($delete);
180 next unless @neigh==2;
182 map { $weight += $widists->get_edge_weight($delete, $_) } @neigh;
183 $widists->add_weighted_edge(@neigh, $weight);
184 $widists->delete_vertex($delete);
185 printf DEBUG "%-5s elide %5s %-5s %2d\n", $delete, @neigh, $weight;
189 sub yppedia_graphs_check () {
190 # Check that it's connected.
191 foreach my $cc ($widists->connected_components()) {
192 next if 2*@$cc > $widists->vertices();
193 my $m= "disconnected league point(s):";
194 foreach my $n (@$cc) {
195 $m .= "\n LP $n, def. yppedia line(s): ".
196 join(',', sort keys %{ $winode2lines{$n} });
202 sub yppedia_archs_sourceinfo () {
203 # Assign archipelagoes according to the source-info file
204 foreach my $arch (sort keys %{ $oceans{$ocean} }) {
205 foreach my $islename (sort keys %{ $oceans{$ocean}{$arch} }) {
206 my $islenode= $wiisland2node{$islename};
207 if (!defined $islenode) {
208 error("island $islename in source-info but not in WP map");
211 my $ccix= $wiarchs->connected_component_by_vertex($islenode);
212 my $oldarch= $wiccix2arch{$ccix};
213 error("island in $arch in source-info".
214 " connected to $oldarch as well: $islename")
215 if defined $oldarch && $oldarch ne $arch;
216 printf DEBUG "%-5s force-island-arch cc%-2d %-10s %s\n",
217 $islenode, $ccix, $arch, $islename;
218 $wiccix2arch{$ccix}= $arch;
223 sub yppedia_archs_chart_labels () {
224 # Assign archipelago labels to groups of islands
226 foreach my $label (@wiarchlabels) {
227 my ($ax,$ay,$arch) = @$label;
228 my $best_ccmulti= -1;
231 # print DEBUG "$ax,$ay arch-island-search $arch\n";
232 $ay += 1; $ax += 2; # coords are rather to the top left of label
233 foreach my $vertex ($wiarchs->vertices()) {
234 next unless exists $winode2island{$vertex};
235 my $ccix= $wiarchs->connected_component_by_vertex($vertex);
236 my @cc= $wiarchs->connected_component_by_index($ccix);
237 my $ccmulti= @cc > 1;
238 my ($vx,$vy) = split /,/, $vertex;
239 my $d2= ($vx-$ax)*($vx-$ax) + ($vy-$ay)*($vy-$ay);
240 my $cmp= $ccmulti <=> $best_ccmulti
242 printf DEBUG "%2d,%-2d arch-island-search %5s d2=%4d cc%-2d".
243 " #cc=%2d ccmulti=%d cmp=%2d %s\n",
244 $ax,$ay, $vertex, $d2, $ccix, scalar(@cc), $ccmulti, $cmp,
245 $winode2island{$vertex};
246 next unless $cmp > 0;
249 $best_ccmulti= $ccmulti;
251 die 'no island vertices?!' unless defined $best_n;
252 my $ccix= $wiarchs->connected_component_by_vertex($best_n);
254 "%2d,%-2d arch-island-select %-5s d2=%4d cc%-2d %-10s %s\n",
255 $ax,$ay, $best_n, $ccix, $best_d2, $arch, $winode2island{$best_n};
256 my $desc= join "\n", map {
257 my $in= $winode2island{$_};
258 " LP $_". (defined $in ? ", $in" : "");
259 } sort $wiarchs->connected_component_by_index($ccix);
261 if (exists $wiccix2arch{$ccix} and $wiccix2arch{$ccix} ne $arch) {
262 error("archipelago determination failed, wrongly merged:\n".
263 " archipelago $arch\n".
264 " archipelago $wiccix2arch{$ccix}\n".
268 $wiccix2arch{$ccix}= $arch;
269 # print "$ccix $arch ::\n$desc\n";
273 sub yppedia_archs_fillbynearest() {
274 # Assign islands not labelled above to archipelagoes.
276 # We do this by, for each connected component (set of islands
277 # linked by purchaseable charts), searching for the nearest other
278 # connected component which has already been assigned an arch.
279 # `Nearest' means shortest distance of unpurchaseable charts, in
282 # we need only consider vertices which weren't `boring intermediate
283 # vertices' (removed during optimisation as being of order 2)
284 my @ccs_useful= map {
285 [ grep { $widists->has_vertex($_) } @$_ ]
286 } $wiarchs->connected_components();
290 foreach my $sourceccix (0..$#ccs_useful) {
291 next if defined $wiccix2arch{$sourceccix};
292 next unless $ccs_useful[$sourceccix];
294 my @sourcecc= $wiarchs->connected_component_by_index($sourceccix);
295 my @islandnodes= grep { $winode2island{$_} } @sourcecc;
296 next unless @islandnodes; # don't care, then
298 foreach my $islandnode (@islandnodes) {
299 printf DEBUG "%-5s arch-join-need cc%-2d %s\n",
300 $islandnode, $sourceccix, $winode2island{$islandnode};
302 my $best_dist= 9999999;
303 my ($best_target, $best_targetccix, $best_source);
304 foreach my $targetccix (0..$#ccs_useful) {
305 next unless defined $wiccix2arch{$targetccix}; # not helpful
306 next unless $ccs_useful[$targetccix];
307 foreach my $target ($wiarchs->
308 connected_component_by_index($targetccix)) {
309 next unless $widists->has_vertex($target);
310 foreach my $source (@sourcecc) {
311 my $target_dist= widist($target,$source);
312 next unless defined $target_dist;
313 next if $target_dist >= $best_dist;
314 $best_dist= $target_dist;
315 $best_source= $source;
316 $best_target= $target;
317 $best_targetccix= $targetccix;
321 die "no possible target ?!" unless defined $best_target;
323 my $arch= $wiccix2arch{$best_targetccix};
324 my $best_island= $winode2island{$best_target};
325 printf DEBUG "%-5s arch-join-to %-5s dist=%2d cc%-2d %-10s %s\n",
326 $best_source, $best_target, $best_dist,
327 $best_targetccix, $arch,
328 defined($best_island) ? $best_island : "-";
330 push @assignments, [ $sourceccix, $arch ];
332 foreach my $assign (@assignments) {
333 $wiccix2arch{$assign->[0]}= $assign->[1];
337 sub yppedia_graph_shortest_paths () {
338 $wialldists= $widists->APSP_Floyd_Warshall();
343 my $pl= $wialldists->path_length($p,$q);
344 # die "$p $q" unless defined $pl;
345 # my @pv= $wialldists->path_vertices($p,$q);
346 # if (@pv == $pl) { return $pl; }
347 # printf DEBUG "%-5s PATHLENGTH %-5s pl=%s pv=%s\n", $p,$q,$pl,join('|',@pv);
351 sub winode2arch ($) {
353 my $ccix= $wiarchs->connected_component_by_vertex($node);
354 return $wiccix2arch{$ccix};
356 sub wiisland2arch ($) {
358 my $node= $wiisland2node{$island};
359 die "$island ?" unless defined $node;
360 return winode2arch($node);
363 sub compare_island_lists () {
364 foreach my $island (sort keys %dbisland2arch) {
365 my $node= $wiisland2node{$island};
366 if (!defined $node) {
367 error("would delete island: $island");
370 my $wiarch= winode2arch($node);
371 if (!defined $wiarch) {
372 error("island has no arch: $island");
375 my $dbarch= $dbisland2arch{$island};
376 if ($wiarch ne $dbarch) {
377 change("archipelago change from $dbarch to $wiarch".
378 " for island $island");
381 foreach my $island (sort keys %wiisland2node) {
382 my $wtarch= $wtisland2arch{$island};
383 my $wiarch= wiisland2arch($island);
384 if (!defined $wtarch) {
385 error("island from chart not found on ocean page: $island");
386 } elsif (defined $wiarch and $wtarch ne $wiarch) {
387 error("island in $wtarch on ocean page but".
388 " concluded $wiarch from chart: $island");
391 my $dbarch= $dbisland2arch{$island};
392 if (!defined $dbarch) {
393 my $wiarch= wiisland2arch($island);
394 if (!defined $wiarch) {
395 error("new island has no arch: $island");
397 # We check arches of non-new islands above
399 change("island new in $wiarch: $island");
402 foreach my $island (sort keys %wtisland2arch) {
403 my $node= $wiisland2node{$island};
404 next if defined $node;
405 error("island on ocean page but not in chart: $island");
409 sub shortest_path_reduction ($$) {
412 # Takes a graph $g (and a string for messages $what) and returns
413 # a new graph which is the miminal shortest path transient reduction
416 # We also check that the shortest path closure of the intended result
417 # is the same graph as the input. Thus the input must itself be
418 # a shortest path closure; if it isn't, we die.
420 my $proof=<<'END'; # way to make a big comment
422 Premises and definitions:
424 1. F is an undirected weighted graph with positive edge weights.
426 2. All graphs we will consider have the same vertices as F.
428 3. G = Closure(F) is the graph of cliques whose edge weights
429 are the shortest paths in F, one clique for each connected
432 3a. |XY| for vertices X, Y is the weight of the edge XY in G.
433 If XY is not in G, |XY| is infinite.
435 4. A `reduction' of G is a subgraph K of G such that Closure(K) = G.
436 The reduction is `minimal' if there is no strict subgraph K'
437 of K such that Closure(K') = G.
439 5. Now each edge of G may be:
440 - `unnecessary': included in no minimal reductions of G.
441 - `essential': included in all minimal reductions of G.
442 - `contingent': included in some but not all.
444 6. Consider for any edge AC between the vertices A and C,
445 whether there is any B such that |AB|+|BC| = |AC| ?
446 (There can be no B such that the sum < |AC| since that would
447 mean that |AC| wasn't equal to the shortest path length.)
449 6a. No such B: AC is therefore the only shortest path from A to C
450 (since G is not a multigraph). AC is thus an essential edge.
452 6b. Some such B: Call all such edges AC `questionable'.
454 6c. Thus all edges are essential or questionable.
456 7. Suppose AC is a shortest contingent edge. AC must be
457 questionable since it is not essential. Suppose it is
458 made questionable by the existence of B such that |AB|+|BC| =
459 |AC|. Consider AB and BC. Since |AB| and |BC| are positive,
460 |BC| and |AB| must be < |AC| ie AB and BC are shorter than AC.
461 Since AC is a shortest contingent edge, there must be shortest
462 paths in G for AB and BC consisting entirely of essential edges.
464 8. Therefore it is always safe to remove AC since the paths
465 A..B and B..C will definitely still remain and provide a path
466 A..B..C with length |AB|+|BC| = |AC|.
468 9. Thus AC is unnecessary, contradicting the assumption in 7.
469 There are therefore no shortest contingent edges, and
470 thus no contingent edges.
472 10. We can construct a minimal reduction directly: for each edge
473 AC in G, search for a vertex B such that |AB|+|BC| = |AC|.
474 If we find none, AC is essential. If we find one then AC is
475 not essential and is therefore unnecessary.
479 printf DEBUG "spr %s before %d\n", $what, scalar($g->edges());
481 my $result= Graph::Undirected->new();
482 foreach my $edge_ac ($g->edges()) {
483 my $edgename_ac= join ' .. ', @$edge_ac;
484 printf DEBUG "spr %s edge %s\n", $what, $edgename_ac;
485 my $w_ac= $g->get_edge_weight(@$edge_ac);
487 foreach my $vertex_b ($g->vertices()) {
488 next if grep { $_ eq $vertex_b } @$edge_ac;
489 my $w_ab= $g->get_edge_weight($edge_ac->[0], $vertex_b);
490 next unless defined $w_ab;
491 next if $w_ab >= $w_ac;
492 my $w_bc= $g->get_edge_weight($vertex_b, $edge_ac->[1]);
493 next unless defined $w_ac;
494 next if $w_ab + $w_bc > $w_ac;
496 printf DEBUG "spr %s edge %s unnecessary %s\n",
497 $what, $edgename_ac, $vertex_b;
502 printf DEBUG "spr %s edge %s essential\n", $what, $edgename_ac;
503 $result->add_weighted_edge(@$edge_ac,$w_ac);
506 printf DEBUG "spr %s result %d\n", $what, scalar($result->edges());
508 my $apsp= $result->APSP_Floyd_Warshall();
509 foreach my $ia (sort $g->vertices()) {
510 foreach my $ib (sort $g->vertices()) {
511 my $din= $g->get_edge_weight($ia,$ib);
512 my $dout= $apsp->path_length($ia,$ib);
513 $din= defined($din) ? $din : 'infinity';
514 $dout= defined($dout) ? $dout : 'infinity';
515 error("$what spr apsp discrepancy in=$din out=$dout".
523 sub yppedia_graph_spr () {
524 my $base= Graph::Undirected->new();
525 foreach my $na (sort keys %winode2island) {
526 my $ia= $winode2island{$na};
527 foreach my $nb (sort keys %winode2island) {
528 my $ib= $winode2island{$nb};
529 $base->add_weighted_edge($ia,$ib, widist($na,$nb));
532 $wispr= shortest_path_reduction('wi',$base);
535 sub yppedia_ocean_fetch_start ($) {
538 push @args, '--chart' if $chart;
540 open OCEAN, '-|', "./yppedia-ocean-scraper", @args or die $!;
542 sub yppedia_ocean_fetch_done () {
543 $?=0; $!=0; close OCEAN; $? and die $?; $! and die $!;
546 sub yppedia_ocean_fetch_chart () {
547 yppedia_ocean_fetch_start(1);
548 yppedia_chart_parse();
549 yppedia_ocean_fetch_done();
552 sub yppedia_ocean_fetch_text () {
553 yppedia_ocean_fetch_start(0);
560 die unless defined $arch;
561 $wtisland2arch{$'}= $arch;
568 yppedia_ocean_fetch_done();
571 sub compare_distances () {
572 foreach my $ia (sort keys %dbisland2arch) {
573 my $na= $wiisland2node{$ia};
574 next unless defined $na;
575 foreach my $ib (sort keys %dbisland2arch) {
576 next unless $ia le $ib; # do every pair only once
577 my $dbdist= $dbspr->get_edge_weight($ia,$ib);
578 my $widist= $wispr->get_edge_weight($ia,$ib);
579 next unless defined $dbdist || defined $widist;
581 if (!defined $widist) {
582 warning(sprintf "route delete %2d for %s .. %s",
584 } elsif (!defined $dbdist) {
585 change(sprintf "route new %2d for %s .. %s",
587 } elsif ($dbdist != $widist) {
588 change(sprintf "route change %2d to %2d for %s .. %s",
589 $dbdist, $widist, $ia,$ib);
595 #========== database handling ==========
597 sub database_fetch_ocean () {
599 $sth= $dbh->prepare('SELECT islandname, archipelago FROM islands');
601 undef %dbisland2arch;
602 $dbdists= Graph::Undirected->new();
603 while ($row= $sth->fetchrow_hashref) {
604 print DEBUG "database-island $row->{'islandname'}".
605 " $row->{'archipelago'}\n";
606 $dbisland2arch{$row->{'islandname'}}= $row->{'archipelago'};
608 $sth= $dbh->prepare('SELECT dist, a.islandname a, b.islandname b
610 JOIN islands AS a ON dists.aiid==a.islandid
611 JOIN islands AS b ON dists.biid==b.islandid');
613 while ($row= $sth->fetchrow_hashref) {
614 $dbdists->add_weighted_edge($row->{'a'}, $row->{'b'}, $row->{'dist'});
618 sub database_graph_spr () {
619 $dbspr= shortest_path_reduction('db',$dbdists);
622 #========== main program ==========
624 parse_info_serverside();
626 progress("fetching yppedia chart"); yppedia_ocean_fetch_chart();
627 progress("adding shortcuts"); yppedia_graphs_add_shortcuts();
628 progress("pruning boring vertices"); yppedia_graphs_prune_boring();
629 progress("checking yppedia graphs"); yppedia_graphs_check();
630 progress("setting archs from source-info"); yppedia_archs_sourceinfo();
631 progress("computing shortest paths"); yppedia_graph_shortest_paths();
632 progress("setting archs from labels"); yppedia_archs_chart_labels();
633 progress("setting archs from nearby"); yppedia_archs_fillbynearest();
634 progress("computing yppedia spr"); yppedia_graph_spr();
635 progress("fetching yppedia ocean text"); yppedia_ocean_fetch_text();
641 progress("reading database");
642 database_fetch_ocean();
643 progress("computing database spr"); database_graph_spr();
645 progress("comparing islands"); compare_island_lists();
646 progress("comparing distances"); compare_distances();
651 foreach my $k (@msgkinds) {
652 my $n= $msgkindprinted{$k};
654 printf STDERR "*** %d%s %ss\n", $n, $iteration?' additional':'', $k;
657 if ($msgs{'error'}) {
658 print STDERR "*** errors, aborting update\n";
662 if (!%msgkindprinted) {
663 progress("updating database"); database_do_updates();
664 progress("updating _ocean-*.txt"); localtopo_rewrite();
665 progress("committing database"); $dbh->commit();
666 progress("committing _ocean-*.txt"); localtopo_commit();
671 my $default= !$msgkindprinted{'warning'};
672 printf STDERR "*** confirm update %s ? ", $default?'(y/n)':'(n/y)';
674 $!=0; my $result= <STDIN>; defined $result or die $!;
676 $result= $default?'y':'n' if !length $result;
677 $result= $result =~ m/^y/i;
680 printf STDERR "*** updated abandoned at your request\n";
685 undef %msgkindprinted;