X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.db-live.git;a=blobdiff_plain;f=yarrg%2Fyppedia-chart-parser;h=fca7abd7f45652f783fa01b330e6ff05fe389f39;hp=a8b9f2aaa73a0e81126ea576fdbc1af71bc6a7a9;hb=cf91713ebb03e007c03dc70867c73db3a1a754a9;hpb=025ff2df9bba44d431fb1ddb0f0800ea43535725 diff --git a/yarrg/yppedia-chart-parser b/yarrg/yppedia-chart-parser index a8b9f2a..fca7abd 100755 --- a/yarrg/yppedia-chart-parser +++ b/yarrg/yppedia-chart-parser @@ -4,28 +4,41 @@ use strict (qw(vars)); use warnings; use Graph::Undirected; - +use Commods; use CommodsDatabase; +my $ocean= 'Midnight'; + + my $widists= Graph::Undirected->new(); my $wiarchs= Graph::Undirected->new(); +my $wispr; +my $dbspr; my @wiarchlabels; my %wiisland2node; my %winode2island; -my %wiisland2arch; my %winode2lines; my %wiccix2arch; +my $wialldists; my $dbdists= Graph::Undirected->new(); my %dbisland2arch; -my %msgcount; -sub perr ($$) { print STDERR "$_[0]: $_[1]\n"; $msgcount{$_[0]}++; } -sub warning ($) { perr("warning",$_[0]); } -sub error ($) { perr("error", $_[0]); } -sub change ($) { perr("change", $_[0]); } +my %msgs; +sub pmsg ($$) { push @{ $msgs{$_[0]} }, "$_[0]: $_[1]\n"; } +sub warning ($) { pmsg("warning",$_[0]); } +sub error ($) { pmsg("error", $_[0]); } +sub change ($) { pmsg("change", $_[0]); } +sub print_messages () { + foreach my $k (qw(change warning error)) { + my $m= $msgs{$k}; + next unless $m; + print sort @$m or die $!; + } +} +sub progress ($) { print "($_[0])\n"; } -if ($ARGV[0] eq '--debug') { +if (@ARGV && $ARGV[0] eq '--debug') { shift @ARGV; open DEBUG, ">&STDOUT" or die $!; select(DEBUG); $|=1; @@ -45,7 +58,7 @@ sub nn_xy ($$) { return $n; } -sub parse_yppedia_map () { +sub yppedia_chart_parse () { # We don't even bother with tag soup; instead we do line-oriented parsing. while (<>) { @@ -61,7 +74,7 @@ sub parse_yppedia_map () { if (($x,$y,$arch) = m/^\{\{ chart\ label \|(\d+)\|(\d+)\| .* \'\[\[ [^][\']* \| (\S+)\ archipelago \]\]\'*\}\}$/xi) { - printf DEBUG "%d,%d arch %s\n", $x,$y,$arch; + printf DEBUG "%2d,%-2d arch %s\n", $x,$y,$arch; push @wiarchlabels, [ $x,$y,$arch ]; } elsif (($x,$y,$island) = m/^\{\{ chart\ island\ icon \|(\d+)\|(\d+)\| @@ -71,8 +84,7 @@ sub parse_yppedia_map () { $winode2island{$n}= $island; $widists->add_vertex($n); $wiarchs->add_vertex($n); -#print "\$g->add_vertex('$n');\n"; - printf DEBUG "%d,%d island %s\n", $x,$y,$island; + printf DEBUG "%2d,%-2d island %s\n", $x,$y,$island; } elsif (($solid,$x,$y,$dirn) = m/^\{\{ chart\ league((?:\ solid)?) \|(\d+)\|(\d+)\| ([-\/\\o]) \| .*\}\}$/xi) { @@ -84,13 +96,13 @@ sub parse_yppedia_map () { elsif ($dirn eq '/') { $x++; $by++; } else { die; } - $widists->add_weighted_edge($nn->(), nn_xy($bx,$by), 1); - $wiarchs->add_edge($nn->(), nn_xy($bx,$by)) if $solid; - $wiarchs->add_edge($nn->(), nn_xy($bx,$by)) if $solid; -#print "\$g->add_edge('".$nn->()."','".nn_xy($bx,$by)."');\n" if $solid; + my $nb= nn_xy($bx,$by); + $widists->add_weighted_edge($nn->(), $nb, 1); + $wiarchs->add_edge($nn->(), $nb) if $solid; + $wiarchs->add_edge($nn->(), $nb) if $solid; - printf DEBUG "%d,%d league %s %s \n", $x,$y, - $solid?'solid':'dotted', $dirn; + printf DEBUG "%2d,%-2d league %-6s %s %s\n", $x,$y, + $solid?'solid':'dotted', $dirn, $nb; } elsif ( m/^\{\{ chart\ head \}\}$/xi ) { @@ -101,7 +113,7 @@ sub parse_yppedia_map () { } } -sub parse_database_map () { +sub database_fetch_ocean () { my ($row,$sth); $sth= $dbh->prepare('SELECT islandname, archipelago FROM islands'); $sth->execute(); @@ -120,22 +132,44 @@ sub parse_database_map () { } } -sub process_yppedia_graphs () { +sub database_graph_spr () { + $dbspr= shortest_path_reduction('db',$dbdists); +} + +sub yppedia_graphs_add_shortcuts () { + # We add edges between LPs we know about, as you can chart + # between them. Yppedia often lacks these edges. + # + foreach my $p ($widists->vertices) { + my ($ax,$ay) = $p =~ m/^(\d+)\,(\d+)$/ or die; + my $add_shortcut= sub { + my $q= sprintf "%d,%d", $ax+$_[0], $ay+$_[1]; + return unless $widists->has_vertex($q); + return if $widists->has_edge($p,$q); + printf DEBUG "%-5s league-shortcut %-5s\n", $p, $q; + $widists->add_weighted_edge($p,$q,1); + }; + $add_shortcut->( 2,0); + $add_shortcut->(+1,1); + $add_shortcut->(-1,1); + } +} + +sub yppedia_graphs_prune_boring () { # Prune the LP database by eliminating boring intermediate vertices foreach my $delete ($widists->vertices()) { next if exists $winode2island{$delete}; my @neigh= $widists->neighbours($delete); next unless @neigh==2; -# my @aneigh= $wiarchs->has_vertex($delete) -# ? $wiarchs->neighbours($delete) : (); -# next unless @aneigh==0 || @aneigh==2; my $weight= 0; map { $weight += $widists->get_edge_weight($delete, $_) } @neigh; $widists->add_weighted_edge(@neigh, $weight); $widists->delete_vertex($delete); - print DEBUG "$delete elide $weight\n"; + printf DEBUG "%-5s elide %5s %-5s %2d\n", $delete, @neigh, $weight; } +} +sub yppedia_graphs_check () { # Check that it's connected. foreach my $cc ($widists->connected_components()) { next if 2*@$cc > $widists->vertices(); @@ -146,43 +180,66 @@ sub process_yppedia_graphs () { } warning($m); } +} - # Compute all-pairs-shortest-paths on dist, which is the - # actual distances between all LPs. - my $wialldists= $widists->APSP_Floyd_Warshall(); +sub yppedia_archs_sourceinfo () { + # Assign archipelagoes according to the source-info file + foreach my $arch (sort keys %{ $oceans{$ocean} }) { + foreach my $islename (sort keys %{ $oceans{$ocean}{$arch} }) { + my $islenode= $wiisland2node{$islename}; + defined $islenode or + error("island $islename in source-info but not in WP map"); + my $ccix= $wiarchs->connected_component_by_vertex($islenode); + my $oldarch= $wiccix2arch{$ccix}; + error("island $islename in $arch in source-info". + " connected to $oldarch as well") + if defined $oldarch && $oldarch ne $arch; + printf DEBUG "%-5s force-island-arch cc%-2d %-10s %s\n", + $islenode, $ccix, $arch, $islename; + $wiccix2arch{$ccix}= $arch; + } + } +} - # Compute arch's +sub yppedia_archs_chart_labels () { + # Assign archipelago labels to groups of islands + # foreach my $label (@wiarchlabels) { my ($ax,$ay,$arch) = @$label; - my $d2best= 9999999; - my $best; + my $best_ccmulti= -1; + my $best_d2= 0; + my $best_n; # print DEBUG "$ax,$ay arch-island-search $arch\n"; $ay += 1; $ax += 2; # coords are rather to the top left of label foreach my $vertex ($wiarchs->vertices()) { next unless exists $winode2island{$vertex}; my $ccix= $wiarchs->connected_component_by_vertex($vertex); my @cc= $wiarchs->connected_component_by_index($ccix); + my $ccmulti= @cc > 1; my ($vx,$vy) = split /,/, $vertex; my $d2= ($vx-$ax)*($vx-$ax) + ($vy-$ay)*($vy-$ay); -# printf DEBUG -# "%d,%d arch-island-search %s d2=%d ccix=%d cc=%d %s\n", -# $ax,$ay, $vertex, $d2, $ccix, scalar(@cc), -# $winode2island{$vertex}; - next unless @cc > 1; - next unless $d2 < $d2best; - $best= $vertex; - $d2best= $d2; - } - die 'no island vertices?!' unless defined $best; - printf DEBUG "%2d,%-2d arch-island-select %5s d2=%-2d %-10s %s\n", - $ax,$ay, $best, $d2best, $arch, $winode2island{$best}; - my $ccix= $wiarchs->connected_component_by_vertex($best); + my $cmp= $ccmulti <=> $best_ccmulti + || $best_d2 <=> $d2; + printf DEBUG "%2d,%-2d arch-island-search %5s d2=%4d cc%-2d". + " #cc=%2d ccmulti=%d cmp=%2d %s\n", + $ax,$ay, $vertex, $d2, $ccix, scalar(@cc), $ccmulti, $cmp, + $winode2island{$vertex}; + next unless $cmp > 0; + $best_n= $vertex; + $best_d2= $d2; + $best_ccmulti= $ccmulti; + } + die 'no island vertices?!' unless defined $best_n; + my $ccix= $wiarchs->connected_component_by_vertex($best_n); + printf DEBUG + "%2d,%-2d arch-island-select %-5s d2=%4d cc%-2d %-10s %s\n", + $ax,$ay, $best_n, $ccix, $best_d2, $arch, $winode2island{$best_n}; my $desc= join "\n", map { my $in= $winode2island{$_}; " LP $_". (defined $in ? ", $in" : ""); } sort $wiarchs->connected_component_by_index($ccix); - if (exists $wiccix2arch{$ccix}) { + if (exists $wiccix2arch{$ccix} and $wiccix2arch{$ccix} ne $arch) { error("archipelago determination failed, wrongly merged:\n". " archipelago $arch\n". " archipelago $wiccix2arch{$ccix}\n". @@ -194,6 +251,84 @@ sub process_yppedia_graphs () { } } +sub yppedia_archs_fillbynearest() { + # Assign islands not labelled above to archipelagoes. + # + # We do this by, for each connected component (set of islands + # linked by purchaseable charts), searching for the nearest other + # connected component which has already been assigned an arch. + # `Nearest' means shortest distance of unpurchaseable charts, in + # leagues. + # + # we need only consider vertices which weren't `boring intermediate + # vertices' (removed during optimisation as being of order 2) + my @ccs_useful= map { + [ grep { $widists->has_vertex($_) } @$_ ] + } $wiarchs->connected_components(); + + my @assignments; + + foreach my $sourceccix (0..$#ccs_useful) { + next if defined $wiccix2arch{$sourceccix}; + next unless $ccs_useful[$sourceccix]; + + my @sourcecc= $wiarchs->connected_component_by_index($sourceccix); + my @islandnodes= grep { $winode2island{$_} } @sourcecc; + next unless @islandnodes; # don't care, then + + foreach my $islandnode (@islandnodes) { + printf DEBUG "%-5s arch-join-need cc%-2d %s\n", + $islandnode, $sourceccix, $winode2island{$islandnode}; + } + my $best_dist= 9999999; + my ($best_target, $best_targetccix, $best_source); + foreach my $targetccix (0..$#ccs_useful) { + next unless defined $wiccix2arch{$targetccix}; # not helpful + next unless $ccs_useful[$targetccix]; + foreach my $target ($wiarchs-> + connected_component_by_index($targetccix)) { + next unless $widists->has_vertex($target); + foreach my $source (@sourcecc) { + my $target_dist= widist($target,$source); + next unless defined $target_dist; + next if $target_dist >= $best_dist; + $best_dist= $target_dist; + $best_source= $source; + $best_target= $target; + $best_targetccix= $targetccix; + } + } + } + die "no possible target ?!" unless defined $best_target; + + my $arch= $wiccix2arch{$best_targetccix}; + my $best_island= $winode2island{$best_target}; + printf DEBUG "%-5s arch-join-to %-5s dist=%2d cc%-2d %-10s %s\n", + $best_source, $best_target, $best_dist, + $best_targetccix, $arch, + defined($best_island) ? $best_island : "-"; + + push @assignments, [ $sourceccix, $arch ]; + } + foreach my $assign (@assignments) { + $wiccix2arch{$assign->[0]}= $assign->[1]; + } +} + +sub yppedia_graph_shortest_paths () { + $wialldists= $widists->APSP_Floyd_Warshall(); +} + +sub widist ($$) { + my ($p,$q) = @_; + my $pl= $wialldists->path_length($p,$q); +# die "$p $q" unless defined $pl; +# my @pv= $wialldists->path_vertices($p,$q); +# if (@pv == $pl) { return $pl; } +# printf DEBUG "%-5s PATHLENGTH %-5s pl=%s pv=%s\n", $p,$q,$pl,join('|',@pv); + return $pl; +} + sub winode2arch ($) { my ($node) = @_; my $ccix= $wiarchs->connected_component_by_vertex($node); @@ -213,16 +348,14 @@ sub compare_island_lists () { error("would delete island: $island"); next; } - my $ccix= $wiarchs->connected_component_by_vertex($node); - my $wiarch= $wiccix2arch{$ccix}; + my $wiarch= winode2arch($node); if (!defined $wiarch) { error("island has no arch: $island"); next; } my $dbarch= $dbisland2arch{$island}; - my $wiarch= winode2arch($node); if ($wiarch ne $dbarch) { - change("change archipelago from $dbarch to $wiarch". + change("archipelago change from $dbarch to $wiarch". " for island $island"); } } @@ -235,14 +368,182 @@ sub compare_island_lists () { next; # We check arches of non-new islands above } - change("new island in $wiarch: $island"); + change("island new in $wiarch: $island"); + } + } +} + +sub shortest_path_reduction ($$) { + my ($what,$g) = @_; + # + # Takes a graph $g (and a string for messages $what) and returns + # a new graph which is the miminal shortest path transient reduction + # of $g. + # + # We also check that the shortest path closure of the intended result + # is the same graph as the input. Thus the input must itself be + # a shortest path closure; if it isn't, we die. + + my $proof=<<'END'; # way to make a big comment + + Premises and definitions: + + 1. F is an undirected weighted graph with positive edge weights. + + 2. All graphs we will consider have the same vertices as F. + + 3. G = Closure(F) is the graph of cliques whose edge weights + are the shortest paths in F, one clique for each connected + component in F. + + 3a. |XY| for vertices X, Y is the weight of the edge XY in G. + If XY is not in G, |XY| is infinite. + + 4. A `reduction' of G is a subgraph K of G such that Closure(K) = G. + The reduction is `minimal' if there is no strict subgraph K' + of K such that Closure(K') = G. + + 5. Now each edge of G may be: + - `unnecessary': included in no minimal reductions of G. + - `essential': included in all minimal reductions of G. + - `contingent': included in some but not all. + + 6. Consider for any edge AC between the vertices A and C, + whether there is any B such that |AB|+|BC| = |AC| ? + (There can be no B such that the sum < |AC| since that would + mean that |AC| wasn't equal to the shortest path length.) + + 6a. No such B: AC is therefore the only shortest path from A to C + (since G is not a multigraph). AC is thus an essential edge. + + 6b. Some such B: Call all such edges AC `questionable'. + + 6c. Thus all edges are essential or questionable. + + 7. Suppose AC is a shortest contingent edge. AC must be + questionable since it is not essential. Suppose it is + made questionable by the existence of B such that |AB|+|BC| = + |AC|. Consider AB and BC. Since |AB| and |BC| are positive, + |BC| and |AB| must be < |AC| ie AB and BC are shorter than AC. + Since AC is a shortest contingent edge, there must be shortest + paths in G for AB and BC consisting entirely of essential edges. + + 8. Therefore it is always safe to remove AC since the paths + A..B and B..C will definitely still remain and provide a path + A..B..C with length |AB|+|BC| = |AC|. + + 9. Thus AC is unnecessary, contradicting the assumption in 7. + There are therefore no shortest contingent edges, and + thus no contingent edges. + + 10. We can construct a minimal reduction directly: for each edge + AC in G, search for a vertex B such that |AB|+|BC| = |AC|. + If we find none, AC is essential. If we find one then AC is + not essential and is therefore unnecessary. + +END + + printf DEBUG "spr %s before %d\n", $what, scalar($g->edges()); + + my $result= Graph::Undirected->new(); + foreach my $edge_ac ($g->edges()) { + my $edgename_ac= join ' .. ', @$edge_ac; + printf DEBUG "spr %s edge %s\n", $what, $edgename_ac; + my $w_ac= $g->get_edge_weight(@$edge_ac); + my $needed= 1; + foreach my $vertex_b ($g->vertices()) { + next if grep { $_ eq $vertex_b } @$edge_ac; + my $w_ab= $g->get_edge_weight($edge_ac->[0], $vertex_b); + next unless defined $w_ab; + next if $w_ab >= $w_ac; + my $w_bc= $g->get_edge_weight($vertex_b, $edge_ac->[1]); + next unless defined $w_ac; + next if $w_ab + $w_bc > $w_ac; + # found path + printf DEBUG "spr %s edge %s unnecessary %s\n", + $what, $edgename_ac, $vertex_b; + $needed= 0; + last; + } + if ($needed) { + printf DEBUG "spr %s edge %s essential\n", $what, $edgename_ac; + $result->add_weighted_edge(@$edge_ac,$w_ac); + } + } + printf DEBUG "spr %s result %d\n", $what, scalar($result->edges()); + + my $apsp= $result->APSP_Floyd_Warshall(); + foreach my $ia (sort $g->vertices()) { + foreach my $ib (sort $g->vertices()) { + my $din= $g->get_edge_weight($ia,$ib); + my $dout= $apsp->path_length($ia,$ib); + $din= defined($din) ? $din : 'infinity'; + $dout= defined($dout) ? $dout : 'infinity'; + error("$what spr apsp discrepancy in=$din out=$dout". + " for $ia .. $ib") + if $din != $dout; + } + } + return $result; +} + +sub yppedia_graph_spr () { + my $base= Graph::Undirected->new(); + foreach my $na (sort keys %winode2island) { + my $ia= $winode2island{$na}; + foreach my $nb (sort keys %winode2island) { + my $ib= $winode2island{$nb}; + $base->add_weighted_edge($ia,$ib, widist($na,$nb)); + } + } + $wispr= shortest_path_reduction('wi',$base); +} + +sub compare_distances () { + foreach my $ia (sort keys %dbisland2arch) { + my $na= $wiisland2node{$ia}; + next unless defined $na; + foreach my $ib (sort keys %dbisland2arch) { + next unless $ia le $ib; # do every pair only once + my $dbdist= $dbspr->get_edge_weight($ia,$ib); + my $widist= $wispr->get_edge_weight($ia,$ib); + next unless defined $dbdist || defined $widist; + + if (!defined $widist) { + warning(sprintf "route delete %2d for %s .. %s", + $dbdist, $ia,$ib); + } elsif (!defined $dbdist) { + change(sprintf "route new %2d for %s .. %s", + $widist, $ia,$ib); + } elsif ($dbdist != $widist) { + change(sprintf "route change %2d to %2d for %s .. %s", + $dbdist, $widist, $ia,$ib); + } } } } -db_setocean('Midnight'); +parse_info_serverside(); + +progress("reading database"); + +db_setocean($ocean); db_connect(); -parse_yppedia_map(); -parse_database_map(); -process_yppedia_graphs(); -compare_island_lists(); +database_fetch_ocean(); + +progress("computing database spr"); database_graph_spr(); + +progress("reading yppedia chart"); yppedia_chart_parse(); +progress("adding shortcuts"); yppedia_graphs_add_shortcuts(); +progress("pruning boring vertices"); yppedia_graphs_prune_boring(); +progress("checking yppedia graphs"); yppedia_graphs_check(); +progress("setting archs from source-info"); yppedia_archs_sourceinfo(); +progress("computing shortest paths"); yppedia_graph_shortest_paths(); +progress("setting archs from labels"); yppedia_archs_chart_labels(); +progress("setting archs from nearby"); yppedia_archs_fillbynearest(); +progress("computing yppedia spr"); yppedia_graph_spr(); + +progress("comparing islands"); compare_island_lists(); +progress("comparing distances"); compare_distances(); + +print_messages();