X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.db-live.git;a=blobdiff_plain;f=yarrg%2Fyppedia-chart-parser;h=76100dc5ccf88a2931a00ea694399f9929634d71;hp=8eafa4cbf9293c78c278be570d8cbc12b751ba38;hb=13931006f26ed2707664989b5adb5ed893054dc2;hpb=f98f5aba41d756c851656d88e064d1714458f930 diff --git a/yarrg/yppedia-chart-parser b/yarrg/yppedia-chart-parser index 8eafa4c..76100dc 100755 --- a/yarrg/yppedia-chart-parser +++ b/yarrg/yppedia-chart-parser @@ -35,6 +35,7 @@ use warnings; use Graph::Undirected; use Commods; use CommodsDatabase; +use CommodsScrape; my $widists= Graph::Undirected->new(); my $wiarchs= Graph::Undirected->new(); @@ -51,11 +52,17 @@ my %wtisland2arch; my $dbdists; my %dbisland2arch; +my $debugfh; + my @msgkinds= qw(change warning error); my %msgs; my %msgprinted; my %msgkindprinted; -sub pmsg ($$) { push @{ $msgs{$_[0]} }, "$_[0]: $_[1]\n"; } +sub pmsg ($$) { + my $m= "$_[0]: $_[1]\n"; + print $debugfh "D $m"; + push @{ $msgs{$_[0]} }, $m; +} sub warning ($) { pmsg("warning",$_[0]); } sub error ($) { pmsg("error", $_[0]); } sub change ($) { pmsg("change", $_[0]); } @@ -73,17 +80,26 @@ sub print_messages () { } sub progress ($) { print "($_[0])\n"; } -if (@ARGV && $ARGV[0] eq '--debug') { - shift @ARGV; - open DEBUG, ">&STDOUT" or die $!; - select(DEBUG); $|=1; -} else { - open DEBUG, ">/dev/null" or die $!; +my $stdin_chart=0; + +$debugfh= new IO::File ">/dev/null" or die $!; + +while (@ARGV) { + last unless $ARGV[0] =~ m/^-/; + $_= shift @ARGV; + last if m/^--$/; + if ($_ eq '--debug') { + $debugfh= new IO::File ">&STDOUT" or die $!; + select($debugfh); $|=1; select(STDOUT); + } elsif ($_ eq '--stdin-chart') { + $stdin_chart=1; + } else { + die; + } } -select(STDOUT); $|=1; +$|=1; @ARGV==1 or die; -$ARGV[0] =~ m/^\-/ and die; my $ocean= shift @ARGV; @@ -98,59 +114,31 @@ sub nn_xy ($$) { return $n; } -sub yppedia_chart_parse () { - # We don't even bother with tag soup; instead we do line-oriented parsing. - - while () { - s/\<--.*--\>//g; - s/^\s*//; chomp; s/\s+$//; s/\s+/ /g; - s/\<\/?(?:b|em)\>//g; - s/\{\{Chart\ style\|[^{}]*\}\}//g; - next unless m/\{\{/; # only interested in chart template stuff - - my ($x,$y, $arch,$island,$solid,$dirn); - my $nn= sub { return nn_xy($x,$y) }; - - if (($x,$y,$arch) = - m/^\{\{ chart\ label \|(\d+)\|(\d+)\| .* - \'\[\[ [^][\']* \| (\S+)\ archipelago \]\]\'*\}\}$/xi) { - printf DEBUG "%2d,%-2d arch %s\n", $x,$y,$arch; - push @wiarchlabels, [ $x,$y,$arch ]; - } elsif (($x,$y,$island) = - m/^\{\{ chart\ island\ icon \|(\d+)\|(\d+)\| - ([^| ][^|]*[^| ]) \| .*\}\}$/xi) { - my $n= $nn->(); - $wiisland2node{$island}= $n; - $winode2island{$n}= $island; - $widists->add_vertex($n); - $wiarchs->add_vertex($n); - printf DEBUG "%2d,%-2d island %s\n", $x,$y,$island; - } elsif (($solid,$x,$y,$dirn) = - m/^\{\{ chart\ league((?:\ solid)?) \|(\d+)\|(\d+)\| - ([-\/\\o]) \| .*\}\}$/xi) { - next if $dirn eq 'o'; - - my ($bx,$by) = ($x,$y); - if ($dirn eq '-') { $bx+=2; } - elsif ($dirn eq '\\') { $bx++; $by++; } - elsif ($dirn eq '/') { $x++; $by++; } - else { die; } - - my $nb= nn_xy($bx,$by); - $widists->add_weighted_edge($nn->(), $nb, 1); - $wiarchs->add_edge($nn->(), $nb) if $solid; - $wiarchs->add_edge($nn->(), $nb) if $solid; - - printf DEBUG "%2d,%-2d league %-6s %s %s\n", $x,$y, - $solid?'solid':'dotted', $dirn, $nb; - } elsif ( - m/^\{\{ chart\ head \}\}$/xi - ) { - next; - } else { - warning("line $.: ignoring incomprehensible: $_"); - } - } +sub run_yppedia_chart_parse ($) { + my ($oceanfh) = @_; + yppedia_chart_parse($oceanfh, $debugfh, + \&nn_xy, + sub { + my ($x,$y,$arch) = @_; + push @wiarchlabels, [ $x,$y,$arch ]; + }, + sub { + my ($n, $island) = @_; + $wiisland2node{$island}= $n; + $winode2island{$n}= $island; + $widists->add_vertex($n); + $wiarchs->add_vertex($n); + }, + sub { + my ($na, $nb, $solid) = @_; + $widists->add_weighted_edge($na, $nb, 1); + $wiarchs->add_edge($na, $nb) if $solid; + $wiarchs->add_edge($na, $nb) if $solid; + }, + sub { + my ($lno,$l) = @_; + warning("line $l: ignoring incomprehensible: $l"); + }); } sub yppedia_graphs_add_shortcuts () { @@ -163,7 +151,7 @@ sub yppedia_graphs_add_shortcuts () { my $q= sprintf "%d,%d", $ax+$_[0], $ay+$_[1]; return unless $widists->has_vertex($q); return if $widists->has_edge($p,$q); - printf DEBUG "%-5s league-shortcut %-5s\n", $p, $q; + printf $debugfh "%-5s league-shortcut %-5s\n", $p, $q; $widists->add_weighted_edge($p,$q,1); }; $add_shortcut->( 2,0); @@ -182,7 +170,7 @@ sub yppedia_graphs_prune_boring () { map { $weight += $widists->get_edge_weight($delete, $_) } @neigh; $widists->add_weighted_edge(@neigh, $weight); $widists->delete_vertex($delete); - printf DEBUG "%-5s elide %5s %-5s %2d\n", $delete, @neigh, $weight; + printf $debugfh "%-5s elide %5s %-5s %2d\n", $delete, @neigh, $weight; } } @@ -213,7 +201,7 @@ sub yppedia_archs_sourceinfo () { error("island in $arch in source-info". " connected to $oldarch as well: $islename") if defined $oldarch && $oldarch ne $arch; - printf DEBUG "%-5s force-island-arch cc%-2d %-10s %s\n", + printf $debugfh "%-5s force-island-arch cc%-2d %-10s %s\n", $islenode, $ccix, $arch, $islename; $wiccix2arch{$ccix}= $arch; } @@ -225,32 +213,28 @@ sub yppedia_archs_chart_labels () { # foreach my $label (@wiarchlabels) { my ($ax,$ay,$arch) = @$label; - my $best_ccmulti= -1; - my $best_d2= 0; + my $best_d2= 9999999; my $best_n; -# print DEBUG "$ax,$ay arch-island-search $arch\n"; +# print $debugfh "$ax,$ay arch-island-search $arch\n"; $ay += 1; $ax += 2; # coords are rather to the top left of label foreach my $vertex ($wiarchs->vertices()) { next unless exists $winode2island{$vertex}; my $ccix= $wiarchs->connected_component_by_vertex($vertex); my @cc= $wiarchs->connected_component_by_index($ccix); - my $ccmulti= @cc > 1; my ($vx,$vy) = split /,/, $vertex; my $d2= ($vx-$ax)*($vx-$ax) + ($vy-$ay)*($vy-$ay); - my $cmp= $ccmulti <=> $best_ccmulti - || $best_d2 <=> $d2; - printf DEBUG "%2d,%-2d arch-island-search %5s d2=%4d cc%-2d". - " #cc=%2d ccmulti=%d cmp=%2d %s\n", - $ax,$ay, $vertex, $d2, $ccix, scalar(@cc), $ccmulti, $cmp, + my $cmp= $best_d2 <=> $d2; + printf $debugfh "%2d,%-2d arch-island-search %5s d2=%4d cc%-2d". + " #cc=%2d cmp=%2d %s\n", + $ax,$ay, $vertex, $d2, $ccix, scalar(@cc), $cmp, $winode2island{$vertex}; next unless $cmp > 0; $best_n= $vertex; $best_d2= $d2; - $best_ccmulti= $ccmulti; } die 'no island vertices?!' unless defined $best_n; my $ccix= $wiarchs->connected_component_by_vertex($best_n); - printf DEBUG + printf $debugfh "%2d,%-2d arch-island-select %-5s d2=%4d cc%-2d %-10s %s\n", $ax,$ay, $best_n, $ccix, $best_d2, $arch, $winode2island{$best_n}; my $desc= join "\n", map { @@ -296,7 +280,7 @@ sub yppedia_archs_fillbynearest() { next unless @islandnodes; # don't care, then foreach my $islandnode (@islandnodes) { - printf DEBUG "%-5s arch-join-need cc%-2d %s\n", + printf $debugfh "%-5s arch-join-need cc%-2d %s\n", $islandnode, $sourceccix, $winode2island{$islandnode}; } my $best_dist= 9999999; @@ -322,7 +306,7 @@ sub yppedia_archs_fillbynearest() { my $arch= $wiccix2arch{$best_targetccix}; my $best_island= $winode2island{$best_target}; - printf DEBUG "%-5s arch-join-to %-5s dist=%2d cc%-2d %-10s %s\n", + printf $debugfh "%-5s arch-join-to %-5s dist=%2d cc%-2d %-10s %s\n", $best_source, $best_target, $best_dist, $best_targetccix, $arch, defined($best_island) ? $best_island : "-"; @@ -344,7 +328,7 @@ sub widist ($$) { # die "$p $q" unless defined $pl; # my @pv= $wialldists->path_vertices($p,$q); # if (@pv == $pl) { return $pl; } -# printf DEBUG "%-5s PATHLENGTH %-5s pl=%s pv=%s\n", $p,$q,$pl,join('|',@pv); +# printf $debugfh "%-5s PATHLENGTH %-5s pl=%s pv=%s\n", $p,$q,$pl,join('|',@pv); return $pl; } @@ -381,11 +365,13 @@ sub compare_island_lists () { foreach my $island (sort keys %wiisland2node) { my $wtarch= $wtisland2arch{$island}; my $wiarch= wiisland2arch($island); - if (!defined $wtarch) { - error("island from chart not found on ocean page: $island"); - } elsif (defined $wiarch and $wtarch ne $wiarch) { - error("island in $wtarch on ocean page but". - " concluded $wiarch from chart: $island"); + if (!$stdin_chart) { + if (!defined $wtarch) { + error("island from chart not found on ocean page: $island"); + } elsif (defined $wiarch and $wtarch ne $wiarch) { + error("island in $wtarch on ocean page but". + " concluded $wiarch from chart: $island"); + } } my $dbarch= $dbisland2arch{$island}; @@ -399,10 +385,12 @@ sub compare_island_lists () { change("island new in $wiarch: $island"); } } - foreach my $island (sort keys %wtisland2arch) { - my $node= $wiisland2node{$island}; - next if defined $node; - error("island on ocean page but not in chart: $island"); + if (!$stdin_chart) { + foreach my $island (sort keys %wtisland2arch) { + my $node= $wiisland2node{$island}; + next if defined $node; + error("island on ocean page but not in chart: $island"); + } } } @@ -423,7 +411,8 @@ sub shortest_path_reduction ($$) { 1. F is an undirected weighted graph with positive edge weights. - 2. All graphs we will consider have the same vertices as F. + 2. All graphs we will consider have the same vertices as F + and none have self-edges. 3. G = Closure(F) is the graph of cliques whose edge weights are the shortest paths in F, one clique for each connected @@ -476,12 +465,14 @@ sub shortest_path_reduction ($$) { END - printf DEBUG "spr %s before %d\n", $what, scalar($g->edges()); + printf $debugfh "spr %s before %d\n", $what, scalar($g->edges()); my $result= Graph::Undirected->new(); foreach my $edge_ac ($g->edges()) { + $result->add_vertex($edge_ac->[0]); # just in case + next if $edge_ac->[0] eq $edge_ac->[1]; my $edgename_ac= join ' .. ', @$edge_ac; - printf DEBUG "spr %s edge %s\n", $what, $edgename_ac; + printf $debugfh "spr %s edge %s\n", $what, $edgename_ac; my $w_ac= $g->get_edge_weight(@$edge_ac); my $needed= 1; foreach my $vertex_b ($g->vertices()) { @@ -493,17 +484,17 @@ END next unless defined $w_ac; next if $w_ab + $w_bc > $w_ac; # found path - printf DEBUG "spr %s edge %s unnecessary %s\n", + printf $debugfh "spr %s edge %s unnecessary %s\n", $what, $edgename_ac, $vertex_b; $needed= 0; last; } if ($needed) { - printf DEBUG "spr %s edge %s essential\n", $what, $edgename_ac; + printf $debugfh "spr %s edge %s essential\n", $what, $edgename_ac; $result->add_weighted_edge(@$edge_ac,$w_ac); } } - printf DEBUG "spr %s result %d\n", $what, scalar($result->edges()); + printf $debugfh "spr %s result %d\n", $what, scalar($result->edges()); my $apsp= $result->APSP_Floyd_Warshall(); foreach my $ia (sort $g->vertices()) { @@ -544,9 +535,13 @@ sub yppedia_ocean_fetch_done () { } sub yppedia_ocean_fetch_chart () { - yppedia_ocean_fetch_start(1); - yppedia_chart_parse(); - yppedia_ocean_fetch_done(); + if ($stdin_chart) { + run_yppedia_chart_parse('::STDIN'); + } else { + yppedia_ocean_fetch_start(1); + run_yppedia_chart_parse('::OCEAN'); + yppedia_ocean_fetch_done(); + } } sub yppedia_ocean_fetch_text () { @@ -601,7 +596,7 @@ sub database_fetch_ocean () { undef %dbisland2arch; $dbdists= Graph::Undirected->new(); while ($row= $sth->fetchrow_hashref) { - print DEBUG "database-island $row->{'islandname'}". + print $debugfh "database-island $row->{'islandname'}". " $row->{'archipelago'}\n"; $dbisland2arch{$row->{'islandname'}}= $row->{'archipelago'}; } @@ -619,6 +614,82 @@ sub database_graph_spr () { $dbspr= shortest_path_reduction('db',$dbdists); } +sub database_do_updates () { + my $addisland= $dbh->prepare(<<'END') + INSERT OR IGNORE INTO islands (islandname, archipelago) VALUES (?, ?); +END + ; + foreach my $island (sort keys %wiisland2node) { + my $wiarch= wiisland2arch($island); + $addisland->execute($island, $wiarch); + } + + db_doall(<prepare(<<'END') + INSERT INTO dists VALUES + ((SELECT islandid FROM islands WHERE islandname == ?), + (SELECT islandid FROM islands WHERE islandname == ?), + ?); +END + ; + my $addroute= $dbh->prepare(<<'END') + INSERT INTO routes VALUES + ((SELECT islandid FROM islands WHERE islandname == ?), + (SELECT islandid FROM islands WHERE islandname == ?), + ?); +END + ; + foreach my $ia (sort keys %wiisland2node) { + my $na= $wiisland2node{$ia}; + foreach my $ib (sort keys %wiisland2node) { + my $nb= $wiisland2node{$ib}; + my $apdist= $ia eq $ib ? 0 : widist($na,$nb); + die "$ia $ib" unless defined $apdist; + my $sprdist= $wispr->get_edge_weight($ia,$ib); + die "$ia $ib $apdist $sprdist" if + defined($sprdist) && $sprdist != $apdist; + + $adddist->execute($ia,$ib,$apdist); + $addroute->execute($ia,$ib,$sprdist) if defined $sprdist; + } + } + + # select ia.islandname, ib.islandname, d.dist from dists as d, islands as ia on d.aiid = ia.islandid, islands as ib on d.biid = ib.islandid order by ia.islandname, ib.islandname; + +} + +#========== update _ocean-*.txt ========== + +our $localtopo_path; + +sub localtopo_rewrite () { + $localtopo_path= '_ocean-'.(lc $ocean).'.txt'; + my $fh= new IO::File "$localtopo_path.tmp", 'w'; + print $fh "# autogenerated - do not edit\n" or die $!; + print $fh "ocean $ocean\n" or die $!; + my %arches; + foreach my $isle (sort keys %wtisland2arch) { + my $arch= $wtisland2arch{$isle}; + push @{ $arches{$arch} }, $isle; + } + foreach my $arch (sort keys %arches) { + print $fh " $arch\n" or die $!; + foreach my $isle (@{ $arches{$arch} }) { + print $fh " $isle\n" or die $!; + } + } + print $fh "\n" or die $!; + close $fh or die $!; +} + +sub localtopo_commit () { + rename "$localtopo_path.tmp", $localtopo_path or die $!; +} + #========== main program ========== parse_info_serverside(); @@ -632,7 +703,10 @@ progress("computing shortest paths"); yppedia_graph_shortest_paths(); progress("setting archs from labels"); yppedia_archs_chart_labels(); progress("setting archs from nearby"); yppedia_archs_fillbynearest(); progress("computing yppedia spr"); yppedia_graph_spr(); -progress("fetching yppedia ocean text"); yppedia_ocean_fetch_text(); + +if (!$stdin_chart) { + progress("fetching yppedia ocean text"); yppedia_ocean_fetch_text(); +} db_setocean($ocean); db_connect(); @@ -662,15 +736,25 @@ for (;;) { if (!%msgkindprinted) { progress("updating database"); database_do_updates(); progress("updating _ocean-*.txt"); localtopo_rewrite(); + if ($stdin_chart) { + print STDERR "*** --stdin-chart, aborting!\n"; + exit 1; + } + progress("checking database"); db_check_referential_integrity(1); progress("committing database"); $dbh->commit(); progress("committing _ocean-*.txt"); localtopo_commit(); exit 0; } $dbh->rollback(); - + my $default= !$msgkindprinted{'warning'}; printf STDERR "*** confirm update %s ? ", $default?'(y/n)':'(n/y)'; + if ($stdin_chart) { + printf STDERR "[--stdin-chart]\n"; + exit 1; + } + $!=0; my $result= ; defined $result or die $!; $result =~ s/\s//g; $result= $default?'y':'n' if !length $result;