#!/usr/bin/perl -w use strict; use POSIX; use IO::Handle; use Carp; use TOML::Parser; use Data::Dumper; use Time::HiRes; use Digest::MD5 qw(md5_hex); use File::Compare; our $max_rate = 2; # per second # todo: allow to read size details out of svg (maybe in daemon-otter?) # todo: allow scraper method none to handle item vs filename mismatch #print Dumper($libinfo); open DEBUG, ">&STDERR" or die $!; autoflush DEBUG 1; sub run_curl { my ($datalog, $output, $url, @xopts) = @_; my @curl = (qw(curl -Ssf -L --proto-redir -all), @xopts); push @curl, '-o', "$output.tmp", $url; our $last_curl; $last_curl //= 0.; my $now = Time::HiRes::time; my $delay = 1./$max_rate - ($now - $last_curl); Time::HiRes::sleep $delay if $delay > 0; $last_curl = $now; # print DEBUG "+ @curl\n"; $!=$?=0; my $r = system @curl; die "curl failed ($? $!): @curl" if $r; my $logtime = strftime "%F %T UTC", gmtime time; print $datalog "$logtime: downloaded into $output from $url\n" or die $!; rename "$output.tmp", "$output" or die "install $output: $!"; } sub cfg_lookup_1 ($@) { my ($dict, @keys) = @_; #print DEBUG "cfg_lookup_1 ".Dumper($dict, \@keys); foreach my $k (@keys) { confess unless ref $dict eq 'HASH'; $dict = $dict->{$k}; return undef if !defined $dict; } #print DEBUG "cfg_lookup_1 (@keys) => $dict\n"; return $dict; } sub mk_cfg_lookup ($$@) { my ($groups, $thisgroup, @keysprefix) = @_; #print DEBUG "mk_cfg_lookup >@keysprefix< ".Dumper($thisgroup); return sub { #print DEBUG "from mk_cfg_lookup >@keysprefix< >@_<\n"; my $cgroup = $thisgroup; for (;;) { my $got = cfg_lookup_1($cgroup, @keysprefix, @_); return $got if defined $got; my $inherit = cfg_lookup_1($cgroup, qw(inherit)); return undef unless $inherit; $cgroup = $groups->{$inherit}; confess "$inherit".Dumper($groups,$inherit)."?" unless $cgroup; } } } sub cfg_affixes ($$$) { my ($cfg, $keybase, $middle) = @_; return ($cfg->("${keybase}_prefix") // '') .$middle .($cfg->("${keybase}_suffix") // ''); } sub method_none { return sub { } } sub methodlic_none { undef } sub methodisoffline_none { 0 } sub methodlic_wikimedia ($) { my ($scraper) = @_; my $spdx = ${ \ $scraper->('spdx') }; return <('site-title') } They are all available under at least, and distributed here under, $spdx END (deepending on the file - consult the source file for details) END as well as possibly other licences. There is NO WARRANTY. See .download-log for the original URL and download timestamp. The wikitext of the File: page on the wiki is in .wikitext, and contains the authorship and derivation information, as well as information about any alternative licence terms. [ This LICENCE file was generated by media-scraper and should not be manually edited ] END } sub method_wikimedia ($$$) { my ($scraper, $methname) = @_; #print DEBUG "METHOD $methname...\n"; return sub { my ($lbase, $ldest, $rstem) = @_; my $rfilename = cfg_affixes $scraper, 'filename', $rstem; my $url = cfg_affixes $scraper, 'url', $rfilename; my $wt = "$lbase.wikitext"; #print DEBUG "rfilename=$rfilename url=$url .\n"; my $datalog = new IO::File "$lbase.download-log", '>>' or die $!; print $datalog "\n" or die $!; run_curl $datalog, $wt, $url; open WT, "$wt" or die $!; my (@lics) = @{ $scraper->('licences') }; s/\W/\\$&/g foreach @lics; s/\\\*$/.*/g foreach @lics; my $lic1_re = '(?:'.(join '|', @lics).')'; my $ok; while () { s/\s+$//; if (m{^ \{\{ ($lic1_re) \}\} $}xi || m{^ \{\{ self\| (?:[^{}]*\|)? ($lic1_re) (?:\|[^{}]*)? \}\} $}xi) { print "licence=$1 "; $ok = 1; last; } } if (!$ok) { die "\nfile $wt from $url no appropriate licence $lic1_re"; } my $hash_prefix = ''; if ($scraper->('data_url_hashprefix')) { # https://www.mediawiki.org/wiki/Special:MyLanguage/Manual:$wgHashedUploadDirectory md5_hex($rfilename) =~ m{^((.).)} or die; $hash_prefix .= "$2/$1/"; } my $data_url = cfg_affixes $scraper, 'data_url', $hash_prefix.$rfilename; run_curl $datalog, $ldest, $data_url; close $datalog or die $!; }; } sub methodisoffline_wikimedia { 0 } sub method_cards_oxymoron { my ($scraper, $methname) = @_; return sub { my ($lbase, $ldest, $rstem) = @_; my $lgif = $lbase; $lgif =~ m{/card-oxymoron-(\w+)-(\w+)$} or die "$lbase ?"; my $basename = $`; $lgif = "$basename/cards/src/\l$1$2.gif"; if (stat $lgif) { } elsif ($! != ENOENT) { die "$lgif $!"; } else { print STDERR "\nbuilding $basename...\n"; $!=$?=0; system "$basename/build" and die "$! $?"; } $!=$?=0; system qw(convert), $lgif, $ldest and die "$! $?"; } } sub methodlic_cards_oxymoron { undef } sub methodisoffline_cards_oxymoron ($$) { 1 } our $offline; while (@ARGV && $ARGV[0] =~ m/^-/) { $_ = shift @ARGV; last if m/^-$/; if (m/^--offline$/) { $offline = 1; } else { die "bad option: \`$_'"; } } my $input = $ARGV[0] // die; $input =~ m/\.toml$/ or die "$input ?"; my $basename = $`; mkdir $basename or $!==EEXIST or die "mkdir $basename: $!"; my $parser = TOML::Parser->new(); my $libinfo = $parser->parse_file($input); my $groups = $libinfo->{group}; my $scraper = sub { $libinfo->{scraper}{$_[0]} }; my $method = $scraper->('method'); $method =~ s/-/_/g; my $method_fn = ${*::}{"method_$method"}; my $methodlic_fn = ${*::}{"methodlic_$method"}; my $licpath = "$basename/LICENCE"; my $methodisoffline_fn = ${*::}{"methodisoffline_$method"}; my $method_lictext = $methodlic_fn->($scraper); if (defined $method_lictext) { my $licfile = new IO::File "$licpath.tmp", '>' or die $!; print $licfile <('spdx') } (applies to the contents of this directory unless otherwise stated) END The download was done by media-scraper, controlled by $input. END close $licfile or die $!; } my $makepath = "$basename/files.make"; my $makefile = new IO::File "$makepath.tmp", '>' or die $!; foreach my $groupname (sort keys %$groups) { my $group_dict = $groups->{$groupname}; my $gcfg = mk_cfg_lookup($groups, $group_dict); my $method_impl = $method_fn->($scraper, $method); foreach (split(/\n/, $gcfg->('files'))) { s/^\s+//; next if m/^\#/ || m/^$/ || m/^\:/; m/^(\S+)\s+(\S+)/ or die "bad line in files: \`$_'"; my $lministem = $1; my $rministem = $2; my $lstem = cfg_affixes $gcfg, 'item', $lministem; my $rstem = cfg_affixes $gcfg, 'stem', $rministem; my $lbase = "$basename/$lstem"; my $lupstream = "$lbase.svg"; my $lprocessed = "$lbase.usvg"; print DEBUG "file $lstem "; my $process_colour = sub ($$) { my ($linput, $lprocessed) = @_; print $makefile <('colours'); if (!keys %$colours) { $process_colour->($lupstream, $lprocessed, ""); return; } foreach my $colour (sort keys %$colours) { my $cspec = $colours->{$colour}; my $abbrev = $cspec->{abbrev} or confess; my $ncoloured = $lupstream; $ncoloured =~ s/\.svg$/.coloured$&/; my $outfile = $lprocessed; $ncoloured =~ s/_c/$abbrev/ or confess "$outfile ?"; $outfile =~ s/_c/$abbrev/ or confess "$outfile ?"; my $coloured = $lupstream; my $ci = 0; my $cfp; my $emitmap = sub { my ($from, $to) = @_; confess if $from =~ m/\W/ || $to =~ m/\W/; $coloured = $ncoloured; if (!defined $cfp) { $cfp ="\$@.$ci.tmp"; print $makefile <$cfp END } $ci++; my $nfp = "\$@.$ci.tmp"; print $makefile <$nfp END $cfp = $nfp; }; my %map = %{ $cspec->{map} // { } }; while (keys %map) { my $from = (sort keys %map)[0]; my @maybe_cycle = (); my $cycle; for (;;) { push @maybe_cycle, $from; my $to = $map{$from}; if (!exists $map{$to}) { last; } $from = $to; if (grep { $_ eq $to } @maybe_cycle) { $cycle = $to; last; } } my $emit_most = sub ($) { my ($end) = @_; $end //= $#maybe_cycle; foreach my $i (@maybe_cycle[0..$end]) { $emitmap->($i, $map{$i}); } }; if (defined $cycle) { my $temp = 'abcbfb'; # chosen at random my $aside = $maybe_cycle[-1]; $emitmap->($aside, $temp); $emit_most->($#maybe_cycle-1); $emitmap->($temp, $map{$aside}); print $makefile <(); } delete $map{$_} foreach @maybe_cycle; } if ($ci) { # inkscape extensions have a tendency to write an empty # file when they don't like their input or arguments print $makefile <($coloured, $outfile); } }; $process_colours->(); if (! $methodisoffline_fn->($basename, $lupstream)) { if (stat $lupstream) { print DEBUG "already.\n"; next; } die "$lupstream $!" unless $!==ENOENT; if ($offline) { print DEBUG "missing.\n"; warn "offline but $lupstream missing\n"; next; } } $method_impl->($lbase, $lupstream, $rstem); print DEBUG "done.\n"; } } close $makefile or die $!; if (defined($method_lictext)) { my $cmp = compare("$licpath.tmp", $licpath); die if $cmp < 0; if ($cmp) { rename "$licpath.tmp", $licpath or die $!; } else { remove "$licpath.tmp"; } } rename "$makepath.tmp", $makepath or die $!;