10 use Digest::MD5 qw(md5_hex);
13 our $max_rate = 2; # per second
15 # todo: allow to read size details out of svg (maybe in daemon-otter?)
16 # todo: allow scraper method none to handle item vs filename mismatch
18 #print Dumper($libinfo);
20 open DEBUG, ">&STDERR" or die $!;
24 my ($datalog, $output, $url, @xopts) = @_;
25 my @curl = (qw(curl -Ssf -L --proto-redir -all), @xopts);
26 push @curl, '-o', "$output.tmp", $url;
29 my $now = Time::HiRes::time;
30 my $delay = 1./$max_rate - ($now - $last_curl);
31 Time::HiRes::sleep $delay if $delay > 0;
33 # print DEBUG "+ @curl\n";
34 $!=$?=0; my $r = system @curl; die "curl failed ($? $!): @curl" if $r;
35 my $logtime = strftime "%F %T UTC", gmtime time;
36 print $datalog "$logtime: downloaded into $output from $url\n"
38 rename "$output.tmp", "$output" or die "install $output: $!";
41 sub cfg_lookup_1 ($@) {
42 my ($dict, @keys) = @_;
43 #print DEBUG "cfg_lookup_1 ".Dumper($dict, \@keys);
44 foreach my $k (@keys) {
45 confess unless ref $dict eq 'HASH';
47 return undef if !defined $dict;
49 #print DEBUG "cfg_lookup_1 (@keys) => $dict\n";
53 sub mk_cfg_lookup ($$@) {
54 my ($groups, $thisgroup, @keysprefix) = @_;
55 #print DEBUG "mk_cfg_lookup >@keysprefix< ".Dumper($thisgroup);
57 #print DEBUG "from mk_cfg_lookup >@keysprefix< >@_<\n";
58 my $cgroup = $thisgroup;
60 my $got = cfg_lookup_1($cgroup, @keysprefix, @_);
61 return $got if defined $got;
62 my $inherit = cfg_lookup_1($cgroup, qw(inherit));
63 return undef unless $inherit;
64 $cgroup = $groups->{$inherit};
65 confess "$inherit".Dumper($groups,$inherit)."?" unless $cgroup;
70 sub cfg_affixes ($$$) {
71 my ($cfg, $keybase, $middle) = @_;
73 ($cfg->("${keybase}_prefix") // '')
75 .($cfg->("${keybase}_suffix") // '');
78 sub method_none { return sub { } }
79 sub methodlic_none { undef }
80 sub methodisoffline_none { 0 }
82 sub methodlic_wikimedia ($) {
84 my $spdx = ${ \ $scraper->('spdx') };
85 return <<END.($spdx =~ m{/} ? <<END : '').<<END;
86 These files were all obtained from
87 ${ \ $scraper->('site-title') }
89 They are all available under at least, and distributed here under,
92 (deepending on the file - consult the source file for details)
94 as well as possibly other licences. There is NO WARRANTY.
96 See <file>.download-log for the original URL and download timestamp.
97 The wikitext of the File: page on the wiki is in <file>.wikitext, and
98 contains the authorship and derivation information, as well as
99 information about any alternative licence terms.
101 [ This LICENCE file was generated by media-scraper and should not
106 sub method_wikimedia ($$$) {
107 my ($scraper, $methname) = @_;
108 #print DEBUG "METHOD $methname...\n";
110 my ($lbase, $ldest, $rstem) = @_;
111 my $rfilename = cfg_affixes $scraper, 'filename', $rstem;
112 my $url = cfg_affixes $scraper, 'url', $rfilename;
113 my $wt = "$lbase.wikitext";
114 #print DEBUG "rfilename=$rfilename url=$url .\n";
115 my $datalog = new IO::File "$lbase.download-log", '>>' or die $!;
116 print $datalog "\n" or die $!;
117 run_curl $datalog, $wt, $url;
118 open WT, "$wt" or die $!;
119 my (@lics) = @{ $scraper->('licences') };
120 s/\W/\\$&/g foreach @lics;
121 s/\\\*$/.*/g foreach @lics;
122 my $lic1_re = '(?:'.(join '|', @lics).')';
126 if (m{^ \{\{ ($lic1_re) \}\} $}xi ||
127 m{^ \{\{ self\| (?:[^{}]*\|)? ($lic1_re) (?:\|[^{}]*)? \}\} $}xi) {
134 die "\nfile $wt from $url no appropriate licence $lic1_re";
136 my $hash_prefix = '';
137 if ($scraper->('data_url_hashprefix')) {
138 # https://www.mediawiki.org/wiki/Special:MyLanguage/Manual:$wgHashedUploadDirectory
139 md5_hex($rfilename) =~ m{^((.).)} or die;
140 $hash_prefix .= "$2/$1/";
142 my $data_url = cfg_affixes $scraper, 'data_url', $hash_prefix.$rfilename;
143 run_curl $datalog, $ldest, $data_url;
144 close $datalog or die $!;
148 sub methodisoffline_wikimedia { 0 }
150 sub method_cards_oxymoron {
151 my ($scraper, $methname) = @_;
153 my ($lbase, $ldest, $rstem) = @_;
155 $lgif =~ m{/card-oxymoron-(\w+)-(\w+)$} or die "$lbase ?";
157 $lgif = "$basename/cards/src/\l$1$2.gif";
160 } elsif ($! != ENOENT) {
163 print STDERR "\nbuilding $basename...\n";
164 $!=$?=0; system "$basename/build" and die "$! $?";
166 $!=$?=0; system qw(convert), $lgif, $ldest and die "$! $?";
169 sub methodlic_cards_oxymoron { undef }
170 sub methodisoffline_cards_oxymoron ($$) { 1 }
174 while (@ARGV && $ARGV[0] =~ m/^-/) {
177 if (m/^--offline$/) {
180 die "bad option: \`$_'";
184 my $input = $ARGV[0] // die;
185 $input =~ m/\.toml$/ or die "$input ?";
187 mkdir $basename or $!==EEXIST or die "mkdir $basename: $!";
189 my $parser = TOML::Parser->new();
190 my $libinfo = $parser->parse_file($input);
191 my $groups = $libinfo->{group};
192 my $scraper = sub { $libinfo->{scraper}{$_[0]} };
193 my $method = $scraper->('method');
195 my $method_fn = ${*::}{"method_$method"};
197 my $methodlic_fn = ${*::}{"methodlic_$method"};
198 my $licpath = "$basename/LICENCE";
200 my $methodisoffline_fn = ${*::}{"methodisoffline_$method"};
202 my $method_lictext = $methodlic_fn->($scraper);
203 if (defined $method_lictext) {
204 my $licfile = new IO::File "$licpath.tmp", '>' or die $!;
206 print $licfile <<END, $method_lictext, <<END or die $!;
207 SPDX-License-Identifier: ${ \ $scraper->('spdx') }
208 (applies to the contents of this directory unless otherwise stated)
212 The download was done by media-scraper, controlled by $input.
214 close $licfile or die $!;
217 my $makepath = "$basename/files.make";
218 my $makefile = new IO::File "$makepath.tmp", '>' or die $!;
220 foreach my $groupname (sort keys %$groups) {
221 my $group_dict = $groups->{$groupname};
222 my $gcfg = mk_cfg_lookup($groups, $group_dict);
223 my $method_impl = $method_fn->($scraper, $method);
224 foreach (split(/\n/, $gcfg->('files'))) {
226 next if m/^\#/ || m/^$/ || m/^\:/;
227 m/^(\S+)\s+(\S+)/ or die "bad line in files: \`$_'";
231 my $lstem = cfg_affixes $gcfg, 'item', $lministem;
232 my $rstem = cfg_affixes $gcfg, 'stem', $rministem;
233 my $lbase = "$basename/$lstem";
234 my $lupstream = "$lbase.svg";
235 my $lprocessed = "$lbase.usvg";
237 print DEBUG "file $lstem ";
239 my $process_colour = sub ($$) {
240 my ($linput, $lprocessed) = @_;
241 print $makefile <<END or die $!;
242 LIBRARY_FILES += $lprocessed
243 LIBRARY_FILE_INPUTS += $lprocessed:$linput
244 $lprocessed: $linput $licpath $input
245 \$(LIBRARY_PROCESS_SVG)
249 my $process_colours = sub {
250 my $colours = $gcfg->('colours');
251 if (!keys %$colours) {
252 $process_colour->($lupstream, $lprocessed, "");
255 foreach my $colour (sort keys %$colours) {
256 my $cspec = $colours->{$colour};
257 my $abbrev = $cspec->{abbrev} or confess;
258 my $ncoloured = $lupstream;
259 $ncoloured =~ s/\.svg$/.coloured$&/;
260 my $outfile = $lprocessed;
261 $ncoloured =~ s/_c/$abbrev/ or confess "$outfile ?";
262 $outfile =~ s/_c/$abbrev/ or confess "$outfile ?";
263 my $coloured = $lupstream;
268 my ($from, $to) = @_;
269 confess if $from =~ m/.\W|[^\w#]/
270 || $to =~ m/.\W|[^\w#]/;
272 $coloured = $ncoloured;
275 print $makefile <<END or die $!;
276 LIBRARY_CLEAN += $ncoloured
277 $ncoloured: $lupstream \$(MAKEFILE_DEP) \$(USVG_DEP) $makepath
278 \$(USVG_CMD) - -c <\$< >$cfp
282 my $nfp = "\$@.$ci.tmp";
283 print $makefile <<END or die $!;
284 \$(RECOLOUR_SVG) -f '$from' -t '$to' $cfp >$nfp
289 my %map = %{ $cspec->{map} // { } };
291 my $from = (sort keys %map)[0];
292 my @maybe_cycle = ();
295 push @maybe_cycle, $from;
296 my $to = $map{$from};
297 if (!exists $map{$to}) {
301 if (grep { $_ eq $to } @maybe_cycle) {
306 my $emit_most = sub ($) {
308 $end //= $#maybe_cycle;
309 foreach my $i (@maybe_cycle[0..$end]) {
310 $emitmap->($i, $map{$i});
313 if (defined $cycle) {
314 my $temp = 'abcbfb'; # chosen at random
315 my $aside = $maybe_cycle[-1];
316 $emitmap->($aside, $temp);
317 $emit_most->($#maybe_cycle-1);
318 $emitmap->($temp, $map{$aside});
319 print $makefile <<END or die $!;
320 # cycle: @maybe_cycle
325 delete $map{$_} foreach @maybe_cycle;
328 # inkscape extensions have a tendency to write an empty
329 # file when they don't like their input or arguments
330 print $makefile <<END or die $!;
336 $process_colour->($coloured, $outfile);
340 $process_colours->();
342 if (! $methodisoffline_fn->($basename, $lupstream)) {
343 if (stat $lupstream) {
344 print DEBUG "already.\n";
347 die "$lupstream $!" unless $!==ENOENT;
350 print DEBUG "missing.\n";
351 warn "offline but $lupstream missing\n";
356 $method_impl->($lbase, $lupstream, $rstem);
358 print DEBUG "done.\n";
362 close $makefile or die $!;
364 if (defined($method_lictext)) {
365 my $cmp = compare("$licpath.tmp", $licpath);
368 rename "$licpath.tmp", $licpath or die $!;
370 remove "$licpath.tmp";
374 rename "$makepath.tmp", $makepath or die $!;