From: Ian Jackson Date: Sat, 12 Sep 2020 12:46:53 +0000 (+0100) Subject: media-scraper: Licence handling X-Git-Tag: otter-0.2.0~950 X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=commitdiff_plain;h=42992b7f26b60c4c95974a38fe5fce01a176c187;p=otter.git media-scraper: Licence handling Signed-off-by: Ian Jackson --- diff --git a/LICENCE b/LICENCE index fcdc3181..261ab961 100644 --- a/LICENCE +++ b/LICENCE @@ -30,8 +30,27 @@ Attribution-Share Alike 3.0 Unported License, or the Creative Commons Attribution-ShareAlike 4.0 International Licence. There is NO WARRANTY. -For each file in the shape library, there is a corresponding file -".licence" giving licence, provenance and authorship information. +Many of these files were downloaded from public sources using the +./media-scraper script; if they are to be edited, it would be best to +edit them at the public source. + +For each file in the shape library, there is either a corresponding +file ".licence" (in the git source tree), or a file LICENCE in the +relevant directory, giving licence, provenance and authorship +information. + +The individual shapes must be sent by the server to the client as part +of HTML/XML documents and as part of the client/server protocol; they +become part of the dynamic HTML in the page in the client web browser. + +These versions of the files have been processed by usvg and are saved +as ".usvg" in the built version of the overall git tree. The spdx +licence identifier and the provenance of the file is recorded in an +XML comment like this where $SPDX is +the SPDX licence identifier for the primary licence we are using; and +the $LIBNAME/$BASENAME are relative to the library/ directory in the +source tree, where the full authorship and licence information, and +source url etc., can be found. You can find a copy of the actual licences in the files CC-BY-SA-3.0.txt and CC-BY-SA-4.0.txt. If not, see diff --git a/library/wikimedia.toml b/library/wikimedia.toml index 97a27185..6392dbc6 100644 --- a/library/wikimedia.toml +++ b/library/wikimedia.toml @@ -16,11 +16,13 @@ blt45 a white bishop """ [chess.scraper] +spdx = "CC-BY-SA-3.0" +filename_prefix = "Chess_" +filename_suffix = ".svg" + method = "wikimedia" licences = [ "Cc-by-sa-3.0", "GFDL|migration=relicense" ] url_prefix = "https://commons.wikimedia.org/wiki/File:" -filename_prefix = "Chess_" -filename_suffix = ".svg" url_suffix = "?action=raw" data_url_prefix = "https://upload.wikimedia.org/wikipedia/commons/" data_url_hashprefix = true diff --git a/media-scraper b/media-scraper index f25f2faa..8ba5aeb3 100755 --- a/media-scraper +++ b/media-scraper @@ -21,8 +21,6 @@ autoflush DEBUG 1; sub run_curl { my ($datalog, $output, $url, @xopts) = @_; - return if stat $output; - die "$output $!" unless $!==ENOENT; my @curl = (qw(curl -Ssf -L --proto-redir -all), @xopts); push @curl, '-o', "$output.tmp", $url; our $last_curl; @@ -31,7 +29,7 @@ sub run_curl { my $delay = 1./$max_rate - ($now - $last_curl); Time::HiRes::sleep $delay if $delay > 0; $last_curl = $now; - print DEBUG "+ @curl\n"; +# print DEBUG "+ @curl\n"; $!=$?=0; my $r = system @curl; die "curl failed ($? $!): @curl" if $r; my $logtime = strftime "%F %T UTC", gmtime time; print $datalog "$logtime: downloaded into $output from $url\n" @@ -39,13 +37,12 @@ sub run_curl { rename "$output.tmp", "$output" or die "install $output: $!"; } -sub method_wikimedia ($$) { +sub method_wikimedia ($$$) { my ($cfg, $methname) = @_; print DEBUG "METHOD $methname...\n"; return sub { my ($filespec, $base) = @_; my $filename = $cfg->{filename_prefix}.$filespec.$cfg->{filename_suffix}; - print DEBUG "file $filespec $filename "; my $url = $cfg->{url_prefix}.$filename.$cfg->{url_suffix}; my $wt = "$base.wikitext"; my $datalog = new IO::File "$base.download-log", '>>' or die $!; @@ -75,9 +72,15 @@ sub method_wikimedia ($$) { $data_url .= "$2/$1/"; } $data_url .= $filename.$cfg->{data_url_suffix}; - my $ups = "$base.upstream.svg"; + my $ups = "$base.svg"; run_curl $datalog, $ups, $data_url; close $datalog or die $!; + return <($filespec, $base); + my $licpath = "$base.licence"; + print DEBUG "file $base "; + if (stat $licpath) { + print DEBUG "already.\n"; + next; + } + die "$base $!" unless $!==ENOENT; + my $lictext = $method_fn->($filespec, $base); + $lictext = "SPDX-License-Identifier: $scraper->{spdx}\n\n".$lictext; + my $licfile = new IO::File "$licpath.tmp", '>' or die $!; + print $licfile $lictext or die $!; + close $licfile or die $!; + rename "$licpath.tmp", "$licpath" or die $!; + print DEBUG "done.\n"; } }