From: Ian Jackson Date: Sat, 25 Jul 2009 18:35:47 +0000 (+0100) Subject: wip commod-email-processor: can parse, now need to make it handle database X-Git-Tag: 3.0~38 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.db-test.git;a=commitdiff_plain;h=9d525e44bd9642a72eb7acd81bd9956b014e3872 wip commod-email-processor: can parse, now need to make it handle database --- diff --git a/pctb/Commods.pm b/pctb/Commods.pm index 5eb9c7b..7d9a0be 100644 --- a/pctb/Commods.pm +++ b/pctb/Commods.pm @@ -13,8 +13,9 @@ BEGIN { @ISA = qw(Exporter); @EXPORT = qw(&parse_masters %oceans %commods %clients &parse_pctb_commodmap %pctb_commodmap @pctb_commodmap - &get_our_version + &get_our_version &check_tsv_line &pipethrough_prep &pipethrough_run + &pipethrough_run_along &pipethrough_run_finish &pipethrough_run_gzip &cgipostform); %EXPORT_TAGS = ( ); @@ -128,21 +129,33 @@ sub pipethrough_prep () { my $tf= IO::File::new_tmpfile() or die $!; return $tf; } - -sub pipethrough_run ($$$@) { + +sub pipethrough_run_along ($$$@) { my ($tf, $childprep, $cmd, @a) = @_; $tf->flush or die $!; $tf->seek(0,0) or die $!; - my $child= open GZ, "-|"; defined $child or die $!; + my $fh= new IO::File; + my $child= $fh->open("-|"); defined $child or die $!; if (!$child) { open STDIN, "<&", $tf; &$childprep() if defined $childprep; exec $cmd @a; die $!; } + return $fh; +} +sub pipethrough_run_finish ($) { + my ($fh)= @_; + $fh->error and die $!; + close $fh or die "$! $?"; die $? if $?; +} + +sub pipethrough_run ($$$@) { + my ($tf, $childprep, $cmd, @a) = @_; + my $pt= pipethrough_run_along($tf,$childprep,$cmd,@a); my $r; - { undef $/; $!=0; $r= ; } + { undef $/; $!=0; $r= <$pt>; } defined $r or die $!; - close GZ or die "$! $?"; die $? if $?; + pipethrough_run_finish($pt); return $r; } sub pipethrough_run_gzip ($) { @@ -186,4 +199,33 @@ sub cgipostform ($$$) { } } +our %check_tsv_done; + +sub check_tsv_line ($$) { + my ($l, $bad_data_callback) = @_; + my $bad_data= sub { &$bad_data_callback("bad data: line $.: $_[0]"); }; + + chomp($l) or &$bad_data('missing end-of-line'); + + $l !~ m/\P{IsPrint}/ or &$bad_data('nonprinting char(s)'); + $l !~ m/\\/ or &$bad_data('data contains backslashes'); + my @v= split /\t/, $l, -1; + @v==6 or &$bad_data('wrong number of fields'); + my ($commod,$stall) = @v; + + !keys %commods or + defined $commods{$commod} or + &$bad_data("unknown commodity \`$commod'"); + + $stall =~ m/^\p{IsUpper}|^[0-9]/ or &$bad_data("stall not capitalised"); + !exists $check_tsv_done{$commod,$stall} or &$bad_data("repeated data"); + $check_tsv_done{$commod,$stall}= 1; + foreach my $i (2..5) { + my $f= $v[$i]; + $f =~ m/^(|0|[1-9][0-9]{0,5}|\>1000)$/ or &$bad_data("bad field $i"); + ($i % 2) or ($f !~ m/\>/) or &$bad_data("> in field $i price"); + } + return @v; +} + 1; diff --git a/pctb/commod-email-processor b/pctb/commod-email-processor old mode 100644 new mode 100755 index 2f2231b..8b38fcf --- a/pctb/commod-email-processor +++ b/pctb/commod-email-processor @@ -1,4 +1,4 @@ -#!/usr/bin/perl - +#!/usr/bin/perl -w # # This script is invoked to process an email sent by the # commod-update-receiver Perl script. @@ -46,30 +46,35 @@ use strict (qw(vars)); +use POSIX; use MIME::Parser; +use Commods; + setlocale(LC_CTYPE, "en_GB.UTF-8"); -my $mp= new MIME::Parser; +my $parser= new MIME::Parser; our $entity; -sub find_part ($$) { - my ($filename, $type, $accepter) = @_ +sub find_part ($$$) { + my ($filename, $type, $accepter) = @_; foreach my $part ($entity->parts()) { my $h= $part->head(); next unless $h->recommended_filename() eq $filename; next unless $h->mime_type() eq $type; next unless $part->effective_type() eq $type; - next unless &$accepter($h) if defined $accepter; + next if defined $accepter and !&$accepter($h); return $part; } die "no appropriate part with name $filename and type $type"; } +sub bad_data_fail ($) { die $_[0]; } + sub main () { $parser->extract_nested_messages(0); $parser->ignore_errors(0); - $entity= $mp->parse(\*STDIN); + $entity= $parser->parse(\*STDIN); my $eff_type= $entity->effective_type(); die "effective type $eff_type" unless $eff_type eq 'multipart/mixed'; @@ -80,11 +85,34 @@ sub main () { my $mdh= $mdpart->open('r') or die; my %md; - while (<$mdh>) { + while (defined($_= $mdh->getline())) { m/^([a-z]+)\t(.*)$/ or next; $md{$1}= $2; } - my $tsvpart= find_part('deduped.tsv.gz', 'application/octet-stream'); - $tsvpart->binmode(1); - + my $tsvpart= find_part('deduped.tsv.gz', 'application/octet-stream', + undef); + my $tsv= pipethrough_prep(); + $tsvpart->bodyhandle()->print($tsv); + my $pt= pipethrough_run_along($tsv,undef, 'gunzip','gunzip'); + + while (<$pt>) { + my @v= check_tsv_line($_, \&bad_data_fail); + print "[",join('|',@v),"]\n"; + } + + pipethrough_run_finish($pt); +} + +my $ok= eval { + main(); + 1; +}; +my $err= $@; + +$parser->filer->purge(); + +if (!$ok) { + print STDERR "PROCESSING FAILED\n $@\n"; + exit 1; +} diff --git a/pctb/commod-update-receiver b/pctb/commod-update-receiver index 1fe8612..7814de8 100755 --- a/pctb/commod-update-receiver +++ b/pctb/commod-update-receiver @@ -127,7 +127,7 @@ sub ksmap ($) { foreach my $vn (sort { ksmap($a) cmp ksmap($b) } keys %o) { my $val= $o{$vn}; - die if $val =~ m/\n|\r|\t/; + die if $val =~ m/\n|\r/; $metadata .= "$vn\t$o{$vn}\n"; } @@ -146,25 +146,8 @@ if (!$gunzchild) { exec 'gunzip'; die $!; } -sub bad_data ($) { fail("bad data: line $.: $_[0]"); } - -our %done; - while () { - !m/\P{IsPrint}/ or die bad_data('nonprinting char(s)'); - !m/\\/ or bad_data('data contains backslashes'); - my @v= split /\t/; - @v==6 or bad_data('wrong number of fields'); - my ($commod,$stall) = @v; - defined $commods{$commod} or bad_data("unknown commodity \`$commod'"); - $stall =~ m/^\p{IsUpper}|^[0-9]/ or bad_data("stall not capitalised"); - !exists $done{$commod,$stall} or bad_data("repeated data"); - $done{$commod,$stall}= 1; - foreach my $i (2..5) { - my $f= $v[$i]; - $f =~ m/^(|0|[1-9][0-9]{0,5}|\>1000)$/ or bad_data("bad field $i"); - ($i % 2) or ($f !~ m/\>/) or bad_data("> in field $i price"); - } + check_tsv_line($_,\&fail); } GZ->error and die $!; $?=0; close GZ; $? and fail("gunzip for check failed code $?");