From: Ian Jackson Date: Sun, 21 Aug 2016 11:03:07 +0000 (+0100) Subject: New format: wip spi input converter X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ian/git?p=appendix-a6.git;a=commitdiff_plain;h=2ae008ee988080047f9a5576bc6f8a702ced2470 New format: wip spi input converter --- diff --git a/README.format b/README.format index e08627c..5ec7903 100644 --- a/README.format +++ b/README.format @@ -2,10 +2,10 @@ We define a common election and ballot input format. Format consists of lines: - [| OPTION] + [| OPTION...] CAND = DESCRIPTION ... [| OPTION...] - VOTERNAME : CAND, (CAND=CAND=...) ... [| OPTION...] - + VOTERNAME : CAND CAND=CAND=... ... [| OPTION...] + . We define normalised and non-normalised inputs. Normalised is for input by compute programs (which do not need to check conformance with @@ -15,27 +15,26 @@ messing about by numans. In normalised files lines must occor in the order above (and whitespace is normalised, although readers should avoid relying on this other than that they may assume lack of leading and trailing -whitespace on each line). +whitespace on each line). Normalised files always contain a +(possibly-empty) `| OPTION' section on each line. + +Normalised files always contain a `.' line at the end. In +non-normalised files anything after the `.' line is ignored, but it +may be missing. Non-normalised files can also contain empty lines and # COMMENT... In non-normalised files any CAND may be omitted in which case CAND = CAND is assumed. In non-normalised files an empty description is -replaced with a copy of CAND. +replaced with a copy of CAND. In non-normalised files there may be +multiple lines for any one CAND (provided there is only one non-empty +DESCRIPTION). -CAND and VOTERNAME may contain any characters except control -characters and 7-bit ASCII punctuation, but including ASCII - ampersand & - apostrophe ' - hyphen - - full stop . - and underscore (_). 
Processors are not required to -generate unambiguous output when CAND or VOTERNAME contains characters -other than 7-bit alphanumerics plus _. +CAND and VOTERNAME may contain 7-bit alphanumerics and _. -DESCRIPTION may contain any characters except | (and cannot contain -leading or trailing whitespace). +DESCRIPTION may contain any characters except | and newline (and +cannot contain leading or trailing whitespace). VOTERNAMEs need not be distinct and may be empty. @@ -64,4 +63,5 @@ Currently defined OPTNAMEs (default values shown) Voter Weight=1.0 -OPTIONs wihin a particular scope may occur in any order. +OPTIONs within a particular scope may occur in any order. Whether they +may be repeated is up to the option. diff --git a/normalise b/normalise index ca5ecf7..6bf8ea6 100755 --- a/normalise +++ b/normalise @@ -2,14 +2,11 @@ use strict; -our @options, @candiates, @voters; +our @options, %candiates, @ballots; -our %seen_cand, %need_cand; +my $candvoter_re = '\w+'; -my $candvoter_re = '[^\000-\037!"#$%()*+,/0-\136`-\177]+'; - -sub normalise_opts ($) { - my ($os) = @_; +sub normalise_opts_list ($) { my @o; foreach my $o (split /\s+/, $os) { if ($o =~ m/^\w+$/) { @@ -21,7 +18,13 @@ sub normalise_opts ($) { badinput "bad option \`$o'"; } } - return @o ? 
" | @o" : ""; + return @o; +} + +sub normalise_opts ($) { + my ($os) = @_; + my @o = normalise_opts_list $os; + return " | @o"; } while (<>) { @@ -29,12 +32,49 @@ while (<>) { next if m/^\#/; s/^\s+//; s/\s+$//; - if (m/^\|\s*(\w+(?:\=\S+)?)$/) { - push @options, "| $1"; + if (m/^\|/) { + push @options, normalise_opts_list $'; } elsif (m/^($candvoter_re?)\s*=\s*([^|]+?)\s*|(.*)?$/) { my ($cand,$desc,$opts) = ($1,$2,$3); + push @{ $candidates{$cand}{Opts} }, normalise_opts $opts; + if (length $desc) { + badinput "multiple descriptions for $cand" if + defined $candidates{$cand}{Desc}; + $candidates{$cand}{Desc} = $desc; + } $desc=$cand unless length $desc; - $opts = normalise_opts $opts; - push @candidates, "$cand = $desc".$opts; - } elsif (m/^($candvoter_re?)?\s*\:/) { - + push @candidates, "$cand = $desc". + } elsif (m/^($candvoter_re?)?\s*\:([^|]+)(|(.*)?$/) { + my ($voter,$opts) = ($1,$3); + my @p; + foreach my $p (split /\s+/, $2) { + if ($p =~ m/^\w+(?:\=\w+)*$/) { + push @p, $&; + $candidates{$_} //= { } foreach my $p =~ m/\w+/g; + } else { + badinput "bad vote preference \`$p'"; + } + } + push @ballots, "$voter : @p".normalise_opts $opts; + } elsif (m/^\.$/) { + } else { + badinput "unknown line format \`$_'"; + } +} + +print "| @options\n" or die $!; + +foreach my $cand (sort keys %candidates) { + my $c = $candidates{$cand}; + $c->{Desc} //= $cand; + $c->{Opts} //= [ ]; + my $opts = $c->{Opts}; + print "$cand = $c->{Desc} | @$opts\n" or die $!; +} + +sub vsortkey { $_[0] =~ m/:/; return "$' : $`"; } + +print $_,"\n" or die $! foreach + (sort { vsortkey($a) cmp vsortkey($b) } @ballots; + +print ".\n" or die $!; diff --git a/spi2loose b/spi2loose new file mode 100755 index 0000000..9965502 --- /dev/null +++ b/spi2loose @@ -0,0 +1,8 @@ +#!/usr/bin/perl -w +use strict; +while (<>) { + m/^(\w+) (\w+)$/ or die "$_ ?"; + print "$1 : ".(join " ", split //, $2)."\n" or die $!; +} + +