From: Ian Jackson Date: Sun, 21 Aug 2016 09:38:15 +0000 (+0100) Subject: New format: README.format and normalise, wip. Before restrict CAND and VOTERNAME... X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ian/git?p=appendix-a6.git;a=commitdiff_plain;h=a11536b183032286d08363afa8f0d1fbd124b979;ds=sidebyside New format: README.format and normalise, wip. Before restrict CAND and VOTERNAME to \w+ --- diff --git a/README.format b/README.format new file mode 100644 index 0000000..e08627c --- /dev/null +++ b/README.format @@ -0,0 +1,67 @@ +We define a common election and ballot input format. + +Format consists of lines: + + [| OPTION] + CAND = DESCRIPTION ... [| OPTION...] + VOTERNAME : CAND, (CAND=CAND=...) ... [| OPTION...] + + +We define normalised and non-normalised inputs. Normalised is for +input by compute programs (which do not need to check conformance with +the syntax) and non-normalised is for output from converters and +messing about by humans. + +In normalised files lines must occur in the order above (and +whitespace is normalised, although readers should avoid relying on +this other than that they may assume lack of leading and trailing +whitespace on each line). + +Non-normalised files can also contain empty lines and + # COMMENT... + +In non-normalised files any CAND may be omitted in which case CAND = +CAND is assumed. In non-normalised files an empty description is +replaced with a copy of CAND. + +CAND and VOTERNAME may contain any characters except control +characters and 7-bit ASCII punctuation, but including ASCII + ampersand & + apostrophe ' + hyphen - + full stop . + and underscore (_). Processors are not required to +generate unambiguous output when CAND or VOTERNAME contains characters +other than 7-bit alphanumerics plus _. + +DESCRIPTION may contain any characters except | (and cannot contain +leading or trailing whitespace). + +VOTERNAMEs need not be distinct and may be empty. 
+ +OPTION is OPTNAME[=VALUE] and modifies the preference, ballot, or whole +election. VALUE may contain no whitespace. + +OPTNAME contains only alphanumerics (or _) and starts with a letter or +_. Readers should treat unknown OPTNAMEs as follows: + starts with uppercase letter - crash (reject the input file) + starts with lowercase letter - generate a warning + starts with underscore - silently ignore + +Anything which expects a particular OPTNAME should usually accept all +three variants, with a regexp like /^_?[Qq]uorum=(\d+)$/. Convention +is _-separated words, with inner words being in lowercase, even when +OPTNAME starts with a capital. + +Currently defined OPTNAMEs (default values shown) + + Election seats=1 Number of seats to be filled. + Election _nodefault For Debian A6, no default option + Election quorum=NUM Quorum (default, none) + + Candidate Super=1:1 For Debian A6, RAT:IO, + Candidate default For Debian A6, Is the default option + + Voter Weight=1.0 + +OPTIONs within a particular scope may occur in any order. diff --git a/normalise b/normalise new file mode 100755 index 0000000..ca5ecf7 --- /dev/null +++ b/normalise @@ -0,0 +1,40 @@ +#!/usr/bin/perl -w + +use strict; + +our @options, @candiates, @voters; + +our %seen_cand, %need_cand; + +my $candvoter_re = '[^\000-\037!"#$%()*+,/0-\136`-\177]+'; + +sub normalise_opts ($) { + my ($os) = @_; + my @o; + foreach my $o (split /\s+/, $os) { + if ($o =~ m/^\w+$/) { + push @o, $&; + } elsif ($o =~ m/^\w+\=\S+$/) { + push @o, $&;S + } elseif ($o !~ m/\S/) { + } else { + badinput "bad option \`$o'"; + } + } + return @o ? 
" | @o" : ""; +} + +while (<>) { + next unless m/\S/; + next if m/^\#/; + s/^\s+//; + s/\s+$//; + if (m/^\|\s*(\w+(?:\=\S+)?)$/) { + push @options, "| $1"; + } elsif (m/^($candvoter_re?)\s*=\s*([^|]+?)\s*|(.*)?$/) { + my ($cand,$desc,$opts) = ($1,$2,$3); + $desc=$cand unless length $desc; + $opts = normalise_opts $opts; + push @candidates, "$cand = $desc".$opts; + } elsif (m/^($candvoter_re?)?\s*\:/) { +