From: Ian Jackson Date: Tue, 25 Jan 2011 15:03:21 +0000 (+0000) Subject: canon: compact a bit more; notes: wip new format X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?a=commitdiff_plain;h=5273d961c426b74d9c24d2afbfae1d6f0392a1d0;p=ypp-sc-tools.main.git canon: compact a bit more; notes: wip new format --- diff --git a/yarrg/canon b/yarrg/canon index e70e646..a9f9801 100755 --- a/yarrg/canon +++ b/yarrg/canon @@ -25,6 +25,8 @@ sub alencodenum ($) { my ($val) = @_; my $res= ''; while ($val || !length($res)) { + # allowing empty strings, reusing "0" for 62, doing base63, + # saves 0.5% my $dig= $val % 62; $val= ($val-$dig) / 62; $res = chr($dig + ($dig<10 ? 48 : @@ -64,17 +66,21 @@ foreach my $k (sort keys %s) { if (!defined($r[2])) { $prep->(); - printf "-\n"; + printf "\n"; # no "-" here saves 3.5% next; } + # base62-encoding all these numbers saves about 8% my $qtydiff= $r[2] - ($r[0] || 0); if (!defined($r[0]) || $r[0] != $r[2]) { $prep->(); - printf "\@%d",$r[2]; + printf "\@%s",alencodenum($r[2]); } - if ($qtydiff) { + if ($qtydiff>0) { $prep->(); - printf "%+d",$qtydiff; + printf "+%s",alencodenum($qtydiff); + } elsif ($qtydiff<0) { + $prep->(); + printf "-%s",alencodenum(-$qtydiff); } print $eol; } diff --git a/yarrg/notes b/yarrg/notes index 31596dc..99438d9 100644 --- a/yarrg/notes +++ b/yarrg/notes @@ -52,42 +52,42 @@ very few price changes, mostly qty changes ==================== files - ARCHIVE.%{ocean}.lock.par never removed - ARCHIVE.%{ocean}.ocean.par updated by rename - ARCHIVE.%{ocean}s.%{isle}s.main.par updated by rename - ARCHIVE.%{ocean}s.%{isle}s.log.par appended, length in main - ARCHIVE.%{ocean}s.%{isle}s.z%d.par create/write, length in main - ocean file is always updated first so lockfree readers should open - main, then ocean - -integers are in LE byte order -vuint is one or more bytes with 7 bits each, BE first; top bit is "more bytes" - -format for an ocean file - magic Frame uint8*4 59a72671 - - for each commodity: - 
starting with commodity 0x0001 as zero is reserved for sentinels - commodity name uint16 name length - uint8*length name bytes - -format for a main file - magic Frame uint8*4 59a72672 - number of z files Frame uint32 - length of log file in bytes Frame uint32 - - single uncompressed diff - representing the change from nothing to - the most recent uploaded data - -format for a log or z file - - magic Frame uint8*4 59a72673 - - series of diffs most recent last - - for a log file, there may be some trailing garbage - not referred to in main file (see "length of log file") + ARCHIVE-%{ocean}s-lock.par never removed + ARCHIVE-%{ocean}s-main.par updated by rename + ARCHIVE-%{ocean}s-auxil.par appended/renamed, len in main + ARCHIVE-%{ocean}s-log-%{isleid}s.par appended/renamed, len in main + ARCHIVE-%{ocean}s-old-%{isleid}s-%4d.par.gz created, count in main + + others files is always updated before main + so lockfree readers should open main, then other files + +format is a series of lines + + all !yarrg-archive [...] magic, 1st line + + main !stalls [...] + main !island \ + [...] " + main = + + auxil =: + auxil =&\n + + log/old & applies to following data + log/old [:] delete offer + log/old [:]@ adjust price + log/old [:][@]- adjust qty + log/old [:][@]+ maybe price + log ^ + applies to previous data + includes 6-byte len of one ^... line + +log file is a series of diffs most recent last; each diff is + metadataid&timestamp, zero or more commods, ^uploadlenbytes + the commodid + + for a log file, there may be some trailing garbage + not referred to in main file (see "length of log file") for a z file, the file length is definitive and the last entry is always valid if the file is referred to in the main