3 # history database sanity checker
4 # David Barr <barr@math.psu.edu>
6 # w/mods from: hucka@eecs.umich.edu
7 # Katsuhiro Kondou <kondou@nec.co.jp>
9 # Throw away history entries with:
10 # malformed lines (too long, contain nulls or special characters)
13 # ctlinnd throttle 'fixing history'
14 # ./fixhist <history >history.n
15 # makedbz -s `wc -l <history.n` -f history.n
16 # or run the makedbz command that fixhist prints on stderr when it finishes, which avoids the `wc -l <history.n`
17 # mv history.n history
18 # mv history.n.dir history.dir
19 ### if TAGGED_HASH is DO or before inn2.0
20 # mv history.n.pag history.pag
21 ### if TAGGED_HASH is DONT
22 # mv history.n.hash history.hash
23 # mv history.n.index history.index
25 # ctlinnd reload history x
26 # ctlinnd go 'fixing history'
27 # any malformed entries will be output to stderr.
# Validate one history line held in $_ and discard it via tossit() when it is
# malformed.  tossit(), $MAXKEYLEN and $count are defined outside this excerpt.
# NOTE(review): presumably tossit() reports the rejected line on stderr, as
# the header comment describes -- confirm against its definition.
#
# Break the line into its tab-separated fields: message-id, date field,
# article list, and anything left over.
35 ($msgid,$dates,$arts,$xtra) = split('\t');
# NOTE(review): the test guarding this call is not visible in this excerpt.
37 &tossit(); # too many fields
# A line with a false date field must not carry an article list or an extra
# field.  Note that !($dates) is also true when the field is the string "0".
40 if (!($dates) && (($arts) || ($xtra))) {
41 &tossit(); # if not date field, then the rest
42 next; # should be empty
# Reject message-ids whose length reaches $MAXKEYLEN.
44 if (length($msgid) >= $MAXKEYLEN) {
45 &tossit(); # message-id too long
# Message-ids that are not of the form <...> (with no '<', '>' or space
# inside the brackets) get a second look.
48 if ($msgid !~ /^<[^<> ]*>$/) {
# A '[' + 32 uppercase hex digits + ']' key is also recognised.
# NOTE(review): presumably this is the hashed message-id form -- confirm.
49 if ($msgid =~ /^\[[0-9A-F]{32}\]$/) {
# An article field of '@' + 56 hex digits + '@' is shortened to its first
# 36 hex digits (the trailing 20 are dropped) and the line is printed with
# the rewritten field.
51 if ($arts =~ /^\@[0-9A-F]{56}\@$/) {
52 $arts =~ s/^\@([0-9A-F]{36})([0-9A-F]{20})\@$/\@${1}\@/;
53 print "$msgid\t$dates\t$arts\n";
# Article field that is not '@' + 36 hex digits + '@'; what happens in this
# branch is not visible in this excerpt.
56 if ($arts !~ /^\@[0-9A-F]{36}\@$/) {
62 &tossit(); # malformed msg-ids
# A non-empty article list must match "<non-slashes>/<digits>".
# NOTE(review): the pattern is unanchored and both parts use '*', so any
# value containing a '/' passes this check.
66 if ($arts ne "" && ($arts !~ /[^\/]*\/[0-9]*/)) {
67 &tossit(); # malformed articles list
# Scan the whole line ($_) for bytes \000-\010, \012-\037 or \177-\237;
# tab (\011) is deliberately left out of the class because it is the
# field separator.
71 if (/[\000-\010\012-\037\177-\237]/) { # control chars (tab is allowed)
72 &tossit(); # illegal chars
# The date field may contain only digits, '~' and '-'.
76 if ($dates =~ /[^\d~\-]/) { # rudimentary check
77 &tossit(); # full check would be too slow
# Every 50000th input line, store "history line <current>/<count>" in $0 so
# the progress shows up in the program name reported by tools such as ps.
83 $0 = "history line $./$count" if $. % 50000 == 0;
# Tell the operator, on stderr, the makedbz command to run next, with $count
# passed as the -s argument.
85 print STDERR "Done. Now run:\nmakedbz -s $count -f history.n\n";