#!/usr/local/bin/perl
#
# history database sanity checker
# David Barr
# version 1.4
# w/mods from: hucka@eecs.umich.edu
# Katsuhiro Kondou
# version 1.1
# Throw away history entries with:
#   malformed lines (too long, contain nulls or special characters)
#
# INN Usage:
#   ctlinnd throttle 'fixing history'
#   ./fixhist <history >history.n
#   makedbz -s `wc -l <history.n` -f history.n
#
# NOTE(review): the remainder of the original usage notes was lost from this
# copy; restore the follow-up steps from the upstream script if available.
#
# Accepted entries are written to stdout; rejected entries are written to
# stderr, unchanged, one per line.

use strict;
use warnings;

# Message-ids of this length or longer are rejected.
# NOTE(review): the original assignment was lost from this copy; 255 is
# assumed here -- confirm against the key-length limit of your INN build.
my $MAXKEYLEN = 255;

# Run the filter only when executed as a script, so the validation routine
# can be loaded and exercised on its own.
main() unless caller;

# Read history lines from stdin (or the files named on the command line),
# print the acceptable ones to stdout, report the rest via tossit(), and
# finally tell the operator how to rebuild the dbz files.
sub main {
    my $count = 0;    # number of lines written to stdout

    while (my $line = <>) {
        # chomp, not chop: a final line lacking a newline must keep its
        # last character.
        chomp $line;

        my $entry = check_entry($line);
        if (!defined $entry) {
            tossit($line);
            next;
        }

        # Every emitted line is counted (including rewritten ones) so that
        # the size hint printed at the end matches the output file.
        print "$entry\n";
        $count++;

        # Progress indicator visible in ps(1) output.
        $0 = "history line $./$count" if $. % 50000 == 0;
    }

    print STDERR "Done. Now run:\nmakedbz -s $count -f history.n\n";
    return;
}

# Validate one history line (without its trailing newline).
#
# Returns the text to write to the new history file -- normally the line
# itself, or a rewritten line when a 56-digit storage token is shortened to
# its first 36 digits -- or undef when the entry must be thrown away.
sub check_entry {
    my ($line) = @_;

    # With a limit of 4 the last element holds everything after the third
    # tab, so any surplus field is seen no matter how many there are.
    my ($msgid, $dates, $arts, $xtra) = split /\t/, $line, 4;
    for my $field ($msgid, $dates, $arts, $xtra) {
        $field = '' unless defined $field;
    }

    # Too many fields.  Trailing empty fields (bare tabs) are tolerated, but
    # any other surplus content -- including a literal "0" -- is not.
    return if $xtra =~ /[^\t]/;

    # If there is no date field, then the rest should be empty.
    return if !$dates && $arts;

    # Message-id too long.
    return if length($msgid) >= $MAXKEYLEN;

    if ($msgid =~ /^<[^<> ]*>$/) {
        # Conventional message-id: article list must look like group/number.
        return if $arts ne '' && $arts !~ /[^\/]*\/[0-9]*/;
    }
    elsif ($msgid =~ /^\[[0-9A-F]{32}\]$/) {
        # Hashed message-id: article field must be a storage token.
        if ($arts =~ /^\@[0-9A-F]{56}\@$/) {
            # Long token: keep only the first 36 hex digits.  As in the
            # original, the rewritten entry is emitted without running the
            # remaining checks.
            $arts =~ s/^\@([0-9A-F]{36})([0-9A-F]{20})\@$/\@${1}\@/;
            return "$msgid\t$dates\t$arts";
        }
        return if $arts ne '' && $arts !~ /^\@[0-9A-F]{36}\@$/;
    }
    else {
        return;    # malformed msg-id
    }

    # Illegal characters: any control character other than tab.
    return if $line =~ /[\000-\010\012-\037\177-\237]/;

    # Rudimentary date check; a full check would be too slow.
    return if $dates && $dates =~ /[^\d~\-]/;

    return $line;
}

# Report a rejected entry on stderr.  With no argument the current $_ is
# reported, which keeps the original calling convention working.
sub tossit {
    my ($entry) = @_;
    $entry = $_ unless defined $entry;
    print STDERR "$entry\n";
    return;
}

1;