chiark / gitweb /
debugging for thing that crashed
[innduct.git] / contrib / fixhist
1 #!/usr/local/bin/perl
2 #
3 # history database sanity checker
4 # David Barr <barr@math.psu.edu>
5 # version 1.4
6 # w/mods from: hucka@eecs.umich.edu
7 # Katsuhiro Kondou <kondou@nec.co.jp>
8 # version 1.1
9 # Throw away history entries with:
10 #   malformed lines (too long, contain nulls or special characters)
11 #
12 # INN Usage:
13 #   ctlinnd throttle 'fixing history'
14 #   ./fixhist <history >history.n
15 #   makedbz -s `wc -l <history.n` -f history.n
16 #      or use instructions from fixhist to avoid the `wc -l <history.n`
17 #   mv history.n history
18 #   mv history.n.dir history.dir
19 ### if TAGGED_HASH is DO or before inn2.0
20 #   mv history.n.pag history.pag
21 ### if TAGGED_HASH is DONT
22 #   mv history.n.hash history.hash
23 #   mv history.n.index history.index
24 ### endif
25 #   ctlinnd reload history x
26 #   ctlinnd go 'fixing history'
27 # any malformed entries will be output to stderr.
28
29
use strict;
use warnings;

# Filter an INN history file: read entries from the files named on the
# command line (or stdin), copy well-formed entries to stdout, and hand
# malformed ones to tossit(), which reports the current line ($_) on stderr.
#
# Each entry is split on tabs into: message-id, dates, article list/token.
# When finished, the number of entries written to stdout is reported on
# stderr as the -s argument for makedbz.

my $MAXKEYLEN = 254;    # message-ids of this length or longer are rejected
my $count     = 0;      # entries written to stdout so far

LINE: while (<>) {
        # chomp, not chop: a final line lacking a newline must keep its
        # last character.
        chomp;

        # A limit of 4 leaves everything after the third tab in $xtra, so
        # any surplus field is noticed -- even "0", or one that follows an
        # empty fourth field.
        my ($msgid, $dates, $arts, $xtra) = split /\t/, $_, 4;
        for my $field ($msgid, $dates, $arts, $xtra) {
                $field = '' unless defined $field;      # short lines leave fields undef
        }

        if ($xtra ne '') {
                tossit();               # too many fields
                next LINE;
        }
        if (!$dates && $arts) {
                tossit();               # if no date field, then the rest
                next LINE;              # should be empty
        }
        if (length($msgid) >= $MAXKEYLEN) {
                tossit();               # message-id too long
                next LINE;
        }

        if ($msgid =~ /^<[^<> ]*>$/) {
                # <message-id> form: a non-empty article list must contain
                # something of the shape name/number.
                if ($arts ne "" && $arts !~ /[^\/]*\/[0-9]*/) {
                        tossit();       # malformed articles list
                        next LINE;
                }
        }
        elsif ($msgid =~ /^\[[0-9A-F]{32}\]$/) {
                # [32 hex digits] form: the third field is a token of hex
                # digits wrapped in '@'.
                if ($arts =~ /^\@[0-9A-F]{56}\@$/) {
                        # 56-digit token: keep only the first 36 digits and
                        # emit the rebuilt entry straight away (the checks
                        # further down are not applied to it).
                        $arts =~ s/^\@([0-9A-F]{36})([0-9A-F]{20})\@$/\@${1}\@/;
                        print "$msgid\t$dates\t$arts\n";
                        $count++;       # this entry is output too, so count it
                        next LINE;
                }
                if ($arts ne "" && $arts !~ /^\@[0-9A-F]{36}\@$/) {
                        tossit();       # malformed token
                        next LINE;
                }
        }
        else {
                tossit();               # malformed msg-ids
                next LINE;
        }

        # Control characters except tab (\011): \000-\010, \012-\037 and
        # \177-\237.
        if (/[\000-\010\012-\037\177-\237]/) {
                tossit();               # illegal chars
                next LINE;
        }
        if ($dates && $dates =~ /[^\d~\-]/) {   # rudimentary check;
                tossit();                       # a full check would be too slow
                next LINE;
        }

        print "$_\n";
        $count++;

        # Show progress in the process title every 50000 input lines.
        $0 = "history line $./$count" if $. % 50000 == 0;
}
print STDERR "Done.  Now run:\nmakedbz -s $count -f history.n\n";
86
# Report a rejected history entry.  Takes no arguments: the entry is the
# current input line in $_, which is written to stderr followed by a newline.
sub tossit {
        my $rejected = $_;
        print STDERR $rejected . "\n";
}