2 # fixscript will replace this line with require innshellvars.pl
4 # @(#)scanspool.pl 1.20 4/6/92 00:47:35
6 # Written by: Landon Curt Noll (chongo was here /\../\)
8 # This code is placed in the public domain.
10 # scanspool - perform a big scan over all articles in /usr/spool/news
13 # scanspool [-a active_file] [-s spool_dir] [-v] [-c] [-n]
15 # -a active_file active file to use (default /usr/lib/news/active)
16 # -s spool_dir spool tree (default /usr/spool/news)
18 # verbose messages begin with a tab
19 # show articles found in non-active directories
20 # -c check article filenames, don't scan the articles
21 # -n don't throttle innd
23 # NOTE: This takes a while, -v is a good thing if you want to know
24 # how far this program has progressed.
26 # This program will scan first the active file, noting problems such as:
29 # group aliased to a non-existent group
30 # group aliased to a group that is also aliased
32 # Then it will examine all articles under your news spool directory,
33 # looking for articles that:
35 # basename that starts with a leading 0
36 # basename that is out of range with the active file
37 # does not contain a Newsgroups: line
38 # article that is all header and no text
39 # is in a directory for which there is no active group
40 # article that is in a group to which it does not belong
42 # Scanspool understands aliased groups. Thus, if an article is posted
43 # to foo.old.name that is aliased to foo.bar, it will be expected to
44 # be found under foo.bar and not foo.old.name.
46 # Any group that is of type 'j' or 'x' (4th field of the active file)
47 # will be allowed to show up under the junk group.
49 # Scanspool assumes that the path of a valid newsgroup's directory
50 # from the top of the spool tree will not contain any "." character.
51 # Thus, directories such as out.going, tmp.dir, in.coming and
52 # news.archive will not be searched. This program also assumes that
53 # article basenames contain only decimal digits. Last, files under
54 # the top level directory "lost+found" are not scanned.
56 # The output of scanspool will start with one of 4 forms:
58 # FATAL: fatal or internal error (to stderr)
60 # WARNING: active or article format problem, (to stderr)
61 # group alias problem, find error,
64 # path/123: basename starts with 0, (to stdout)
65 # article number out of range,
66 # article in the wrong directory,
67 # article in directory not related to
68 # an active non-aliased newsgroup
70 # \t ... verbose message starting with a tab (to stdout)
76 # $name - newsgroup name in foo.dot.form
77 # produces => 4th active field (y, n, x, ...)
78 # alias type is "=", not "=foo.bar"
81 # $name - newsgroup name in foo.dot.form
82 # produces => newsgroup name in foo.dot.form
83 # if type is =, this will be a.b, not $name
86 # $name - newsgroup name in foo.dot.form
87 # produces => lowest article allowed in the group
88 # if type is =, this is not valid
91 # $name - newsgroup name in foo.dot.form
92 # produces => highest article allowed in the group
93 # if type is =, this is not valid
94 # If $highart{$name} < $lowart{$name},
95 # then the group should be empty
101 # setup non-buffered stdout and stderr
# Main program: establish defaults, apply command line overrides, then
# scan the active file and the spool with innd throttled around the scan.
110 $prog = $0; # our name
# Defaults come from the $inn::path* variables.
# NOTE(review): presumably these are set by the innshellvars.pl require
# mentioned at the top of the file -- confirm.
111 $spool = "$inn::patharticles";
112 $active = "$inn::pathdb/active";
113 $ctlinnd = "$inn::pathbin/ctlinnd";
114 $reason = "running scanspool"; # throttle reason
# -a and -s override the default active file and spool directory
119 $active = $opt_a if (defined($opt_a));
120 $spool = $opt_s if (defined($opt_s));
122 # throttle innd unless -n
# (ctlinnd output is discarded and the exit status of system() is not checked)
124 if (! defined($opt_n)) {
125 system("$ctlinnd throttle '$reason' >/dev/null 2>&1");
128 # process the active file
130 &parse_active($active);
132 # check the spool directory
134 &check_spool($spool);
136 # unthrottle innd unless -n
# (the same $reason string that was given to "throttle" is passed to "go")
138 if (! defined($opt_n)) {
139 system("$ctlinnd go '$reason' >/dev/null 2>&1");
146 # parse_active - parse the active file
148 # From the active file, fill out the %gname2type (type of newsgroup)
149 # and %realgname (real/non-aliased name of group), %lowart & %highart
150 # (low and high article numbers). This routine will also check for
151 # aliases to missing groups or groups that are also aliases.
#
# Parameters:
#   $_[0] - pathname of the active file to read
#
# Mal-formed lines, duplicate groups and bad aliases are reported through
# &problem; failure to open the active file is reported through &fatal
# with exit value 1.
155 local ($active) = $_[0]; # the name of the active file to use
156 local (*ACTIVE); # active file handle
157 local ($line); # active file line
158 local ($name); # name of newsgroup
159 local ($low); # low article number
160 local ($high); # high article number
161 local ($type); # type of newsgroup (4th active field)
162 local ($field5); # 5th active field (should not exist)
163 local ($dir); # directory path of group from $spool
164 local ($alias); # realname of an aliased group
165 local ($linenum); # active file line number
167 # if verbose (-v), say what we are doing
168 print "\tscanning $active\n" if defined($opt_v);
170 # open the active file
171 open (ACTIVE, $active) || &fatal(1, "cannot open $active");
# examine the active file one line at a time
175 while ($line = <ACTIVE>) {
180 # verify that we have a correct number of tokens
# (a name, two all-digit fields and a type, each separated by exactly
# one space, with nothing after the type)
181 if ($line !~ /^\S+ 0*(\d+) 0*(\d+) \S+$/o) {
182 &problem("WARNING: active line is mal-formed at line $linenum");
# split out the fields; the second field is taken as the high article
# number and the third as the low.  Leading zeros are matched outside of
# the captures, so they are not part of the captured numbers.
185 ($name, $high, $low, $type) = $line =~ /^(\S+) 0*(\d+) 0*(\d+) (\S+)$/o;
187 # watch for duplicate entries
188 if (defined($realgname{$name})) {
189 &problem("WARNING: ignoring dup group: $name, at line $linenum");
193 # record which type it is
194 $gname2type{$name} = $type;
196 # record the low and high article numbers
197 $lowart{$name} = $low;
198 $highart{$name} = $high;
200 # determine the directory and real group name
# types "j" and "x" are handled first, then the "=group" alias form
201 if ($type eq "j" || $type eq "x") {
204 } elsif ($type =~ /^=(.+)/o) {
# the directory is the alias name with each "." turned into "/"
206 ($dir = $alias) =~ s#\.#/#go;
207 $gname2type{$name} = "="; # rename type to be just =
# remember which group this name really refers to
213 $realgname{$name} = $alias;
216 # close the active file
219 # be sure that any alias type is aliased to a real group
220 foreach $name (keys %realgname) {
222 # skip if not an alias type
223 next if $gname2type{$name} ne "=";
225 # be sure that the alias exists
226 $alias = $realgname{$name};
227 if (! defined($realgname{$alias})) {
228 &problem("WARNING: alias for $name: $alias, is not a group");
232 # be sure that the alias is not an alias of something else
233 if ($gname2type{$alias} eq "=") {
234 &problem("WARNING: alias for $name: $alias, is also an alias");
241 # problem - report a problem to stderr
243 # Print a message to stderr. Parameters are space separated.
244 # A final newline is appended to it.
247 # &problem(arg, arg2, ...)
# NOTE(review): the header at the top of this file says that the
# "path/123:" style messages go to stdout, but every message passed to
# this routine is written to STDERR -- confirm which one is intended.
251 local ($line); # the line to write
253 # join the arguments with single spaces and print them with a newline
254 $line = join(" ", @_);
255 print STDERR $line, "\n";
259 # fatal - report a fatal error to stderr and exit
261 # Print a message to stderr. The message has the program name prepended
262 # to it. Parameters are space separated. A final newline is appended
263 # to it. This function exits with the code of exitval.
266 # &fatal(exitval, arg, arg2, ...)
#
# Parameters:
#   exitval       - value to exit with
#   arg, arg2 ... - pieces of the error message
270 local ($exitval) = $_[0]; # what to exit with
# firewall: $#_ is the index of the last argument, so the number of
# arguments that were passed is $#_+1
274 print STDERR "FATAL: fatal called with only ", $#_+1, " arguments\n";
280 # print the error message
282 $line = join(" ", @_);
283 print STDERR "$prog: ", $line, "\n";
285 # unthrottle innd unless -n
# (done here as well so that a fatal error does not leave innd throttled)
287 if (! defined($opt_n)) {
288 system("$ctlinnd go '$reason' >/dev/null 2>&1");
296 # check_spool - check the articles found in the spool directory
298 # This subroutine will check all articles found under the $spool directory.
299 # It will examine only file paths that do not contain any "." or whitespace
300 # character, and whose basename is completely numeric. Files under
301 # lost+found will also be ignored.
304 # $spooldir - top of /usr/spool/news article tree
#
# Filename and article problems are reported through &problem; failure to
# chdir to the spool or to start find is reported through &fatal.
308 local ($spooldir) = $_[0]; # top of article tree
# localize FINDFILE, the handle that is actually read from below
309 local (*FINDFILE); # pipe from the find files process
310 local ($filename); # article pathname under $spool
311 local ($artgrp); # group of an article
312 local ($artnum); # article number in a group
313 local ($prevgrp); # previous different value of $artgrp
314 local ($preverrgrp); # previous non-active $artgrp
315 local (*ARTICLE); # article handle
316 local ($aline); # header line from an article
317 local (@group); # array of groups from the Newsgroup header
320 # if verbose, say what we are doing
321 print "\tfinding articles under $spooldir\n" if defined($opt_v);
323 # move to the $spool directory
# chdir's argument is parenthesized so that || tests the result of chdir;
# without the parentheses the || becomes part of chdir's argument and a
# failed chdir is never noticed
324 chdir($spooldir) || &fatal(2, "cannot chdir to $spool");
326 # start finding files
# (find's stderr is merged into the pipe, so find errors arrive as lines)
329 "find . \\( -type f -o -type l \\) -name '[0-9]*' -print 2>&1 |")) {
330 &fatal(3, "cannot start find in $spool");
333 # process each pathname printed by find
335 while ($filename = <FINDFILE>) {
337 # if the line contains find:, assume it is a find error and print it
339 if ($filename =~ /find:\s/o) {
340 &problem("WARNING:", $filename);
344 # remove the \n and ./ that find put in our path
345 $filename =~ s#^\./##o;
347 # skip if this path has a . in it (beyond a leading ./)
348 next if ($filename =~ /\./o);
# skip anything under the top level lost+found directory; the + must be
# escaped, otherwise it is a quantifier and "lost+found/" never matches
351 next if ($filename =~ m:^lost\+found/:o);
353 # skip if not a numeric basename
354 next if ($filename !~ m:/\d+$:o);
356 # get the article's newsgroup name (based on its path from $spool)
358 $artgrp =~ s#/\d+$##o;
361 # if verbose (-v), then note if our group changed
362 if (defined($opt_v) && $artgrp ne $prevgrp) {
367 # note if the article is not in a directory that is used by
368 # a real (non-aliased) group in the active file
370 # If we complained about this group before, don't complain again.
371 # If verbose, note files that could be removed.
373 if (!defined($gname2type{$artgrp}) || $gname2type{$artgrp} =~ /[=jx]/o){
374 if ($preverrgrp ne $artgrp) {
375 &problem("$artgrp: not an active group directory");
376 $preverrgrp = $artgrp;
378 if (defined($opt_v)) {
379 &problem("$filename: article found in non-active directory");
384 # check on the article number
386 $artnum =~ s#^.+/##o;
387 if ($artnum =~ m/^0/o) {
388 &problem("$filename: article basename starts with a 0");
# range checks are only possible for groups found in the active file
390 if (defined($gname2type{$artgrp})) {
391 if ($lowart{$artgrp} > $highart{$artgrp}) {
392 &problem("$filename: active indicates group should be empty");
393 } elsif ($artnum < $lowart{$artgrp}) {
394 &problem("$filename: article number is too low");
395 } elsif ($artnum > $highart{$artgrp}) {
396 &problem("$filename: article number is too high");
400 # if check filenames only (-c), then do nothing else with the file
401 next if (defined($opt_c));
403 # don't open a control or junk, they can be from anywhere
404 next if ($artgrp eq "control" || $artgrp eq "junk");
407 if (!open(ARTICLE, $filename)) {
409 # the file is now gone (expired?), give up on it
410 &problem("WARNING: cannot open $filename");
414 # read until the Newsgroups header line is found
416 while ($aline = <ARTICLE>) {
418 # catch the newsgroup: header
419 if ($aline =~ /^Newsgroups:\w*\W/io) {
421 # convert $aline into a comma separated list of groups
422 $aline =~ s/^Newsgroups://io;
423 $aline =~ tr/ \t\n//d;
425 # form an array of news groups
426 @group = split(",", $aline);
428 # see if any groups in the Newsgroup list are our group
# (each listed group is mapped through %realgname, so aliases count)
429 for ($j=0; $j <= $#group; ++$j) {
432 if ($realgname{$group[$j]} eq $artgrp) {
433 # this article was posted to this group
438 # no group or group alias was found
439 &problem("$filename: does not belong in $artgrp");
442 # else watch for the end of the header
443 } elsif ($aline =~ /^\s*$/o) {
445 # no Newsgroup: header found
446 &problem("WARNING: $filename: no Newsgroup header");
450 &problem("WARNING: $filename: EOF found while reading header");
458 # all done with the find