#! /usr/bin/perl # fixscript will replace this line with require innshellvars.pl # @(#)scanspool.pl 1.20 4/6/92 00:47:35 # # Written by: Landon Curt Noll (chongo was here /\../\) # # This code is placed in the public domain. # # scanspool - perform a big scan over all articles in /usr/spool/news # # usage: # scanspool [-a active_file] [-s spool_dir] [-v] [-c] [-n] # # -a active_file active file to use (default /usr/lib/news/active) # -s spool_dir spool tree (default /usr/spool/news) # -v verbose mode # verbose messages begin with a tab # show articles found in non-active directories # -c check article filenames, don't scan the articles # -n don't throttle innd # # NOTE: This take a while, -v is a good thing if you want to know # how far this program has progressed. # # This program will scan first the active file, noting problems such as: # # malformed line # group aliased to a non-existent group # group aliased to a group tat is also aliased # # Then it will examine all articles under your news spool directory, # looking for articles that: # # basename that starts with a leading 0 # basename that is out of range with the active file # does not contain a Newsgroups: line # article that is all header and no text # is in a directory for which there is no active group # article that is in a group to which it does not belong # # Scanspool understands aliased groups. Thus, if an article is posted # to foo.old.name that is aliases to foo.bar, it will be expected to # be found under foo.bar and not foo.old.name. # # Any group that is of type 'j' or 'x' (4th field of the active file) # will be allowed to show up under the junk group. # # Scanspool assumes that the path of a valid newsgroup's directory # from the top of the spool tree will not contain any "." character. # Thus, directories such as out.going, tmp.dir, in.coming and # news.archive will not be searched. This program also assumes that # article basenames contain only decimal digits. Last, files under # the top level directory "lost+found" are not scanned. # # The output of scanspool will start with one of 4 forms: # # FATAL: fatal or internal error (to stderr) # # WARN: active or article format problem, (to stderr) # group alias problem, find error, # article open error # # path/123: basename starts with 0, (to stdout) # article number out of range, # article in the wrong directory, # article in directory not related to # an active non-aliases newsgroup # # \t ... verbose message starting with a tab (to stdout) # Data structures # # $gname2type{$name} # $name - newsgroup name in foo.dot.form # produces => 4th active field (y, n, x, ...) # alias type is "=", not "=foo.bar" # # $realgname{$name} # $name - newsgroup name in foo.dot.form # produces => newsgroup name in foo.dot.form # if type is =, this will be a.b, not $name # # $lowart{$name} # $name - newsgroup name in foo.dot.form # produces => lowest article allowed in the group # if type is =, this is not valid # # $highart{$name} # $name - newsgroup name in foo.dot.form # produces => highest article allowed in the group # if type is =, this is not valid # If $highart{$name} < $lowart{$name}, # then the group should be empty # perl requirements # require "getopts.pl"; # setup non-buffered stdout and stderr # select(STDERR); $|=1; select(STDOUT); $|=1; # global constants # $prog = $0; # our name $spool = "$inn::patharticles"; $active = "$inn::pathdb/active"; $ctlinnd = "$inn::pathbin/ctlinnd"; $reason = "running scanspool"; # throttle reason # parse args # &Getopts("a:s:vcn"); $active = $opt_a if (defined($opt_a)); $spool = $opt_s if (defined($opt_s)); # throttle innd unless -n # if (! defined($opt_n)) { system("$ctlinnd throttle '$reason' >/dev/null 2>&1"); } # process the active file # &parse_active($active); # check the spool directory # &check_spool($spool); # unthrottle innd unless -n # if (! defined($opt_n)) { system("$ctlinnd go '$reason' >/dev/null 2>&1"); } # all done exit(0); # parse_active - parse the active file # # From the active file, fill out the @gname2type (type of newsgroup) # and @realgname (real/non-aliased name of group), @lowart & @highart # (low and high article numbers). This routine will also check for # aliases to missing groups or groups that are also aliases. # sub parse_active { local ($active) = $_[0]; # the name of the active file to use local (*ACTIVE); # active file handle local ($line); # active file line local ($name); # name of newsgroup local ($low); # low article number local ($high); # high article number local ($type); # type of newsgroup (4th active field) local ($field5); # 5th active field (should not exist) local ($dir); # directory path of group from $spool local ($alias); # realname of an aliased group local ($linenum); # active file line number # if verbose (-v), say what we are doing print "\tscanning $active\n" if defined($opt_v); # open the active file open (ACTIVE, $active) || &fatal(1, "cannot open $active"); # parse each line $linenum = 0; while ($line = ) { # count the line ++$linenum; # verify that we have a correct number of tokens if ($line !~ /^\S+ 0*(\d+) 0*(\d+) \S+$/o) { &problem("WARNING: active line is mal-formed at line $linenum"); next; } ($name, $high, $low, $type) = $line =~ /^(\S+) 0*(\d+) 0*(\d+) (\S+)$/o; # watch for duplicate entries if (defined($realgname{$name})) { &problem("WARNING: ignoring dup group: $name, at line $linenum"); next; } # record which type it is $gname2type{$name} = $type; # record the low and high article numbers $lowart{$name} = $low; $highart{$name} = $high; # determine the directory and real group name if ($type eq "j" || $type eq "x") { $dir = "junk"; $alias = $name; } elsif ($type =~ /^=(.+)/o) { $alias = $1; ($dir = $alias) =~ s#\.#/#go; $gname2type{$name} = "="; # rename type to be just = } else { $dir = $name; $dir =~ s#\.#/#go; $alias = $name; } $realgname{$name} = $alias; } # close the active file close (ACTIVE); # be sure that any alias type is aliased to a real group foreach $name (keys %realgname) { # skip if not an alias type next if $gname2type{$name} ne "="; # be sure that the alias exists $alias = $realgname{$name}; if (! defined($realgname{$alias})) { &problem("WARNING: alias for $name: $alias, is not a group"); next; } # be sure that the alias is not an alias of something else if ($gname2type{$alias} eq "=") { &problem("WARNING: alias for $name: $alias, is also an alias"); next; } } } # problem - report a problem to stdout # # Print a message to stdout. Parameters are space separated. # A final newline is appended to it. # # usage: # &problem(arg, arg2, ...) # sub problem { local ($line); # the line to write # print the line with the header and newline $line = join(" ", @_); print STDERR $line, "\n"; } # fatal - report a fatal error to stderr and exit # # Print a message to stderr. The message has the program name prepended # to it. Parameters are space separated. A final newline is appended # to it. This function exists with the code of exitval. # # usage: # &fatal(exitval, arg, arg2, ...) # sub fatal { local ($exitval) = $_[0]; # what to exit with # firewall if ($#_ < 1) { print STDERR "FATAL: fatal called with only ", $#_-1, " arguments\n"; if ($#_ < 0) { $exitval = -1; } } # print the error message shift(@_); $line = join(" ", @_); print STDERR "$prog: ", $line, "\n"; # unthrottle innd unless -n # if (! defined($opt_n)) { system("$ctlinnd go '$reason' >/dev/null 2>&1"); } # exit exit($exitval); } # check_spool - check the articles found in the spool directory # # This subroutine will check all articles found under the $spool directory. # It will examine only file path that do not contain any "." or whitespace # character, and whose basename is completely numeric. Files under # lost+found will also be ignored. # # given: # $spooldir - top of /usr/spool/news article tree # sub check_spool { local ($spooldir) = $_[0]; # top of article tree local (*FILEFILE); # pipe from the find files process local ($filename); # article pathname under $spool local ($artgrp); # group of an article local ($artnum); # article number in a group local ($prevgrp); # previous different value of $artgrp local ($preverrgrp); # previous non-active $artgrp local (*ARTICLE); # article handle local ($aline); # header line from an article local (@group); # array of groups from the Newsgroup header local ($j); # if verbose, say what we are doing print "\tfinding articles under $spooldir\n" if defined($opt_v); # move to the $spool directory chdir $spooldir || &fatal(2, "cannot chdir to $spool"); # start finding files # if (!open (FINDFILE, "find . \\( -type f -o -type l \\) -name '[0-9]*' -print 2>&1 |")) { &fatal(3, "cannot start find in $spool"); } # process each history line # while ($filename = ) { # if the line contains find:, assume it is a find error and print it chop($filename); if ($filename =~ /find:\s/o) { &problem("WARNING:", $filename); next; } # remove the \n and ./ that find put in our path $filename =~ s#^\./##o; # skip is this path has a . in it (beyond a leading ./) next if ($filename =~ /\./o); # skip if lost+found next if ($filename =~ m:^lost+found/:o); # skip if not a numeric basename next if ($filename !~ m:/\d+$:o); # get the article's newsgroup name (based on its path from $spool) $artgrp = $filename; $artgrp =~ s#/\d+$##o; $artgrp =~ s#/#.#go; # if verbose (-v), then note if our group changed if (defined($opt_v) && $artgrp ne $prevgrp) { print "\t$artgrp\n"; $prevgrp = $artgrp; } # note if the article is not in a directory that is used by # a real (non-aliased) group in the active file # # If we complained about this dgroup before, don't complain again. # If verbose, note files that could be removed. # if (!defined($gname2type{$artgrp}) || $gname2type{$artgrp} =~ /[=jx]/o){ if ($preverrgrp ne $artgrp) { &problem("$artgrp: not an active group directory"); $preverrgrp = $artgrp; } if (defined($opt_v)) { &problem("$filename: article found in non-active directory"); } next; } # check on the article number $artnum = $filename; $artnum =~ s#^.+/##o; if ($artnum =~ m/^0/o) { &problem("$filename: article basename starts with a 0"); } if (defined($gname2type{$artgrp})) { if ($lowart{$artgrp} > $highart{$artgrp}) { &problem("$filename: active indicates group should be empty"); } elsif ($artnum < $lowart{$artgrp}) { &problem("$filename: article number is too low"); } elsif ($artnum > $highart{$artgrp}) { &problem("$filename: article number is too high"); } } # if check filenames only (-c), then do nothing else with the file next if (defined($opt_c)); # don't open a control or junk, they can be from anywhere next if ($artgrp eq "control" || $artgrp eq "junk"); # try open the file if (!open(ARTICLE, $filename)) { # the find is now gone (expired?), give up on it &problem("WARNING: cannot open $filename"); next; } # read until the Newsgroup header line is found AREADLINE: while ($aline =
) { # catch the newsgroup: header if ($aline =~ /^Newsgroups:\w*\W/io) { # convert $aline into a comma separated list of groups $aline =~ s/^Newsgroups://io; $aline =~ tr/ \t\n//d; # form an array of news groups @group = split(",", $aline); # see if any groups in the Newsgroup list are our group for ($j=0; $j <= $#group; ++$j) { # look at the group if ($realgname{$group[$j]} eq $artgrp) { # this article was posted to this group last AREADLINE; } } # no group or group alias was found &problem("$filename: does not belong in $artgrp"); last; # else watch for the end of the header } elsif ($aline =~ /^\s*$/o) { # no Newsgroup: header found &problem("WARNING: $filename: no Newsgroup header"); last; } if (eof(ARTICLE)) { &problem("WARNING: $filename: EOF found while reading header"); } } # close the article close(ARTICLE); } # all done with the find close(FINDFILE); }