2 # Copyright 1999 Stephen M. Benoit, Service Providers of America.
3 # See notice at end of this file.
5 # Filename: archivegz.pl
6 # Author: Stephen M. Benoit (benoits@servicepro.com)
7 # Created: Wed Apr 14 13:56:01 1999
8 # Version: $Id: archivegz.in 4329 2001-01-14 13:47:52Z rra $
10 $RCSID='$Id: archivegz.in 4329 2001-01-14 13:47:52Z rra $ ';
12 # Specify command line options, and decode the command line.
14 require 'newgetopt.pl';
15 require 'newusage.pl';
18 "help|usage;;print this message",
19 "version;;print version",
20 "a=s;;directory to archive in instead of the default",
21 "f;;directory names will be flattened out",
22 "i=s;;append one line to the index file for each article (Destination name, Message ID, Subject)",
23 "m;; Files are copied by making a link. Not applicable, ignored",
24 "r;;Suppress stderr redirection to /var/log/news/errlog",
25 "n=s;;the news spool (source) directory (default=/var/spool/news/)",
26 "t=i;;timeout that separates batches (default 10 seconds)",
30 # "OPT;;Option without an argument",
31 # "OPT!;;Negatable option without an argument",
32 # "VAR=T;;Option with mandatory argument T = s(tring), i(nteger), or f(loat)",
33 # "VAR:T;;Option with optional argument",
34 # "OPT|AAA|BBB;;AAA and BBB are aliases for OPT",
35 # "VAR=T@;;Push option argument onto array @opt_VAR"
38 $badopt = !&NGetOpt(&NMkOpts(@opts));
39 # $badarg = (@ARGV != 0);
40 if ($badarg || $badopt || $opt_help)
42 &NUsage($0,0,'',@opts);
43 exit ($badopt||$badarg);
45 if ($opt_version) {print STDERR "$RCSID\n"; exit 0}
47 # --------------------------------------------------------------------
49 # --- constants and defaults ---
50 $NEWS_ROOT = "/var/spool/news/";
51 $NEWS_ERR = "/var/log/news/errlog";
52 $NEWS_ARCHIVE = $NEWS_ROOT . "news.archive/";
56 if ($timeout<1) {$timeout=1;}
58 # --------------------------------------------------------------------
64 $data =~ s+\\+\\\\+gi; # replace \ with \\
65 $data =~ s+\/+\\\/+gi; # replace / with \/
67 $data =~ s/([\+\*\?\[\]\(\)\{\}\.\|])/\\$1/gi; # replace +*?[](){}.|
73 local(@fhlist) = split(' ',$_[0]);
76 vec($bits,fileno($_),1) = 1;
83 my ($fileh,$timeout)=@_;
84 my $filehandle = (ref($fileh)
85 ? (ref($fileh) eq 'GLOB'
86 || UNIVERSAL::isa($fileh, 'GLOB')
87 || UNIVERSAL::isa($fileh, 'IO::Handle'))
88 : (ref(\$fileh) eq 'GLOB'));
89 local(*FILEH) = *$fileh{FILEHANDLE};
91 local($rin,$win,$ein);
92 local($rout,$wout,$eout);
93 $rin = $win = $ein = '';
94 $rin = fhbits('FILEH');
102 $nfound = select($rout=$rin, $wout=$win, $eout=$ein, $timeout);
107 # use sysread() to get characters up to end-of-line (incl.)
110 $result = sysread(FILEH, $accum, 1, $offset);
117 if (substr($accum,$offset,1) eq "\n")
130 # --------------------------------------------------------------------
132 # --- source spool directory ---
135 if ($opt_n !~ /^\//) # absolute path?
136 { $opt_n = $NEWS_ROOT . $opt_n; }
137 if ($opt_n !~ /\/$/) # must end with /
142 # --- archive directory ---
145 if ($opt_a !~ /^\//) # absolute path?
146 { $opt_a = $NEWS_ROOT . $opt_a; }
147 if ($opt_a !~ /\/$/) # must end with /
149 $NEWS_ARCHIVE = $opt_a;
152 # --- redirect stderr ---
155 open(SAVEERR, ">&STDERR");
156 open(STDERR, ">>$NEWS_ERR") || die "Can't redirect stderr";
159 # --- get input file opened ---
160 if ($infilename=shift(@ARGV))
162 if ($infilename !~ /^\//) # absolute filename?
164 $infilename = $NEWS_ROOT . $infilename;
172 open(INFILE,"<$infilename");
182 # --- loop over each line in infile ---
183 # comments start with '#', ignore blank lines, each line is a filename
184 while ($srcfile = &timed_getline(INFILE,$timeout))
186 if ($srcfile =~ /\#/) {$srcfile = $`;}
187 if ($srcfile =~ /^\s*/) {$srcfile = $';}
188 if ($srcfile =~ /\s*$/) {$srcfile = $`;}
189 if ($srcfile) # if a filename survived all that...
191 if ($srcfile !~ /^\//) # absolute filename?
193 $srcfile = $NEWS_ROOT . $srcfile;
195 # $srcfile is now a valid, absolute filename
196 # split filename into news directory, newsgroup and article number
199 if ($remaining =~ /\/(\d*)$/) # remove / and article number
200 { $artnum = $1; $remaining=$`;}
201 $regex = &regexp_escape($NEWS_ROOT); # result is interpolated into the /^$regex/ match that follows
202 if ($remaining =~ /^$regex/) # split off news dir
203 { $newsdir = $&; $grpdir = $';}
205 { $newsdir = ''; $grpdir = $remaining; } # ... otherwise, grp = dir
207 $newsgrp =~ s/\//\./g; # replace slash (/) with dot (.)
210 $grpdir = "$newsgrp.gz";
213 { $grpdir .= "/archive.gz"; }
214 $destfile = $NEWS_ARCHIVE . $grpdir;
216 # print STDERR "$srcfile --> $newsgrp --> $destfile\n";
217 if ($sourcefile{$newsgrp}) {$sourcefile{$newsgrp} .= " ";}
218 $sourcefile{$newsgrp} .= $srcfile;
219 $destfile{$newsgrp} = $destfile;
220 $destname{$newsgrp} = $grpdir;
224 # --- is there anything to do at this time? ---
231 # make sure directory exists
232 if ($opt_i =~ /\/[^\/]*$/)
235 system("mkdir", "-p", $dirbase); # list form bypasses the shell, so spaces/metacharacters in the path are passed verbatim
237 open(INDEX,">>$opt_i");
240 # --- make sure that archive file can be written (make parent dirs) ---
241 if ($destfile{$group} =~ /\/[^\/]*$/)
244 system("mkdir", "-p", $dirbase); # list form bypasses the shell, so spaces/metacharacters in the path are passed verbatim
247 # --- process each article ---
248 foreach $group (keys(%destfile))
250 # --- gzip the concatenated document, appending archive file ---
251 open(GZIP, "|gzip -c >> $destfile{$group}") || die "Can't open gzip";
253 # --- concatenate the articles, keeping header info if needed ---
255 foreach $srcfile (split(/\s+/, $sourcefile{$group}))
257 # print STDERR "reading $srcfile...\n";
259 open(DOC, "<$srcfile");
265 print GZIP $this_doc;
268 # --- get header information and store it in index
269 $subject=''; $messageid=''; $destname=''; # reset per article so a missing header never reuses the previous article's value
270 if ($this_doc =~ /Subject:\s*(.*)/)
272 if ($subject =~ /^\s*/) {$subject = $';}
273 if ($subject =~ /\s*$/) {$subject = $`;}
274 if ($this_doc =~ /Message-ID:\s*(.*)/)
276 if ($messageid =~ /^\s*/) {$messageid = $';}
277 if ($messageid =~ /\s*$/) {$messageid = $`;}
279 print INDEX "$destname{$group} $messageid $subject\n";
286 # --- close index file ---
293 if (!defined($srcfile)) # file was closed
301 # --- restore stderr ---
305 open(STDERR,">>&SAVEERR");
308 # --- close input file ---
317 # Copyright 1999 Stephen M. Benoit, Service Providers of America (SPA).
319 # Permission to use, copy, modify, and distribute this software and its
320 # documentation for any purpose without fee is hereby granted without fee,
321 # provided that the above copyright notice appear in all copies and that both
322 # that copyright notice and this permission notice appear in supporting
323 # documentation, and that the name of SPA not be used in advertising or
324 # publicity pertaining to distribution of the software without specific,
325 # written prior permission. SPA makes no representations about the
326 # suitability of this software for any purpose. It is provided "as is"
327 # without express or implied warranty.
329 # SPA DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
330 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
331 # SPA BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
332 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
333 # AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
334 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.