2 # fixscript will replace this line with require innshellvars.pl
3 $ID='$Id: thdexpire.in 4572 2001-02-24 22:31:05Z rra $$';
9 # With the -M switch this program installs its own man page.
10 #-----------------------------------------------------------------------------
14 thdexpire - dynamic expire daemon for timehash and timecaf storage
20 [ B<-f> I<kilobytes> ]
23 [ B<-x> I<minseconds> ]
31 This is a daemon, to be started along with B<innd>, which periodically
32 looks if news spool space is getting tight, and frees space by removing
33 articles until enough is free. It is an adjunct (not a replacement) to
34 INNs B<expire> program.
42 Configure your storage classes carefully. Let the default go in class
43 100 and choose the storage classes as relative (percent) retention
44 times. E.g. if you want to give C<alt.binaries.*> a fifth of the
45 default time, put them in class 20. Storage classes above 200 are
46 ignored by this program. 0 expires immediately. An example is given
51 Set up your F<expire.ctl> in a way that it puts only a maximum cap on
52 retention times. Run B<expire> from B<news.daily> as usual. However,
53 it should only expire articles which have an Expires line or are in
54 classes above 200. See L<"EXAMPLES">.
58 Ensure to start this daemon along with B<innd>.
62 To get information and statistics, run B<thdexpire -r> (in parallel to
63 a running daemon). This will show you the current actual retention
70 B<thdexpire> works directly on the spool. It assumes the layout
71 described in the timehash and timecaf sections of L<storage.conf(5)> as of
72 INN-2.x-CURRENT (Dec. 5, 1998). For every storage class associated
73 with timehash/timecaf, B<thdexpire> keeps a I<work time> which is the
74 modification time of the oldest article/CAF file in this class. This
75 time is chosen so that the difference of the work time of class N to
76 now (i.e. the I<retention time> for class N) will be N/100 of the
77 retention time of class 100. The work time of all classes is
78 continuously adjusted as time goes by. Articles and CAF files which
79 are older than the work time are deleted.
85 =item B<-t> I<minutes>
87 Check for free space every I<minutes> minutes (default 30).
89 =item B<-f> I<kilobytes>
91 Leave I<kilobytes> kilobytes of free disk space on each spool
92 filesystem (default 50000).
96 Leave I<inodes> inodes free on each spool filesystem (default 5000).
98 =item B<-m> I<mindays>
100 Set the minimum normal holding time for class 100 to I<mindays> days
103 =item B<-x> I<minseconds>
105 Set the absolute minimum holding time for any article to I<minseconds>
106 seconds (default 86400, i.e. 1 day).
110 Do not delete any articles, just print what would be done.
114 Set the verbosity level. Values from 1 to 3 are meaningful, where
115 higher levels are mostly for debugging.
119 Do not run as a daemon, instead print a report from the database (see
120 L<FILES>) on the available storage classes, current expire times and
127 Here is an example F<storage.conf> file:
129 # Large postings in binary groups are expired fast:
132 newsgroups: *.binaries.*,*.binaer.*,*.dateien.*,alt.mag.*
137 # Local groups and *.answers groups don't expire at all with
138 # thdexpire. These are handled by Expires lines and a cutoff
141 newsgroups: *.answers,news.announce.*,local.*
145 # Expires lines are honored if they dont exceed 90 days.
146 # Exempt those postings from thdexpire handling.
153 # Default: should be class 100 because thdexpire bases its
154 # calculations thereupon.
160 And here is an F<expire.ctl> which fits:
162 # Our local groups are held 6 months
164 # Everything else is handled by thdexpire, or Expires lines
167 Note that B<thdexpire> does not actually use these files, they just
168 configure other parts of the news system in an appropriate way.
174 =item F<E<lt>inn::pathdbE<gt>/thdexpstat.{dir,pag}>
176 Holds state information like classes, expire times, oldest articles.
177 When this file is missing, it will be rebuilt the next time the daemon
178 is started, which basically means scanning the spool directories to
179 find the oldest articles. With the B<-r> option, the contents of this
182 =item F<E<lt>inn::innddirE<gt>/thdexpire.pid>
184 Contains the PID of the running daemon.
190 I<SIGINT> or I<SIGTERM> can be sent to the daemon at any time, causing
191 it to gracefully exit immediately.
195 L<expire(8)>, L<news.daily(8)>, L<storage.conf(5)>
199 This version needs the B<inndf> program supplied with newer releases of INN.
201 The filenames for timecaf were wrong in older versions of the INN
202 documentation. This program uses the true filenames, as found by
203 reading the INN source.
207 Any error messages are printed on standard error. Normal progress
208 messages, as specified by the B<-v> option, are printed on standard
213 Storage classes which are in I<storage.conf> but not on disk (i.e.
214 which have never been filed into) when the daemon starts are ignored.
216 The code is ugly and uses too many global variables.
217 Should probably rewrite it in C.
221 Directories which are left empty are not removed.
223 The overview database is not affected by B<thdexpire>, it has to be
224 cleaned up by the daily regular B<news.daily> run. This may need a
229 Olaf Titz <olaf@bigred.inka.de>. Use and distribution of this work is
230 permitted under the same terms as the B<INN> package.
234 Inspired by the old B<dexpire> program for the traditional spool.
236 June 1998: wrote the first version for timehash.
238 November 1998: added code for timecaf, works on multiple spool
239 filesystems, PODed documentation.
245 #-----------------------------------------------------------------------------
247 chdir $inn::spool || die "chdir $inn::spool: $!";
248 $opt_r=0; # make a report
249 $opt_t=30; # check interval in minutes
250 $opt_f=50000; # required space in kilobytes
251 $opt_i=5000; # required space in inodes
252 $opt_m=7; # minimum normal (class 100) time in days
253 $opt_x=86400; # absolute minimum hold time in seconds
254 $opt_N=0; # dont actually delete articles
255 $opt_v=0; # verbosity level
256 $opt_M=0; # install man page
257 getopts("rt:f:i:m:x:Nv:M");
259 $_=$inn::pathdb; $_=$inn::pathnews; # shut up warning
260 $sfile="$inn::pathdb/thdexpstat";
261 $ID=~/ ([^,]+,v [^ ]+)/; $ID=$1;
264 print "Installing thdexpire(8) man page\n";
265 $0=~m:^(.*)/([^/]+)$:;
266 chdir $1 || die "chdir $1";
267 exec "pod2man --section=8 --center='Contributed News Software'" .
268 " --release='$ID' $2 >$inn::pathnews/man/man8/thdexpire.8";
272 tie(%S, SDBM_File, $sfile, O_RDONLY, 0664) || die "open $sfile: $!";
278 (system "shlock", "-p", $$, "-f", "$inn::innddir/thdexpire.pid")>>8==0
279 || die "Already running";
280 tie(%S, SDBM_File, $sfile, O_RDWR|O_CREAT, 0664) || die "open $sfile: $!";
281 $SIG{'TERM'}=$SIG{'INT'}='finish';
283 printf "%s starting at %s\n", $ID, &wtime(time) if ($opt_v>0);
286 $NOW=time; $ac=$cc=0;
287 opendir(CD, ".") || &err("opendir $inn::spool: $!");
288 while ($cd=readdir(CD), defined($cd)) {
289 $cd=~/^time(caf)?-([0-9a-f][0-9a-f])$/i || next;
290 $c{hex($2)}=1 unless hex($2)>200;
293 @classes=sort {$a<=>$b} keys %c;
294 foreach $c (@classes) {
296 $S{"work$;$c"}=$S{"oldest$;$c"}&0xFFFFFF00;
299 $S{"classes"}=join(",", @classes);
302 printf "Checked %d articles, %d CAFs in %d seconds\n", $ac, $cc, time-$NOW
303 if ($ac+$cc>0 && $opt_v>0);
305 chdir $inn::spool || die "chdir $inn::spool: $!";
307 $S{"lastrun"}=$NOW=time;
308 printf "%s\n", &wtime($NOW) if ($opt_v>0);
310 foreach $c (@classes) {
311 $t=($NOW-$S{"work$;$c"})*100/$c;
314 printf "Normal time (class 100): %s\n", &xtime($NOW-$nt)
316 if ($nt<$opt_m*24*60*60) {
317 printf " capped at minimum %d days\n", $opt_m
321 if ($nt>180*24*60*60) {
322 print " capped at maximum 180 days\n"
326 $S{"normaltime"}=$nt;
327 $decrement=$opt_t*60;
330 undef %needk; undef %needi;
331 foreach $c (@classes) {
332 $Dart{$c}=$Dcaf{$c}=$Dkb{$c}=$Dino{$c}=0;
333 $y=sprintf("time-%02x", $c);
338 unless (defined($needk{$S[0]})) {
340 $needk{$S[0]}=$needi{$S[0]}=-1;
344 $y=sprintf("timecaf-%02x", $c);
349 unless (defined($needk{$S[0]})) {
351 $needk{$S[0]}=$needi{$S[0]}=-1;
356 if (open(D, "inndf $x |")) {
359 $needk{$dev{$S[0]}}=$opt_f-$S[1] unless ($S[0] eq "/");
363 if (open(D, "inndf -i $x |")) {
366 $needi{$dev{$S[0]}}=$opt_i-$S[1] unless ($S[0] eq "/");
370 foreach $c (keys %needk) {
371 printf "Device %d needs to free %d kilobytes, %d inodes\n",
372 $c, $needk{$c}<0?0:$needk{$c}, $needi{$c}<0?0:$needi{$c}
373 if ($opt_v>0 && ($needk{$c}>0 || $needi{$c}>0));
374 if ($needk{$c}>0 || $needi{$c}>0) {
378 if ($opt_v>0 && $need<=0) {
379 print " (nothing to do)\n";
383 while (!$error && $need>0) {
384 if ($S{"normaltime"}-$decrement<$opt_m*24*60*60) {
385 print " Normal time hit minimum\n" if ($opt_v>0);
388 $S{"normaltime"}-=$decrement;
389 printf " normal time (100) becomes %ld\n", $S{"normaltime"}
392 $Dart=$Dcaf=$Dkb=$Dino=$need=0;
393 foreach $c (keys %needk) {
394 if ($needk{$c}>0 || $needi{$c}>0) {
399 foreach $c (@classes) {
400 &worktime($c, $NOW-($S{"normaltime"}*$c/100));
401 $Dart+=$dart; $Dcaf+=$dcaf; $Dkb+=$dbb>>10; $Dino+=$dino;
402 $Dart{$c}+=$dart; $Dcaf{$c}+=$dcaf;
403 $Dkb{$c}+=$dbb>>10; $Dino{$c}+=$dino;
408 printf " pass %d deleted %d arts, %d CAFs, %d kb\n",
409 $pass, $Dart, $Dcaf, $Dkb if ($opt_v>1);
410 $decrement-=$decrement>>2 if ($decrement>10*60);
412 $decrement+=$decrement>>1 if ($decrement<4*60*60);
415 $Dkb=$Dart=$Dcaf=$Dino=0;
416 foreach $c (@classes) {
417 printf " class %3d: deleted %6d arts %6d CAFs %10d kb\n",
418 $c, $Dart{$c}, $Dcaf{$c}, $Dkb{$c} if ($opt_v>1);
419 $Dkb+=$Dkb{$c}; $Dart+=$Dart{$c}; $Dcaf+=$Dcaf{$c};
422 printf " deleted %d articles, %d CAFs, %d kb in %d seconds\n",
423 $Dart, $Dcaf, $Dkb, time-$NOW if ($opt_v>0);
425 printf STDERR "Round needed %d seconds, interval is %d\n",
438 if (!$S{"blocksize$;$C$;CAF"}) {
439 # Determine filesystem blocksize
440 # unfortunately no way in perl to statfs
441 my $x=sprintf("%s/timecaf-%02x/test%d", $inn::spool, $C, $$);
442 if (open(A, ">$x")) {
446 $#S>=12 || die "stat: $!";
448 $S{"blocksize$;$C$;CAF"}=$S[7]/$S[12];
450 $S{"blocksize$;$C$;CAF"}=512;
451 warn "hack around broken stat blocksize";
456 return if ($S{"oldest$;$C"});
458 $S{"oldest$;$C"}=$oldest;
459 my $base=sprintf("%s/time-%02x", $inn::spool, $C);
462 printf "Finding oldest in class %d (%s)\n", $C, $base if ($opt_v>0);
464 while ($d1=readdir(D0), defined($d1)) {
465 $d1=~/^[0-9a-f][0-9a-f]$/ || next;
467 opendir(D1, ".") || next;
468 while ($d2=readdir(D1), defined($d2)) {
469 $d2=~/^[0-9a-f][0-9a-f]$/ || next;
471 opendir(D2, ".") || next;
472 while ($a=readdir(D2), defined($a)) {
475 $oldest=$S[9] if ($S[9]<$oldest);
487 $base=sprintf("%s/timecaf-%02x", $inn::spool, $C);
489 printf "Finding oldest in class %d (%s)\n", $C, $base if ($opt_v>0);
491 while ($d1=readdir(D0), defined($d1)) {
492 $d1=~/^[0-9a-f][0-9a-f]$/ || next;
494 opendir(D1, ".") || next;
495 while ($a=readdir(D1), defined($a)) {
498 $oldest=$S[9] if ($S[9]<$oldest);
507 $S{"count$;$C"}=$count;
508 $S{"oldest$;$C"}=$oldest;
516 printf " goal for class %d becomes %s\n", $C, &xtime($goal)
518 if ($goal>$NOW-$opt_x) {
519 printf " goal for class %d cut off\n", $C
524 $dart=$dcaf=$dbb=$dino=0;
525 $hdir=sprintf("time-%02x", $C);
526 $cdir=sprintf("timecaf-%02x", $C);
527 while (($_=$S{"work$;$C"})<$goal) {
528 printf " running: %08x\n", $_ if ($opt_v>2);
529 ($aa,$bb,$cc) = (($_>>24)&0xFF, ($_>>16)&0xFF, ($_>>8)&0xFF);
530 $dir=sprintf("%s/%02x/%02x", $hdir, $bb, $cc);
531 $pat=sprintf("[0-9a-f]{4}-%02x[0-9a-f]{2}", $aa);
532 if (opendir(D, $dir)) {
533 while ($_=readdir(D), defined($_)) {
539 print " would delete $art" if ($opt_v>2);
541 print " deleting $art" if ($opt_v>2);
545 printf " %d kb\n", $S[7]>>10 if ($opt_v>2);
547 $needk{$dev{$hdir}}-=$S[7]>>10;
548 $needi{$dev{$hdir}}--;
552 printf " (no dir %s)\n", $dir if ($opt_v>2);
554 $caf=sprintf("%s/%02x/%02x%02x.CF", $cdir, $bb, $aa, $cc);
558 print " would delete $caf" if ($opt_v>2);
560 print " deleting $caf" if ($opt_v>2);
565 # try to find how much there is in the CAF
567 @C=unpack("a4LLL", $_);
568 if ($C[0] eq "CRMT") {
576 $x=$S[12]*$S{"blocksize$;$C$;CAF"};
579 warn "hack around broken stat blocksize";
581 printf " %d arts %d kb\n", $y, $x>>10 if ($opt_v>2);
583 $needk{$dev{$cdir}}-=$x>>10;
584 $needi{$dev{$cdir}}--;
586 $S{"work$;$C"}+=0x100;
587 $S{"oldest$;$C"}=$S{"work$;$C"} unless ($opt_N);
594 my $cc=$S{"classes"};
595 my $nt=$S{"normaltime"};
596 unless ($cc && $nt) {
597 print "Not initialized.\n";
600 printf "Version: %s (this: %s)\n", $S{"ID"}, $ID;
601 printf "Started at: %s\n", &xtime($S{"inittime"}) if ($S{"inittime"});
602 printf "Last run: %s\n", &xtime($S{"lastrun"}) if ($S{"lastrun"});
603 printf "Classes: %s\n", $cc;
604 foreach $c (split(/,/, $cc)) {
605 printf "Class %d:\n", $c;
606 #printf " Initial count %d articles\n", $S{"count$;$c"};
607 printf " Oldest article: %s\n", &xtime($S{"oldest$;$c"});
608 printf " Expiring at: %s\n", &xtime($S{"work$;$c"});
609 printf " Normal time: %s\n", &xtime($NOW-$nt*$c/100);
610 printf " Filesystem block size (CAF): %d\n", $S{"blocksize$;$c$;CAF"};
618 sprintf("%04d-%02d-%02d %02d:%02d",
619 $T[5]+1900, $T[4]+1, $T[3], $T[2], $T[1]);
625 if ($NOW-$t<0 || $NOW-$t>350*24*60*60) {
629 my @D=gmtime($NOW-$t);
630 sprintf("%04d-%02d-%02d %02d:%02d (%dd %dh %dm)",
631 $T[5]+1900, $T[4]+1, $T[3], $T[2], $T[1],
632 $D[7], $D[2], $D[1]);
637 printf STDERR "%s\n", shift;
644 unlink "$inn::innddir/thdexpire.pid";
647 #-----------------------------------------------------------------------------