-#!/bin/bash
-# usage:
-# expire-iso8601 <unit-in-seconds> <slop-in-seconds>
-# <min-interval-in-units> <extent-in-min-intervals>
-# [<min-interval-in-units> <extent-in-min-intervals> ...]
+#!/usr/bin/perl -w
#
-# eg
-# /home/ian/junk/expire-iso8601 86400 10000 1 14 7 4
-# uses units of 86400s (1 day) with a slop of 10ks;
-# it keeps daily copies (that is, dated no more than 86400+10000s apart)
-# for at least 1*14 days, ie the oldest will be at least 86400s*1*14-10000s
-# older than the very newest
-# and weekly copies (that is, dated no more than 7*86400+10000s apart)
-# for at least 7*4 days, ie the oldest will be at least 86400s*7*4-10000s
-# older than the very newest
+# Copyright 2006 Ian Jackson <ijackson@chiark.greenend.org.uk>
+#
+# This script and its documentation (if any) are free software; you
+# can redistribute it and/or modify them under the terms of the GNU
+# General Public License as published by the Free Software Foundation;
+# either version 3, or (at your option) any later version.
+#
+# chiark-named-conf and its manpage are distributed in the hope that
+# it will be useful, but WITHOUT ANY WARRANTY; without even the
+# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+# PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, consult the Free Software Foundation's
+# website at www.fsf.org, or the GNU Project website at www.gnu.org.
+
+use strict;
+
+our $usage = <<'END';
+usage:
+ expire-iso8601 [<options>] <number>x<interval> [<number>x<interval> ...]
+options:
+ -u<unitlen> <interval> is measured in units of <unitlen> seconds
+ (default is 86400, so <interval> is in days)
+ -s<slop> allow kept items to be <slop> seconds shorter apart than
+ specified; default is 10% of <unitlen>
+ -n do not really delete
+ -r recursive removal (rm -r)
+example:
+ /home/ian/junk/expire-iso8601 14x1 4x7
+ uses units of 86400s (1 day) with a slop of 8640
+ it keeps 14 daily items
+ (that is 14 items, dated no less than 86400-8640 apart)
+ and 4 weekly items
+ (that is 4 items, dated no less than 7*86400-8640 apart)
+ the 14 daily and 7 weekly items may be the same, or not
+ There is no need to sort the list of <number>x<interval> pairs.
+exit status:
+ 0 ok
+ 4 rm failed
+ 8 bad usage
+ 16 catastrophic failure
+END
-set -e
+use POSIX;
+use Carp;
+use Data::Dumper;
+use Date::Parse;
+use DateTime::Format::Strptime;
+
+our @files;
+our $rm = 1;
+our $recurse = 1;
+our $unit = 86400;
+our $slop;
+our $debug = 0;
+our @intervals;
+
+sub badusage ($) {
+ print STDERR "bad usage: $_[0]\n$usage" or die $!;
+ exit 8;
+}
-fail () { echo >&2 "$*"; exit 2; }
-badusage () { fail "bad usage: $*"; }
+sub scan () {
+# my $strp = DateTime::Format::Strptime->new();
+ foreach my $f (<[0-9]*>) {
+ if ($f !~ m/^ \d\d\d\d - \d\d - \d\d
+ (?: T \d\d \: \d\d (?: \: \d\d )?
+ (?: [-+] \d{1,2} \:? \d\d )? )? /x) {
+ print STDERR "ignoring $f\n";
+ }
+# my @t = Date::Parse::strptime($f);
+# @t = map { $_ // 0 } @t;
+# my $t = mktime @t;
+# m
+# my $t = $strp->parse_datetime($f);
+# $t = $t->epoch();
+# my @t = Date::Parse::strptime($f);
+#print STDERR Dumper(\@t);
+# my $t = mktime(@t);
+ $!=0; $?=0; my $t = `date -d '$&' +%s`;
+ die "date(!) failed on $&: $? $!" if $! || $?;
+ chomp $t or confess;
+ push @files, { F => $f, T => $t, U => [] };
+ }
+}
-#-------------------- argument parsing --------------------
+sub precomp () {
+ if (!@files) {
+ print STDERR "none at all yet!\n";
+ exit 0;
+ }
-[ $# -ge 4 ] || badusage 'too few arguments'
+ # newest first, which means biggest T
+ @files = sort { $b->{T} <=> $a->{T} || $b->{F} cmp $a->{F} } @files;
+ my $newest_t = $files[0]{T};
+ $_->{A} = ($newest_t - $_->{T}) / $unit foreach @files;
+ $slop /= $unit;
-unit=$1
-slop=$2
-shift;shift
+ push @{$files[0]{U}}, "newest";
-[ $(($# % 2)) = 0 ] || badusage 'odd keep arguments (need min/extent pairs)'
-argl="$*"
+ print DEBUG Dumper(scalar(@files), \@files, \@intervals) if $debug >= 2;
+}
-alldigits () {
- [ "x${1##*[^0-9]}" = "x$1" ] || badusage "$2 must be all digits"
- [ x$1 ] || badusage "$2 must be nonempty"
+sub flag ($) {
+ my ($int) = @_;
+ my $n = $int->{N};
+ my $d = $int->{D};
+ my $dmax = $d + $slop;
+ my $spec = $int->{Spec};
+ my $start_age = ($n-1) * $d - $slop;
+ my $i = 0;
+
+ print DEBUG "FLAG $spec sa=$start_age dmax=$dmax\n";
+
+ # find $i, the youngest which is at least $start_age
+ for (;;) {
+ print DEBUG "i #$i $files[$i]{A}\n";
+ last if $files[$i]{A} >= $start_age;
+ if ($i == $#files) {
+ print STDERR "insufficiently old for $spec\n";
+ last;
+ }
+ $i++;
+ }
+
+ for (;;) {
+ push @{ $files[$i]{U} }, $spec;
+
+ # find $j, the closest to $i, preferably no more than $dmax younger
+ my $j = $i;
+ for (;;) {
+ $j--;
+ # at each point in this loop $j is the next candidate
+ last if $j < 0;
+ my $dt = $files[$i]{A} - $files[$j]{A};
+ print DEBUG "j #$j $files[$j]{A} dt=$dt\n";
+ last if $dt > $dmax;
+ }
+ last if $j < 0;
+ $j++;
+ if ($j == $i) {
+ $j--;
+ print STDERR "insufficiently dense for $spec after $files[$j]{F}\n";
+ }
+ print DEBUG "i #$j\n";
+
+ $i = $j;
+ }
}
-while [ $# -gt 0 ]; do
- min=$1; shift; extent=$1; shift
- alldigits $min min
- alldigits $extent extent
-done
+sub implement () {
+ foreach (@files) {
+ next unless @{$_->{U}};
+ printf "keep %s for %s - age %.1f\n",
+ $_->{F}, "@{$_->{U}}", $_->{A};
+ }
+ foreach (@files) {
+ next if @{$_->{U}};
+ printf "remove %s - age %.1f\n",
+ $_->{F}, $_->{A};
+ if ($rm) {
+ my $r= system 'rm', ($recurse ? ('-r') : ()), "--", $_->{F};
+ die "run rm: $!\n" unless defined($r) && $r >= 0;
+ exit 12 if $r;
+ }
+ }
+}
-#-------------------- scanning the directory ----------
+open DEBUG, ">/dev/null" or die $!;
+
+while (@ARGV && $ARGV[0] =~ m/^-/) {
+ $_ = shift @ARGV;
+ last if $_ eq '-' || $_ eq '--';
+ if (m/^-[^-]/) {
+ while (m/^-./) {
+ if (s/^-n/-/) { $rm=0; }
+ elsif (s/-r/-/) { $recurse=1; }
+ elsif (s/-D/-/) { $debug++; }
+ elsif (s/-u(\d+)$//) { $unit=$1; }
+ elsif (s/-s(\d+)$//) { $slop=$1; }
+ else { badusage "unknown short option $_" }
+ }
+ } elsif (m/^--help$/) {
+ print $usage or die $!;
+ exit 0;
+ } else {
+ badusage "unknown long option $_"
+ }
+}
-# We build in $l a list of the relevant filenames and the time_t's
-# they represent. And, while we're at it, we find the most recent
-# such time_t ($ls) and its name ($ln).
-#
-# Each entry in $l is $time_t/$filename, and the list is
-# newline-separated for the benefit of sort(1).
-
-ls=0
-for cn in [0-9]*; do
- echo $cn
- case "$cn" in
- ????-??-??)
- conv="$cn";;
- ????-??-??T[0-2][0-9]+[0-9][0-9][0-9][0-9]|\
- ????-??-??T[0-2][0-9]:[0-6][0-9]+[0-9][0-9][0-9][0-9]|\
- ????-??-??T[0-2][0-9]:[0-6][0-9]:[0-6][0-9]+[0-9][0-9][0-9][0-9])
- conv="${cn%T*} ${cn#*T}";;
- *)
- echo >&2 "ignoring $cn"
- continue;;
- esac
- cs=$(date -d "$conv" +%s)
- if [ $cs -gt $ls ]; then
- ls=$cs; ln=$cn
- fi
- l="$cs/$cn
-$l"
-done
-
-echo "newest $ln"
-
-#-------------------- main computation --------------------
-
-# We go through the items from most to least recent
-# ie in order of increasing age
-# ie in order of decreasing time_t
-# We constantly maintain records of this item (c) and the last two
-# (b and a).
-#
-# We then check to see if any of the specified minimum/extent pairs
-# mean we should keep c and b.
-#
-# We can delete c if b is older than every specified extent. b will
-# then be the latest version we keep and is old enough. (Note that if
-# the density isn't satisfied, the expected number of old items may
-# not be satisfied either; in the worst case, if b is very old, we
-# might end up with just two items left.)
-#
-# If we delete c then we just go on to the next c, which will
-# definitely be older, so will be deleted too (because b remains
-# unchanged): ie we then delete all the rest.
-#
-# If we don't delete c, we look at the gap between a and c. If this
-# gap is not too long (according to any of the minimum/extent pairs)
-# then it is OK to delete b. (A gap is too long if it's longer than a
-# relevant pair's minimum, but a pair isn't relevant if c is older
-# than the extent.) If we delete b then current c becomes the new b.
-#
-# If we don't delete either then b and c become the new a and b.
-
-- because b is clearly sufficient to
-# satisfy the
-# if we delete
-
-# {l,a,b,c}{s,n,a} = seconds, name of a,b,c where
-# c is one we're looking at now and
-# b is previous one
-# a is one before that
-# l is last (most recent)
-# where a, b, c have not been removed
-
-as=''
-an=''
-bs=''
-bn=''
-
-remove () {
- echo "expire $1 (have $2)"
+badusage "too few arguments" unless @ARGV;
+
+if ($debug) {
+ open DEBUG, ">&STDERR" or die $!;
+ DEBUG->autoflush(1);
}
-l=$(sort -nr <<END
-$l
-END
-)
-
-for ce in $l; do
- cs=${ce%%/*}; cn=${ce#*/}
- set $argl
- c_mightneed=false
- b_needfordensity=false
- if test "$as"; then
- ac_interval=$(( $as - $cs ))
- fi
- while [ $# != 0 ]; do
- min=$(( $1 * $unit + $slop ))
- extent=$(( $1 * $2 * $unit - $slop ))
- # if b is as old as required by anything
- # then c is definitely surplus
- if ! [ "$bs" ] || \
- [ $(($ls - $bs)) -le $extent ]; then
- c_mightneed=true
- fi
- if ! test "$as" || \
- [ $(($ls - $as)) -le $extent -a \
- $ac_interval -gt $min ]; then
- b_needfordensity=true
- fi
- shift;shift
- done
- if ! $c_mightneed; then
- remove $cn "$bn"
- continue
- fi
- if ! $b_needfordensity; then
- remove $bn "$an $cn"
- bn=$cn; bs=$cs
- continue
- fi
- an=$bn; as=$bs
- bn=$cn; bs=$cs
-done
-
-exit 0
+$slop //= $unit * 0.1;
+
+foreach (@ARGV) {
+ m/^(\d+)x(\d+)$/ or badusage "bad <number>x<interval> $_";
+ push @intervals, { Spec => $&, N => $1, D => $2 };
+}
+
+scan();
+precomp();
+foreach (@intervals) { flag $_ }
+implement();