-#!/bin/bash
-set -e
- usage () {
- cat <<END
+#!/usr/bin/perl -w
+#
+# Copyright 2006 Ian Jackson <ijackson@chiark.greenend.org.uk>
+#
+# This script and its documentation (if any) are free software; you
+# can redistribute it and/or modify them under the terms of the GNU
+# General Public License as published by the Free Software Foundation;
+# either version 3, or (at your option) any later version.
+#
+# chiark-named-conf and its manpage are distributed in the hope that
+# it will be useful, but WITHOUT ANY WARRANTY; without even the
+# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+# PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, consult the Free Software Foundation's
+# website at www.fsf.org, or the GNU Project website at www.gnu.org.
+
+use strict;
+
+our $usage = <<'END';
usage:
expire-iso8601 [<options>] <number>x<interval> [<number>x<interval> ...]
options:
-u<unitlen> <interval> is measured in units of <unitlen> seconds
(default is 86400, so <interval> is in days)
- -s<slop> allow kept items to be <slop> seconds shorter apart than
- specified; default is 10% of <unitlen>
- -n do not really delete
- -r recursive removal (rm -r)
+ -s<slop> allow kept items to be <slop> seconds shorter or
+ longer apart than specified; default is 0.1 unit
+ -n do not really delete
+ -r recursive removal (rm -r)
+ --rename-only rename to *.rm, but do not delete
+ --help
example:
/home/ian/junk/expire-iso8601 14x1 4x7
uses units of 86400s (1 day) with a slop of 8640
0 ok
4 rm failed
8 bad usage
- 16 catastrophic failure
+ -1 catastrophic failure
END
- }
-# Copyright 2006 Ian Jackson <ian@chiark.greenend.org.uk>
-#
-# This script and its documentation (if any) are free software; you
-# can redistribute it and/or modify them under the terms of the GNU
-# General Public License as published by the Free Software Foundation;
-# either version 3, or (at your option) any later version.
-#
-# chiark-named-conf and its manpage are distributed in the hope that
-# it will be useful, but WITHOUT ANY WARRANTY; without even the
-# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
-# PURPOSE. See the GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, consult the Free Software Foundation's
-# website at www.fsf.org, or the GNU Project website at www.gnu.org.
+use POSIX;
+use Carp;
+use Data::Dumper;
+use Date::Parse;
+use DateTime::Format::Strptime;
+
+$|=1;
+our @oldfiles;
+our @files;
+our $enable = 2;
+our $recurse = 1;
+our $unit = 86400;
+our $slop;
+our $debug = 0;
+our @intervals;
-trap 'exit 16' 0
-badusage () { echo >&2 "bad usage: $*"; usage >&2; trap '' 0; exit 8; }
+sub badusage ($) {
+ print STDERR "bad usage: $_[0]\n$usage" or die $!;
+ exit 8;
+}
+
+sub scan () {
+# my $strp = DateTime::Format::Strptime->new();
+ foreach my $f (<[0-9]*>) {
+ if ($f !~ m/^ \d\d\d\d - \d\d - \d\d
+ (?: T \d\d \: \d\d (?: \: \d\d )?
+ (?: [-+] \d{1,2} \:? \d\d )? )?
+ ( \.rm )? $/x) {
+ print STDERR "ignoring $f\n";
+ }
-#-------------------- argument parsing --------------------
+ if ($1) {
+ push @oldfiles, $f;
+ next;
+ }
-alldigits () {
- [ "x${2##*[^0-9]}" = "x$2" ] || \
- badusage "bad $1 \`$2'; must be all digits"
- [ "$2" ] || badusage "bad $2; must be nonempty"
- eval $1='$2'
+ my @t = Date::Parse::strptime($f);
+ @t = map { $_ // 0 } @t;
+ my $t = mktime @t;
+# m
+# my $t = $strp->parse_datetime($f);
+# $t = $t->epoch();
+# my @t = Date::Parse::strptime($f);
+#print STDERR Dumper(\@t);
+# my $t = mktime(@t);
+# $!=0; $?=0; my $t = `date -d '$&' +%s`;
+# die "date(!) failed on $&: $? $!" if $! || $?;
+# chomp $t or confess;
+ push @files, { F => $f, T => $t, U => [] };
+ }
}
-rm=rm
-recurse=''
-unit=86400
-slop=''
-
-while [ $# -ge 1 ]; do
- arg=$1; shift
- case "$arg" in
- --|-) break ;;
- --help) usage; exit 0 ;;
- --*) badusage "unknown option $arg" ;;
- -*)
- val=${arg#-?}
- case "$arg" in
- -n*) rm=: ;;
- -r*) recurse=-r ;;
- -u*) alldigits unit "$val"; arg='' ;;
- -s*) alldigits slop "$val"; arg='' ;;
- *) badusage "unknown option ${1:0:2}" ;;
- esac
- arg=-${arg#-?}
- if test "x$arg" != x-; then set -- "$arg" "$@"; fi
- ;;
- *) set "$arg" "$@"; break ;;
- esac
-done
-
-[ $# -ge 1 ] || badusage 'too few arguments'
-[ "$slop" ] || slop=$(( $unit / 10 ))
-
-for ni in "$@"; do
- case "$ni" in *x*);; *) badusage "bad <number>x<interval> $ni";; esac
- alldigits number "${ni%%x*}"
- alldigits interval "${ni#*x}"
-done
-
-#-------------------- scanning the directory ----------
-
-# We build in $l a list of the relevant filenames and the time_t's
-# they represent.
-#
-# Each entry in $l is $time_t/$filename, and the list is
-# newline-separated for the benefit of sort(1).
-
-ls=0
-for cn in [0-9]*; do
- case "$cn" in
- ????-??-??)
- conv="$cn";;
- ????-??-??T[0-2][0-9]+[0-9][0-9][0-9][0-9]|\
- ????-??-??T[0-2][0-9]:[0-6][0-9]+[0-9][0-9][0-9][0-9]|\
- ????-??-??T[0-2][0-9]:[0-6][0-9]:[0-6][0-9]+[0-9][0-9][0-9][0-9])
- conv="${cn%T*} ${cn#*T}";;
- *)
- echo >&2 "ignoring $cn"
- continue;;
- esac
- cs=$(date -d "$conv" +%s)
- l="$cs/$cn
-$l"
-done
-
-#-------------------- main computation --------------------
-
-# We process each minimum/extent pair, to have it select a bunch of
-# versions to keep. We annotate entries in $l: if we are keeping
-# an entry we prepend a colon; temporarily, if we are keeping an entry
-# because of this particular minimum/extent, we prepend a comma.
-
-# For each minimum/extent pair we look at the list from most recent
-# to least recent,
-# ie in order of increasing age
-# ie in order of decreasing time_t
-# and each time we're more than min older than the last item we kept,
-# we mark the item to keep, until we have as many as we want.
-#
-# We build the new list (space-separated) in lnew.
+sub precomp () {
+ if (!@files) {
+ print STDERR "none at all yet!\n";
+ exit 0;
+ }
-l=$(sort -nr <<END
-$l
-END
-)
-
-for ni in "$@"; do
- wantcount=${ni%x*}
-
- div=1
-
- while true; do
- min=$(( (${ni#*x} * $unit) / $div - $slop ))
-
- ls=''
- lnew=''
- skipped=0
- for ce in $l; do
- cn=${ce#*/}; cl=${ce%%/*}
- cs=${cl#,}; cs=${cs#:}
- case $cl in ,*) ls=$cs; continue;; esac
- if [ $wantcount != 0 ]; then
- if ! [ "$ls" ] || \
- [ $(( $ls - $cs )) -ge $min ]; then
- echo "keep (for $ni) $cn"
- ce=,$ce
- ls=$cs
- wantcount=$(( $wantcount - 1 ))
- else
- skipped=$(( $skipped+1 ))
- fi
- fi
- lnew="$lnew $ce"
- done
- l=$lnew
-
- if [ $wantcount = 0 ]; then break; fi
- printf "%s" "insufficient (for $ni) by $wantcount"
- if [ $skipped = 0 ]; then echo; break; fi
- div=$(( $div * 2 ))
- echo " shortening interval ${div}x"
- done
-
- # s/([,:]+).*/:\1/g
- lnew=''
- for ce in $l; do
- case $ce in ,*) ce=:${ce#,};; esac
- case $ce in ::*) ce=${ce#:};; esac
- lnew="$lnew $ce"
- done
- l=$lnew
-done
-
-#-------------------- execution --------------------
-
-trap '' 0
-exitstatus=0
-
-nonbroken_echo () { (echo "$@"); }
-# While we have subprocesses, we have to avoid bash calling write(1,...)
-# because of a bug in bash (Debian #382798), so we arrange for a subshell
-# for each echo.
-
-jobs=''
-for ce in $l; do
- case $ce in
- :*);;
- *)
- cn=${ce#*/}
- nonbroken_echo "expire $cn"
- $rm $recurse -- $cn &
- jobs="$jobs $!"
- ;;
- esac
-done
-
-if [ "$jobs" ]; then
- nonbroken_echo "all running"
-fi
-
-for job in $jobs; do
- wait $job || exitstatus=4
-done
-
-if [ $exitstatus = 0 ]; then
- echo "complete"
-else
- echo "complete, but problems deleting"
-fi
-
-exit $exitstatus
+ # newest first, which means biggest T
+ @files = sort { $b->{T} <=> $a->{T} || $b->{F} cmp $a->{F} } @files;
+ my $newest_t = $files[0]{T};
+ $_->{A} = ($newest_t - $_->{T}) / $unit foreach @files;
+ $slop /= $unit;
+
+ push @{$files[0]{U}}, "newest";
+
+ print DEBUG Dumper(scalar(@files), \@files, \@intervals) if $debug >= 2;
+}
+
+sub flag ($) {
+ my ($int) = @_;
+ my $n = $int->{N};
+ my $d = $int->{D};
+ my $dmin = $d - $slop;
+ my $dmax = $d + $slop;
+ my $spec = $int->{Spec};
+ my $start_age = ($n-1) * $d - $slop;
+ my $i = 0;
+ my $insufficiently_old = 0;
+
+ print DEBUG "FLAG $spec sa=$start_age dmin=$dmin dmax=$dmax\n";
+
+ # find $i, the youngest which is at least $start_age
+ for (;;) {
+ print DEBUG "i #$i $files[$i]{A}\n";
+ last if $files[$i]{A} >= $start_age;
+ if ($i == $#files) {
+ $insufficiently_old = 1;
+ print STDERR "insufficiently old for $spec\n";
+ last;
+ }
+ $i++;
+ }
+
+ my $oldest = $i;
+ my $count = 0;
+
+ my $use = sub {
+ my ($i, $spec) = @_;
+ push @{ $files[$i]{U} }, $spec;
+ $count++;
+ };
+
+ for (;;) {
+ $use->($i, $spec);
+
+ # find $j, the closest to $i, preferably no more than $dmax younger
+ my $j = $i;
+ for (;;) {
+ $j--;
+ # at each point in this loop $j is the next candidate
+ last if $j < 0;
+ my $dt = $files[$i]{A} - $files[$j]{A};
+ print DEBUG "j #$j $files[$j]{A} dt=$dt\n";
+ last if $dt > $dmax;
+ }
+ $j++;
+ if ($j == $i) {
+ $j--;
+ last if $j < 0;
+ print STDERR "insufficiently dense for $spec before $files[$j]{F}\n";
+ }
+ print DEBUG "i #$j\n";
+
+ $i = $j;
+ }
+
+ $i = $oldest;
+ while ($count < $n) {
+ for (;;) {
+ $i++;
+ if ($i > $#files) {
+ if (!$insufficiently_old) {
+ print STDERR
+ "insufficiently old for $spec (density compensation)\n";
+ }
+ return;
+ }
+ my $dt = $files[$i]{A} - $files[$oldest]{A};
+ print DEBUG "o #$i $files[$i]{A} dt=$dt\n";
+ last if $dt >= $dmin;
+ }
+ $use->($i, "$spec+");
+ }
+}
+
+sub do_rm ($) {
+ my ($fn) = @_;
+ if ($enable >= 2) {
+ my $r= system 'rm', ($recurse ? ('-r') : ()), "--", $fn;
+ die "run rm: $!\n" unless defined($r) && $r >= 0;
+ exit 4 if $r;
+ }
+}
+
+sub implement () {
+ foreach (reverse sort @oldfiles) {
+ printf "remove %s - old\n", $_;
+ do_rm($_);
+ }
+ foreach (reverse @files) {
+ next unless @{$_->{U}};
+ printf "keep %s for %s - age %.1f\n",
+ $_->{F}, "@{$_->{U}}", $_->{A};
+ }
+ foreach (reverse @files) {
+ next if @{$_->{U}};
+ printf "remove %s - age %.1f\n",
+ $_->{F}, $_->{A};
+ if ($enable >= 1) {
+ my $tmp = "$_->{F}.rm";
+ rename $_->{F}, $tmp or die "rename $_->{F} to $tmp: $!\n";
+ do_rm($tmp);
+ }
+ }
+}
+
+open DEBUG, ">/dev/null" or die $!;
+
+while (@ARGV && $ARGV[0] =~ m/^-/) {
+ $_ = shift @ARGV;
+ last if $_ eq '-' || $_ eq '--';
+ if (m/^-[^-]/) {
+ while (m/^-./) {
+ if (s/^-n/-/) { $enable=0; }
+ elsif (s/-r/-/) { $recurse=1; }
+ elsif (s/-D/-/) { $debug++; }
+ elsif (s/-u(\d+)$//) { $unit=$1; }
+ elsif (s/-s(\d+)$//) { $slop=$1; }
+ else { badusage "unknown short option $_" }
+ }
+ } elsif (m/^--rename-only$/) {
+ $enable=1;
+ } elsif (m/^--help$/) {
+ print $usage or die $!;
+ exit 0;
+ } else {
+ badusage "unknown long option $_"
+ }
+}
+
+badusage "too few arguments" unless @ARGV;
+
+if ($debug) {
+ open DEBUG, ">&STDERR" or die $!;
+ DEBUG->autoflush(1);
+}
+
+$slop //= $unit * 0.1;
+
+foreach (@ARGV) {
+ m/^(\d+)x(\d+)$/ or badusage "bad <number>x<interval> $_";
+ push @intervals, { Spec => $&, N => $1, D => $2 };
+}
+
+scan();
+precomp();
+foreach (@intervals) { flag $_ }
+implement();