X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ian/git?a=blobdiff_plain;f=scripts%2Fexpire-iso8601;h=1ef843b2ef166bd03fc05a14bec50828a6158b16;hb=2d3da03e893d0c59e236224f049215fab8e81e8e;hp=082d33a75f61a74a8023a15bd03d50e69db214e6;hpb=003f19f0413c6a96865ad785709c3bf590495fa6;p=chiark-utils.git diff --git a/scripts/expire-iso8601 b/scripts/expire-iso8601 index 082d33a..1ef843b 100755 --- a/scripts/expire-iso8601 +++ b/scripts/expire-iso8601 @@ -1,184 +1,211 @@ -#!/bin/bash -set -e - usage () { - cat < +# +# This script and its documentation (if any) are free software; you +# can redistribute it and/or modify them under the terms of the GNU +# General Public License as published by the Free Software Foundation; +# either version 3, or (at your option) any later version. +# +# chiark-named-conf and its manpage are distributed in the hope that +# it will be useful, but WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +# PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, consult the Free Software Foundation's +# website at www.fsf.org, or the GNU Project website at www.gnu.org. + +use strict; + +our $usage = <<'END'; usage: - expire-iso8601 [] - - [ ...] + expire-iso8601 [] x [x ...] options: - -n do not really delete - -r recursive removal (rm -r) + -u is measured in units of seconds + (default is 86400, so is in days) + -s allow kept items to be seconds shorter apart than + specified; default is 10% of + -n do not really delete + -r recursive removal (rm -r) example: - /home/ian/junk/expire-iso8601 86400 10000 1 14 7 4 - uses units of 86400s (1 day) with a slop of 10ks; + /home/ian/junk/expire-iso8601 14x1 4x7 + uses units of 86400s (1 day) with a slop of 8640 it keeps 14 daily items - (that is 14 items, dated no less than 86400-10000s apart) - and 7 weekly items - (that is 7 items, dated no less than 7*86400-10000s apart) + (that is 14 items, dated no less than 86400-8640 apart) + and 4 weekly items + (that is 4 items, dated no less than 7*86400-8640 apart) the 14 daily and 7 weekly items may be the same, or not - There is no need to sort the list of interval/number pairs. + There is no need to sort the list of x pairs. exit status: 0 ok 4 rm failed 8 bad usage 16 catastrophic failure END - } - -trap 'exit 16' 0 -badusage () { echo >&2 "bad usage: $*"; usage >&2; trap '' 0; exit 8; } - -#-------------------- argument parsing -------------------- - -rm=rm -while [ $# -ge 1 ]; do - arg=$1; shift - case "$arg" in - --|-) break ;; - --help) usage; exit 0 ;; - --*) badusage "unknown option $arg" ;; - -*) - case "$arg" in - -n*) rm=: ;; - -r*) recurse=-r ;; - *) badusage "unknown option ${1:0:2}" ;; - esac - arg=-${arg#-?} - if test "x$arg" != x-; then set -- "$arg" "$@"; fi - ;; - *) set "$arg" "$@"; break ;; - esac -done - -[ $# -ge 4 ] || badusage 'too few arguments' - -unit=$1 -slop=$2 -shift;shift - -[ $(($# % 2)) = 0 ] || badusage 'odd keep arguments (need min/extent pairs)' -argl="$*" - -alldigits () { - [ "x${1##*[^0-9]}" = "x$1" ] || badusage "$2 must be all digits" - [ x$1 ] || badusage "$2 must be nonempty" + +use POSIX; +use Carp; +use Data::Dumper; +use Date::Parse; +use DateTime::Format::Strptime; + +our @files; +our $rm = 1; +our $recurse = 1; +our $unit = 86400; +our $slop; +our $debug = 0; +our @intervals; + +sub badusage ($) { + print STDERR "bad usage: $_[0]\n$usage" or die $!; + exit 8; } -while [ $# -gt 0 ]; do - min=$1; shift; extent=$1; shift - alldigits $min min - alldigits $extent extent -done +sub scan () { +# my $strp = DateTime::Format::Strptime->new(); + foreach my $f (<[0-9]*>) { + if ($f !~ m/^ \d\d\d\d - \d\d - \d\d + (?: T \d\d \: \d\d (?: \: \d\d )? + (?: [-+] \d{1,2} \:? \d\d )? )? /x) { + print STDERR "ignoring $f\n"; + } +# my @t = Date::Parse::strptime($f); +# @t = map { $_ // 0 } @t; +# my $t = mktime @t; +# m +# my $t = $strp->parse_datetime($f); +# $t = $t->epoch(); +# my @t = Date::Parse::strptime($f); +#print STDERR Dumper(\@t); +# my $t = mktime(@t); + $!=0; $?=0; my $t = `date -d '$&' +%s`; + die "date(!) failed on $&: $? $!" if $! || $?; + chomp $t or confess; + push @files, { F => $f, T => $t, U => [] }; + } +} -#-------------------- scanning the directory ---------- +sub precomp () { + if (!@files) { + print STDERR "none at all yet!\n"; + exit 0; + } -# We build in $l a list of the relevant filenames and the time_t's -# they represent. -# -# Each entry in $l is $time_t/$filename, and the list is -# newline-separated for the benefit of sort(1). - -ls=0 -for cn in [0-9]*; do - case "$cn" in - ????-??-??) - conv="$cn";; - ????-??-??T[0-2][0-9]+[0-9][0-9][0-9][0-9]|\ - ????-??-??T[0-2][0-9]:[0-6][0-9]+[0-9][0-9][0-9][0-9]|\ - ????-??-??T[0-2][0-9]:[0-6][0-9]:[0-6][0-9]+[0-9][0-9][0-9][0-9]) - conv="${cn%T*} ${cn#*T}";; - *) - echo >&2 "ignoring $cn" - continue;; - esac - cs=$(date -d "$conv" +%s) - l="$cs/$cn -$l" -done - -#-------------------- main computation -------------------- - -# We process each minimum/extent pair, to have it select a bunch of -# versions to keep. We annotate entries in $l: if we are keeping -# an entry we prepend a colon. - -# For each minimum/extent pair we look at the list from most recent -# to least recent, -# ie in order of increasing age -# ie in order of decreasing time_t -# and each time we're more than min older than the last item we kept, -# we mark the item to keep, until we have as many as we want. -# -# We build the new list (space-separated) in lnew. + # newest first, which means biggest T + @files = sort { $b->{T} <=> $a->{T} || $b->{F} cmp $a->{F} } @files; + my $newest_t = $files[0]{T}; + $_->{A} = ($newest_t - $_->{T}) / $unit foreach @files; + $slop /= $unit; -l=$(sort -nr <= 2; +} + +sub flag ($) { + my ($int) = @_; + my $n = $int->{N}; + my $d = $int->{D}; + my $dmax = $d + $slop; + my $spec = $int->{Spec}; + my $start_age = ($n-1) * $d - $slop; + my $i = 0; + + print DEBUG "FLAG $spec sa=$start_age dmax=$dmax\n"; + + # find $i, the youngest which is at least $start_age + for (;;) { + print DEBUG "i #$i $files[$i]{A}\n"; + last if $files[$i]{A} >= $start_age; + if ($i == $#files) { + print STDERR "insufficiently old for $spec\n"; + last; + } + $i++; + } + + for (;;) { + push @{ $files[$i]{U} }, $spec; + + # find $j, the closest to $i, preferably no more than $dmax younger + my $j = $i; + for (;;) { + $j--; + # at each point in this loop $j is the next candidate + last if $j < 0; + my $dt = $files[$i]{A} - $files[$j]{A}; + print DEBUG "j #$j $files[$j]{A} dt=$dt\n"; + last if $dt > $dmax; + } + last if $j < 0; + $j++; + if ($j == $i) { + $j--; + print STDERR "insufficiently dense for $spec after $files[$j]{F}\n"; + } + print DEBUG "i #$j\n"; + + $i = $j; + } +} + +sub implement () { + foreach (@files) { + next unless @{$_->{U}}; + printf "keep %s for %s - age %.1f\n", + $_->{F}, "@{$_->{U}}", $_->{A}; + } + foreach (@files) { + next if @{$_->{U}}; + printf "remove %s - age %.1f\n", + $_->{F}, $_->{A}; + if ($rm) { + my $r= system 'rm', ($recurse ? ('-r') : ()), "--", $_->{F}; + die "run rm: $!\n" unless defined($r) && $r >= 0; + exit 12 if $r; + } + } +} + +open DEBUG, ">/dev/null" or die $!; + +while (@ARGV && $ARGV[0] =~ m/^-/) { + $_ = shift @ARGV; + last if $_ eq '-' || $_ eq '--'; + if (m/^-[^-]/) { + while (m/^-./) { + if (s/^-n/-/) { $rm=0; } + elsif (s/-r/-/) { $recurse=1; } + elsif (s/-D/-/) { $debug++; } + elsif (s/-u(\d+)$//) { $unit=$1; } + elsif (s/-s(\d+)$//) { $slop=$1; } + else { badusage "unknown short option $_" } + } + } elsif (m/^--help$/) { + print $usage or die $!; + exit 0; + } else { + badusage "unknown long option $_" + } +} + +badusage "too few arguments" unless @ARGV; + +if ($debug) { + open DEBUG, ">&STDERR" or die $!; + DEBUG->autoflush(1); +} + +$slop //= $unit * 0.1; + +foreach (@ARGV) { + m/^(\d+)x(\d+)$/ or badusage "bad x $_"; + push @intervals, { Spec => $&, N => $1, D => $2 }; +} + +scan(); +precomp(); +foreach (@intervals) { flag $_ } +implement();