From: Ian Jackson Date: Wed, 19 Aug 2020 21:32:58 +0000 (+0100) Subject: expire-iso8601: Replace with new implementation X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ian/git?a=commitdiff_plain;h=dc3f1537680f65836c351979726ef5ed1e47df5a;p=chiark-utils.git expire-iso8601: Replace with new implementation This fixes the design problem and is also much more maintainable and comprehensible. Closes: #862897 Signed-off-by: Ian Jackson --- diff --git a/scripts/expire-iso8601 b/scripts/expire-iso8601 index 7600678..552b258 100755 --- a/scripts/expire-iso8601 +++ b/scripts/expire-iso8601 @@ -1,7 +1,24 @@ -#!/bin/bash -set -e - usage () { - cat < +# +# This script and its documentation (if any) are free software; you +# can redistribute it and/or modify them under the terms of the GNU +# General Public License as published by the Free Software Foundation; +# either version 3, or (at your option) any later version. +# +# chiark-named-conf and its manpage are distributed in the hope that +# it will be useful, but WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +# PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, consult the Free Software Foundation's +# website at www.fsf.org, or the GNU Project website at www.gnu.org. + +use strict; + +our $usage = <<'END'; usage: expire-iso8601 [] x [x ...] options: @@ -26,202 +43,169 @@ exit status: 8 bad usage 16 catastrophic failure END - } -# Copyright 2006 Ian Jackson -# -# This script and its documentation (if any) are free software; you -# can redistribute it and/or modify them under the terms of the GNU -# General Public License as published by the Free Software Foundation; -# either version 3, or (at your option) any later version. -# -# chiark-named-conf and its manpage are distributed in the hope that -# it will be useful, but WITHOUT ANY WARRANTY; without even the -# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR -# PURPOSE. See the GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, consult the Free Software Foundation's -# website at www.fsf.org, or the GNU Project website at www.gnu.org. +use POSIX; +use Carp; +use Data::Dumper; +use Date::Parse; +use DateTime::Format::Strptime; + +our @files; +our $rm = 1; +our $recurse = 1; +our $unit = 86400; +our $slop; +our $debug = 0; +our @intervals; + +sub badusage ($) { + print STDERR "bad usage: $_[0]\n$usage" or die $!; + exit 8; +} + +sub scan () { +# my $strp = DateTime::Format::Strptime->new(); + foreach my $f (<[0-9]*>) { + if ($f !~ m/^ \d\d\d\d - \d\d - \d\d + (?: T \d\d \: \d\d (?: \: \d\d )? + (?: [-+] \d{1,2} \:? \d\d )? )? /x) { + print STDERR "ignoring $f\n"; + } + my @t = Date::Parse::strptime($f); + @t = map { $_ // 0 } @t; + my $t = mktime @t; +# m +# my $t = $strp->parse_datetime($f); +# $t = $t->epoch(); +# my @t = Date::Parse::strptime($f); +#print STDERR Dumper(\@t); +# my $t = mktime(@t); +# $!=0; $?=0; my $t = `date -d '$&' +%s`; +# die "date(!) failed on $&: $? $!" if $! || $?; +# chomp $t or confess; + push @files, { F => $f, T => $t, U => [] }; + } +} +sub precomp () { + if (!@files) { + print STDERR "none at all yet!\n"; + exit 0; + } -trap 'exit 16' 0 -badusage () { echo >&2 "bad usage: $*"; usage >&2; trap '' 0; exit 8; } + # newest first, which means biggest T + @files = sort { $b->{T} <=> $a->{T} || $b->{F} cmp $a->{F} } @files; + my $newest_t = $files[0]{T}; + $_->{A} = ($newest_t - $_->{T}) / $unit foreach @files; + $slop /= $unit; -#-------------------- argument parsing -------------------- + push @{$files[0]{U}}, "newest"; -alldigits () { - [ "x${2##*[^0-9]}" = "x$2" ] || \ - badusage "bad $1 \`$2'; must be all digits" - [ "$2" ] || badusage "bad $2; must be nonempty" - eval $1='$2' + print DEBUG Dumper(scalar(@files), \@files, \@intervals) if $debug >= 2; } -rm=rm -recurse='' -unit=86400 -slop='' - -while [ $# -ge 1 ]; do - arg=$1; shift - case "$arg" in - --|-) break ;; - --help) usage; exit 0 ;; - --*) badusage "unknown option $arg" ;; - -*) - val=${arg#-?} - case "$arg" in - -n*) rm=: ;; - -r*) recurse=-r ;; - -u*) alldigits unit "$val"; arg='' ;; - -s*) alldigits slop "$val"; arg='' ;; - *) badusage "unknown option ${1:0:2}" ;; - esac - arg=-${arg#-?} - if test "x$arg" != x-; then set -- "$arg" "$@"; fi - ;; - *) set "$arg" "$@"; break ;; - esac -done - -[ $# -ge 1 ] || badusage 'too few arguments' -[ "$slop" ] || slop=$(( $unit / 10 )) - -for ni in "$@"; do - case "$ni" in *x*);; *) badusage "bad x $ni";; esac - alldigits number "${ni%%x*}" - alldigits interval "${ni#*x}" -done - -#-------------------- scanning the directory ---------- - -# We build in $l a list of the relevant filenames and the time_t's -# they represent. -# -# Each entry in $l is $time_t/$filename, and the list is -# newline-separated for the benefit of sort(1). - -ls=0 -for cn in [0-9]*; do - case "$cn" in - ????-??-??) - conv="$cn";; - ????-??-??T[0-2][0-9]+[0-9][0-9][0-9][0-9]|\ - ????-??-??T[0-2][0-9]:[0-6][0-9]+[0-9][0-9][0-9][0-9]|\ - ????-??-??T[0-2][0-9]:[0-6][0-9]:[0-6][0-9]+[0-9][0-9][0-9][0-9]) - conv="${cn%T*} ${cn#*T}";; - *) - echo >&2 "ignoring $cn" - continue;; - esac - cs=$(date -d "$conv" +%s) - l="$cs/$cn -$l" -done - -#-------------------- main computation -------------------- - -# We process each minimum/extent pair, to have it select a bunch of -# versions to keep. We annotate entries in $l: if we are keeping -# an entry we prepend a colon; temporarily, if we are keeping an entry -# because of this particular minimum/extent, we prepend a comma. - -# For each minimum/extent pair we look at the list from most recent -# to least recent, -# ie in order of increasing age -# ie in order of decreasing time_t -# and each time we're more than min older than the last item we kept, -# we mark the item to keep, until we have as many as we want. -# -# We build the new list (space-separated) in lnew. +sub flag ($) { + my ($int) = @_; + my $n = $int->{N}; + my $d = $int->{D}; + my $dmax = $d + $slop; + my $spec = $int->{Spec}; + my $start_age = ($n-1) * $d - $slop; + my $i = 0; + + print DEBUG "FLAG $spec sa=$start_age dmax=$dmax\n"; + + # find $i, the youngest which is at least $start_age + for (;;) { + print DEBUG "i #$i $files[$i]{A}\n"; + last if $files[$i]{A} >= $start_age; + if ($i == $#files) { + print STDERR "insufficiently old for $spec\n"; + last; + } + $i++; + } + + for (;;) { + push @{ $files[$i]{U} }, $spec; + + # find $j, the closest to $i, preferably no more than $dmax younger + my $j = $i; + for (;;) { + $j--; + # at each point in this loop $j is the next candidate + last if $j < 0; + my $dt = $files[$i]{A} - $files[$j]{A}; + print DEBUG "j #$j $files[$j]{A} dt=$dt\n"; + last if $dt > $dmax; + } + last if $j < 0; + $j++; + if ($j == $i) { + $j--; + print STDERR "insufficiently dense for $spec after $files[$j]{F}\n"; + } + print DEBUG "i #$j\n"; + + $i = $j; + } +} -l=$(sort -nr <{U}}; + printf "keep %s for %s - age %.1f\n", + $_->{F}, "@{$_->{U}}", $_->{A}; + } + foreach (@files) { + next if @{$_->{U}}; + printf "remove %s - age %.1f\n", + $_->{F}, $_->{A}; + if ($rm) { + my $r= system 'rm', ($recurse ? ('-r') : ()), "--", $_->{F}; + die "run rm: $!\n" unless defined($r) && $r >= 0; + exit 12 if $r; + } + } +} + +open DEBUG, ">/dev/null" or die $!; + +while (@ARGV && $ARGV[0] =~ m/^-/) { + $_ = shift @ARGV; + last if $_ eq '-' || $_ eq '--'; + if (m/^-[^-]/) { + while (m/^-./) { + if (s/^-n/-/) { $rm=0; } + elsif (s/-r/-/) { $recurse=1; } + elsif (s/-D/-/) { $debug++; } + elsif (s/-u(\d+)$//) { $unit=$1; } + elsif (s/-s(\d+)$//) { $slop=$1; } + else { badusage "unknown short option $_" } + } + } elsif (m/^--help$/) { + print $usage or die $!; + exit 0; + } else { + badusage "unknown long option $_" + } +} + +badusage "too few arguments" unless @ARGV; + +if ($debug) { + open DEBUG, ">&STDERR" or die $!; + DEBUG->autoflush(1); +} + +$slop //= $unit * 0.1; + +foreach (@ARGV) { + m/^(\d+)x(\d+)$/ or badusage "bad x $_"; + push @intervals, { Spec => $&, N => $1, D => $2 }; +} + +scan(); +precomp(); +foreach (@intervals) { flag $_ } +implement(); diff --git a/scripts/expire-iso8601.new b/scripts/expire-iso8601.new deleted file mode 100755 index 552b258..0000000 --- a/scripts/expire-iso8601.new +++ /dev/null @@ -1,211 +0,0 @@ -#!/usr/bin/perl -w -# -# Copyright 2006 Ian Jackson -# -# This script and its documentation (if any) are free software; you -# can redistribute it and/or modify them under the terms of the GNU -# General Public License as published by the Free Software Foundation; -# either version 3, or (at your option) any later version. -# -# chiark-named-conf and its manpage are distributed in the hope that -# it will be useful, but WITHOUT ANY WARRANTY; without even the -# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR -# PURPOSE. See the GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, consult the Free Software Foundation's -# website at www.fsf.org, or the GNU Project website at www.gnu.org. - -use strict; - -our $usage = <<'END'; -usage: - expire-iso8601 [] x [x ...] -options: - -u is measured in units of seconds - (default is 86400, so is in days) - -s allow kept items to be seconds shorter apart than - specified; default is 10% of - -n do not really delete - -r recursive removal (rm -r) -example: - /home/ian/junk/expire-iso8601 14x1 4x7 - uses units of 86400s (1 day) with a slop of 8640 - it keeps 14 daily items - (that is 14 items, dated no less than 86400-8640 apart) - and 4 weekly items - (that is 4 items, dated no less than 7*86400-8640 apart) - the 14 daily and 7 weekly items may be the same, or not - There is no need to sort the list of x pairs. -exit status: - 0 ok - 4 rm failed - 8 bad usage - 16 catastrophic failure -END - -use POSIX; -use Carp; -use Data::Dumper; -use Date::Parse; -use DateTime::Format::Strptime; - -our @files; -our $rm = 1; -our $recurse = 1; -our $unit = 86400; -our $slop; -our $debug = 0; -our @intervals; - -sub badusage ($) { - print STDERR "bad usage: $_[0]\n$usage" or die $!; - exit 8; -} - -sub scan () { -# my $strp = DateTime::Format::Strptime->new(); - foreach my $f (<[0-9]*>) { - if ($f !~ m/^ \d\d\d\d - \d\d - \d\d - (?: T \d\d \: \d\d (?: \: \d\d )? - (?: [-+] \d{1,2} \:? \d\d )? )? /x) { - print STDERR "ignoring $f\n"; - } - my @t = Date::Parse::strptime($f); - @t = map { $_ // 0 } @t; - my $t = mktime @t; -# m -# my $t = $strp->parse_datetime($f); -# $t = $t->epoch(); -# my @t = Date::Parse::strptime($f); -#print STDERR Dumper(\@t); -# my $t = mktime(@t); -# $!=0; $?=0; my $t = `date -d '$&' +%s`; -# die "date(!) failed on $&: $? $!" if $! || $?; -# chomp $t or confess; - push @files, { F => $f, T => $t, U => [] }; - } -} - -sub precomp () { - if (!@files) { - print STDERR "none at all yet!\n"; - exit 0; - } - - # newest first, which means biggest T - @files = sort { $b->{T} <=> $a->{T} || $b->{F} cmp $a->{F} } @files; - my $newest_t = $files[0]{T}; - $_->{A} = ($newest_t - $_->{T}) / $unit foreach @files; - $slop /= $unit; - - push @{$files[0]{U}}, "newest"; - - print DEBUG Dumper(scalar(@files), \@files, \@intervals) if $debug >= 2; -} - -sub flag ($) { - my ($int) = @_; - my $n = $int->{N}; - my $d = $int->{D}; - my $dmax = $d + $slop; - my $spec = $int->{Spec}; - my $start_age = ($n-1) * $d - $slop; - my $i = 0; - - print DEBUG "FLAG $spec sa=$start_age dmax=$dmax\n"; - - # find $i, the youngest which is at least $start_age - for (;;) { - print DEBUG "i #$i $files[$i]{A}\n"; - last if $files[$i]{A} >= $start_age; - if ($i == $#files) { - print STDERR "insufficiently old for $spec\n"; - last; - } - $i++; - } - - for (;;) { - push @{ $files[$i]{U} }, $spec; - - # find $j, the closest to $i, preferably no more than $dmax younger - my $j = $i; - for (;;) { - $j--; - # at each point in this loop $j is the next candidate - last if $j < 0; - my $dt = $files[$i]{A} - $files[$j]{A}; - print DEBUG "j #$j $files[$j]{A} dt=$dt\n"; - last if $dt > $dmax; - } - last if $j < 0; - $j++; - if ($j == $i) { - $j--; - print STDERR "insufficiently dense for $spec after $files[$j]{F}\n"; - } - print DEBUG "i #$j\n"; - - $i = $j; - } -} - -sub implement () { - foreach (@files) { - next unless @{$_->{U}}; - printf "keep %s for %s - age %.1f\n", - $_->{F}, "@{$_->{U}}", $_->{A}; - } - foreach (@files) { - next if @{$_->{U}}; - printf "remove %s - age %.1f\n", - $_->{F}, $_->{A}; - if ($rm) { - my $r= system 'rm', ($recurse ? ('-r') : ()), "--", $_->{F}; - die "run rm: $!\n" unless defined($r) && $r >= 0; - exit 12 if $r; - } - } -} - -open DEBUG, ">/dev/null" or die $!; - -while (@ARGV && $ARGV[0] =~ m/^-/) { - $_ = shift @ARGV; - last if $_ eq '-' || $_ eq '--'; - if (m/^-[^-]/) { - while (m/^-./) { - if (s/^-n/-/) { $rm=0; } - elsif (s/-r/-/) { $recurse=1; } - elsif (s/-D/-/) { $debug++; } - elsif (s/-u(\d+)$//) { $unit=$1; } - elsif (s/-s(\d+)$//) { $slop=$1; } - else { badusage "unknown short option $_" } - } - } elsif (m/^--help$/) { - print $usage or die $!; - exit 0; - } else { - badusage "unknown long option $_" - } -} - -badusage "too few arguments" unless @ARGV; - -if ($debug) { - open DEBUG, ">&STDERR" or die $!; - DEBUG->autoflush(1); -} - -$slop //= $unit * 0.1; - -foreach (@ARGV) { - m/^(\d+)x(\d+)$/ or badusage "bad x $_"; - push @intervals, { Spec => $&, N => $1, D => $2 }; -} - -scan(); -precomp(); -foreach (@intervals) { flag $_ } -implement();