#!/bin/bash
-# usage:
-# expire-iso8601 <unit-in-seconds> <slop-in-seconds>
-# <min-interval-in-units> <extent-in-min-intervals>
-# [<min-interval-in-units> <extent-in-min-intervals> ...]
-#
-# eg
-# /home/ian/junk/expire-iso8601 86400 10000 1 14 7 4
-# uses units of 86400s (1 day) with a slop of 10ks;
-# it keeps daily copies (that is, dated no more than 86400+10000s apart)
-# for at least 1*14 days, ie the oldest will be at least 86400s*1*14-10000s
-# older than the very newest
-# and weekly copies (that is, dated no more than 7*86400+10000s apart)
-# for at least 7*4 days, ie the oldest will be at least 86400s*7*4-10000s
-# older than the very newest
-
set -e
+ usage () {
+ cat <<END
+usage:
+ expire-iso8601 [<options>] <number>x<interval> [<number>x<interval> ...]
+options:
+ -u<unitlen> <interval> is measured in units of <unitlen> seconds
+ (default is 86400, so <interval> is in days)
+ -s<slop> allow kept items to be <slop> seconds shorter apart than
+ specified; default is 10% of <unitlen>
+ -n do not really delete
+ -r recursive removal (rm -r)
+example:
+ /home/ian/junk/expire-iso8601 14x1 4x7
+ uses units of 86400s (1 day) with a slop of 8640
+ it keeps 14 daily items
+ (that is 14 items, dated no less than 86400-8640 apart)
+ and 7 weekly items
+ (that is 7 items, dated no less than 7*86400-8640 apart)
+ the 14 daily and 7 weekly items may be the same, or not
+ There is no need to sort the list of <number>x<interval> pairs.
+exit status:
+ 0 ok
+ 4 rm failed
+ 8 bad usage
+ 16 catastrophic failure
+END
+ }
-fail () { echo >&2 "$*"; exit 2; }
-badusage () { fail "bad usage: $*"; }
+# Copyright 2006 Ian Jackson <ian@chiark.greenend.org.uk>
+#
+# This script and its documentation (if any) are free software; you
+# can redistribute it and/or modify them under the terms of the GNU
+# General Public License as published by the Free Software Foundation;
+# either version 3, or (at your option) any later version.
+#
+# chiark-named-conf and its manpage are distributed in the hope that
+# it will be useful, but WITHOUT ANY WARRANTY; without even the
+# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+# PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, consult the Free Software Foundation's
+# website at www.fsf.org, or the GNU Project website at www.gnu.org.
+
+
+trap 'exit 16' 0
+badusage () { echo >&2 "bad usage: $*"; usage >&2; trap '' 0; exit 8; }
#-------------------- argument parsing --------------------
-[ $# -ge 4 ] || badusage 'too few arguments'
-
-unit=$1
-slop=$2
-shift;shift
-
-[ $(($# % 2)) = 0 ] || badusage 'odd keep arguments (need min/extent pairs)'
-argl="$*"
-
alldigits () {
- [ "x${1##*[^0-9]}" = "x$1" ] || badusage "$2 must be all digits"
- [ x$1 ] || badusage "$2 must be nonempty"
+ [ "x${2##*[^0-9]}" = "x$2" ] || \
+ badusage "bad $1 \`$2'; must be all digits"
+ [ "$2" ] || badusage "bad $2; must be nonempty"
+ eval $1='$2'
}
-while [ $# -gt 0 ]; do
- min=$1; shift; extent=$1; shift
- alldigits $min min
- alldigits $extent extent
+rm=rm
+recurse=''
+unit=86400
+slop=''
+
+while [ $# -ge 1 ]; do
+ arg=$1; shift
+ case "$arg" in
+ --|-) break ;;
+ --help) usage; exit 0 ;;
+ --*) badusage "unknown option $arg" ;;
+ -*)
+ val=${arg#-?}
+ case "$arg" in
+ -n*) rm=: ;;
+ -r*) recurse=-r ;;
+ -u*) alldigits unit "$val"; arg='' ;;
+ -s*) alldigits slop "$val"; arg='' ;;
+ *) badusage "unknown option ${1:0:2}" ;;
+ esac
+ arg=-${arg#-?}
+ if test "x$arg" != x-; then set -- "$arg" "$@"; fi
+ ;;
+ *) set "$arg" "$@"; break ;;
+ esac
+done
+
+[ $# -ge 1 ] || badusage 'too few arguments'
+[ "$slop" ] || slop=$(( $unit / 10 ))
+
+for ni in "$@"; do
+ case "$ni" in *x*);; *) badusage "bad <number>x<interval> $ni";; esac
+ alldigits number "${ni%%x*}"
+ alldigits interval "${ni#*x}"
done
#-------------------- scanning the directory ----------
# We build in $l a list of the relevant filenames and the time_t's
-# they represent. And, while we're at it, we find the most recent
-# such time_t ($ls) and its name ($ln).
+# they represent.
#
# Each entry in $l is $time_t/$filename, and the list is
# newline-separated for the benefit of sort(1).
ls=0
for cn in [0-9]*; do
- echo $cn
case "$cn" in
????-??-??)
conv="$cn";;
continue;;
esac
cs=$(date -d "$conv" +%s)
- if [ $cs -gt $ls ]; then
- ls=$cs; ln=$cn
- fi
l="$cs/$cn
$l"
done
-echo "newest $ln"
-
#-------------------- main computation --------------------
-# We go through the items from most to least recent
+# We process each minimum/extent pair, to have it select a bunch of
+# versions to keep. We annotate entries in $l: if we are keeping
+# an entry we prepend a colon; temporarily, if we are keeping an entry
+# because of this particular minimum/extent, we prepend a comma.
+
+# For each minimum/extent pair we look at the list from most recent
+# to least recent,
# ie in order of increasing age
# ie in order of decreasing time_t
-# We constantly maintain records of this item (c) and the last two
-# (b and a).
-#
-# We then check to see if any of the specified minimum/extent pairs
-# mean we should keep c and b.
-#
-# We can delete c if b is older than every specified extent. b will
-# then be the latest version we keep and is old enough. (Note that if
-# the density isn't satisfied, the expected number of old items may
-# not be satisfied either; in the worst case, if b is very old, we
-# might end up with just two items left.)
-#
-# If we delete c then we just go on to the next c, which will
-# definitely be older, so will be deleted too (because b remains
-# unchanged): ie we then delete all the rest.
-#
-# If we don't delete c, we look at the gap between a and c. If this
-# gap is not too long (according to any of the minimum/extent pairs)
-# then it is OK to delete b. (A gap is too long if it's longer than a
-# relevant pair's minimum, but a pair isn't relevant if c is older
-# than the extent.) If we delete b then current c becomes the new b.
+# and each time we're more than min older than the last item we kept,
+# we mark the item to keep, until we have as many as we want.
#
-# If we don't delete either then b and c become the new a and b.
-
-- because b is clearly sufficient to
-# satisfy the
-# if we delete
-
-# {l,a,b,c}{s,n,a} = seconds, name of a,b,c where
-# c is one we're looking at now and
-# b is previous one
-# a is one before that
-# l is last (most recent)
-# where a, b, c have not been removed
-
-as=''
-an=''
-bs=''
-bn=''
-
-remove () {
- echo "expire $1 (have $2)"
-}
+# We build the new list (space-separated) in lnew.
l=$(sort -nr <<END
$l
END
)
-for ce in $l; do
- cs=${ce%%/*}; cn=${ce#*/}
- set $argl
- c_mightneed=false
- b_needfordensity=false
- if test "$as"; then
- ac_interval=$(( $as - $cs ))
- fi
- while [ $# != 0 ]; do
- min=$(( $1 * $unit + $slop ))
- extent=$(( $1 * $2 * $unit - $slop ))
- # if b is as old as required by anything
- # then c is definitely surplus
- if ! [ "$bs" ] || \
- [ $(($ls - $bs)) -le $extent ]; then
- c_mightneed=true
- fi
- if ! test "$as" || \
- [ $(($ls - $as)) -le $extent -a \
- $ac_interval -gt $min ]; then
- b_needfordensity=true
- fi
- shift;shift
+for ni in "$@"; do
+ wantcount=${ni%x*}
+
+ div=1
+
+ while true; do
+ min=$(( (${ni#*x} * $unit) / $div - $slop ))
+
+ ls=''
+ lnew=''
+ skipped=0
+ for ce in $l; do
+ cn=${ce#*/}; cl=${ce%%/*}
+ cs=${cl#,}; cs=${cs#:}
+ case $cl in ,*) ls=$cs; continue;; esac
+ if [ $wantcount != 0 ]; then
+ if ! [ "$ls" ] || \
+ [ $(( $ls - $cs )) -ge $min ]; then
+ echo "keep (for $ni) $cn"
+ ce=,$ce
+ ls=$cs
+ wantcount=$(( $wantcount - 1 ))
+ else
+ skipped=$(( $skipped+1 ))
+ fi
+ fi
+ lnew="$lnew $ce"
+ done
+ l=$lnew
+
+ if [ $wantcount = 0 ]; then break; fi
+ printf "%s" "insufficient (for $ni) by $wantcount"
+ if [ $skipped = 0 ]; then echo; break; fi
+ div=$(( $div * 2 ))
+ echo " shortening interval ${div}x"
+ done
+
+ # s/([,:]+).*/:\1/g
+ lnew=''
+ for ce in $l; do
+ case $ce in ,*) ce=:${ce#,};; esac
+ case $ce in ::*) ce=${ce#:};; esac
+ lnew="$lnew $ce"
done
- if ! $c_mightneed; then
- remove $cn "$bn"
- continue
- fi
- if ! $b_needfordensity; then
- remove $bn "$an $cn"
- bn=$cn; bs=$cs
- continue
- fi
- an=$bn; as=$bs
- bn=$cn; bs=$cs
+ l=$lnew
+done
+
+#-------------------- execution --------------------
+
+trap '' 0
+exitstatus=0
+
+nonbroken_echo () { (echo "$@"); }
+# While we have subprocesses, we have to avoid bash calling write(1,...)
+# because of a bug in bash (Debian #382798), so we arrange for a subshell
+# for each echo.
+
+jobs=''
+for ce in $l; do
+ case $ce in
+ :*);;
+ *)
+ cn=${ce#*/}
+ nonbroken_echo "expire $cn"
+ $rm $recurse -- $cn &
+ jobs="$jobs $!"
+ ;;
+ esac
done
-exit 0
+if [ "$jobs" ]; then
+ nonbroken_echo "all running"
+fi
+
+for job in $jobs; do
+ wait $job || exitstatus=4
+done
+
+if [ $exitstatus = 0 ]; then
+ echo "complete"
+else
+ echo "complete, but problems deleting"
+fi
+
+exit $exitstatus