#!/bin/bash
#
# Provenance: this script text was recovered from a whitespace-collapsed
# copy of the following patch:
#   From e86eb780462cd2f1e6b60f7ab9c75d0bf8336703 Mon Sep 17 00:00:00 2001
#   From: ianmdlvl
#   Date: Sat, 12 Aug 2006 19:00:57 +0000
#   Subject: [PATCH] initial expire-iso860
# The patch created scripts/expire-iso8601 (new file, mode 100755) and
# added this entry to debian/changelog, above chiark-utils (4.1.4):
#   chiark-utils (4.1.5) unstable; urgency=low
#     * expire-iso8601 - new script.
#
# usage:
#   expire-iso8601 <unit> <slop>
#      <min> <extent>
#     [<min> <extent> ...]
#   NOTE(review): the angle-bracketed placeholders were missing from the
#   recovered copy; the names above are taken from the variables used by
#   the argument parser below -- confirm against the upstream script.
#
# eg
#   /home/ian/junk/expire-iso8601 86400 10000 1 14 7 4
# uses units of 86400s (1 day) with a slop of 10ks;
# it keeps daily copies (that is, dated no more than 86400+10000s apart)
# for at least 1*14 days, ie the oldest will be at least 86400s*1*14-10000s
# older than the very newest
# and weekly copies (that is, dated no more than 7*86400+10000s apart)
# for at least 7*4 days, ie the oldest will be at least 86400s*7*4-10000s
# older than the very newest

set -e

# fail MESSAGE...: print the message on stderr and exit with status 2.
fail () { printf '%s\n' "$*" >&2; exit 2; }

# badusage MESSAGE...: report a command line error through fail.
badusage () { fail "bad usage: $*"; }

#-------------------- argument parsing --------------------

[ "$#" -ge 4 ] || badusage 'too few arguments'

unit=$1
slop=$2
shift 2

[ "$(($# % 2))" = 0 ] || badusage 'odd keep arguments (need min/extent pairs)'

# The validation loop below shifts away every positional parameter, so a
# space-joined copy of the min/extent pairs is saved first.
argl="$*"

# alldigits VALUE LABEL
#   Calls badusage (which exits 2) unless VALUE is a nonempty string made
#   only of the characters 0-9.  LABEL names the argument in the message.
alldigits () {
  # The emptiness check must use -n on a quoted value: a one-argument
  # test such as [ x$1 ] is true for every input, including "".
  [ -n "$1" ] || badusage "$2 must be nonempty"
  # Stripping the longest prefix that ends in a non-digit leaves the
  # value unchanged only if it contains no non-digit at all.
  [ "x${1##*[^0-9]}" = "x$1" ] || badusage "$2 must be all digits"
}

while [ "$#" -gt 0 ]; do
  min=$1; shift; extent=$1; shift
  # Quoted so that an empty argument stays in $1 instead of letting the
  # label slide into its place.
  alldigits "$min" min
  alldigits "$extent" extent
done

#-------------------- scanning the directory ----------

# We build in $l a list of the relevant filenames
# and the time_t's they represent.  And, while we're at it, we find the
# most recent such time_t ($ls) and its name ($ln).
#
# Each entry in $l is $time_t/$filename, and the list is
# newline-separated for the benefit of sort(1).

# scan_directory
#   Examines every entry of the current directory whose name starts with
#   a digit.  Names of the form YYYY-MM-DD, optionally followed by
#   Thh[:mm[:ss]]+zzzz, are converted with date(1); anything else is
#   reported on stderr and skipped.
#   Globals written: l   (list of "$time_t/$filename" lines, newest-seen
#                         entries prepended, each followed by a newline)
#                    ls  (largest time_t seen, 0 if none)
#                    ln  (name belonging to $ls, empty if none)
#                    cn, conv, cs are loop scratch and stay global, as
#                    they were when this loop ran at top level.
#   Outputs: every candidate name on stdout; "ignoring NAME" on stderr.
#   Returns: date(1)'s non-zero status if a matching name cannot be
#            converted (under set -e this ends the script, as before).
scan_directory () {
  # Start from known-empty values so nothing inherited from the
  # environment ends up in the list.
  l=''
  ls=0
  ln=''
  for cn in [0-9]*; do
    # With no match the pattern itself is handed to the loop; skip it.
    [ -e "$cn" ] || [ -L "$cn" ] || continue
    printf '%s\n' "$cn"
    case "$cn" in
      ????-??-??)
        conv=$cn;;
      ????-??-??T[0-2][0-9]+[0-9][0-9][0-9][0-9]|\
      ????-??-??T[0-2][0-9]:[0-6][0-9]+[0-9][0-9][0-9][0-9]|\
      ????-??-??T[0-2][0-9]:[0-6][0-9]:[0-6][0-9]+[0-9][0-9][0-9][0-9])
        # date -d wants "DATE TIME", not the ISO 8601 "T" separator.
        conv="${cn%T*} ${cn#*T}";;
      *)
        echo >&2 "ignoring $cn"
        continue;;
    esac
    cs=$(date -d "$conv" +%s) || return
    if [ "$cs" -gt "$ls" ]; then
      ls=$cs; ln=$cn
    fi
    l="$cs/$cn"$'\n'"$l"
  done
}

scan_directory

echo "newest $ln"

#-------------------- main computation --------------------

# We go through the items from most to least recent
#   ie in order of increasing age
#   ie in order of decreasing time_t
# We constantly maintain records of this item (c) and the last two
# (b and a).
#
# We then check to see if any of the specified minimum/extent pairs
# mean we should keep c and b.
#
# We can delete c if b is older than every specified extent.  b will
# then be the latest version we keep and is old enough.  (Note that if
# the density isn't satisfied, the expected number of old items may
# not be satisfied either; in the worst case, if b is very old, we
# might end up with just two items left.)
#
# If we delete c then we just go on to the next c, which will
# definitely be older, so will be deleted too (because b remains
# unchanged): ie we then delete all the rest.
#
# If we don't delete c, we look at the gap between a and c.  If this
# gap is not too long (according to any of the minimum/extent pairs)
# then it is OK to delete b.  (A gap is too long if it's longer than a
# relevant pair's minimum, but a pair isn't relevant if c is older
# than the extent.)  If we delete b then current c becomes the new b.
#
# If we don't delete either then b and c become the new a and b.
+ +# - because b is clearly sufficient to +# satisfy the +# if we delete + +# {l,a,b,c}{s,n,a} = seconds, name of a,b,c where +# c is one we're looking at now and +# b is previous one +# a is one before that +# l is last (most recent) +# where a, b, c have not been removed + +as='' +an='' +bs='' +bn='' + +remove () { + echo "expire $1 (have $2)" +} + +l=$(sort -nr <