#!/bin/bash # usage: # expire-iso8601 # # [ ...] # # eg # /home/ian/junk/expire-iso8601 86400 10000 1 14 7 4 # uses units of 86400s (1 day) with a slop of 10ks; # it keeps daily copies (that is, dated no more than 86400+10000s apart) # for at least 1*14 days, ie the oldest will be at least 86400s*1*14-10000s # older than the very newest # and weekly copies (that is, dated no more than 7*86400+10000s apart) # for at least 7*4 days, ie the oldest will be at least 86400s*7*4-10000s # older than the very newest set -e fail () { echo >&2 "$*"; exit 2; } badusage () { fail "bad usage: $*"; } #-------------------- argument parsing -------------------- [ $# -ge 4 ] || badusage 'too few arguments' unit=$1 slop=$2 shift;shift [ $(($# % 2)) = 0 ] || badusage 'odd keep arguments (need min/extent pairs)' argl="$*" alldigits () { [ "x${1##*[^0-9]}" = "x$1" ] || badusage "$2 must be all digits" [ x$1 ] || badusage "$2 must be nonempty" } while [ $# -gt 0 ]; do min=$1; shift; extent=$1; shift alldigits $min min alldigits $extent extent done #-------------------- scanning the directory ---------- # We build in $l a list of the relevant filenames and the time_t's # they represent. And, while we're at it, we find the most recent # such time_t ($ls) and its name ($ln). # # Each entry in $l is $time_t/$filename, and the list is # newline-separated for the benefit of sort(1). ls=0 for cn in [0-9]*; do echo $cn case "$cn" in ????-??-??) conv="$cn";; ????-??-??T[0-2][0-9]+[0-9][0-9][0-9][0-9]|\ ????-??-??T[0-2][0-9]:[0-6][0-9]+[0-9][0-9][0-9][0-9]|\ ????-??-??T[0-2][0-9]:[0-6][0-9]:[0-6][0-9]+[0-9][0-9][0-9][0-9]) conv="${cn%T*} ${cn#*T}";; *) echo >&2 "ignoring $cn" continue;; esac cs=$(date -d "$conv" +%s) if [ $cs -gt $ls ]; then ls=$cs; ln=$cn fi l="$cs/$cn $l" done echo "newest $ln" #-------------------- main computation -------------------- # We go through the items from most to least recent # ie in order of increasing age # ie in order of decreasing time_t # We constantly maintain records of this item (c) and the last two # (b and a). # # We then check to see if any of the specified minimum/extent pairs # mean we should keep c and b. # # We can delete c if b is older than every specified extent. b will # then be the latest version we keep and is old enough. (Note that if # the density isn't satisfied, the expected number of old items may # not be satisfied either; in the worst case, if b is very old, we # might end up with just two items left.) # # If we delete c then we just go on to the next c, which will # definitely be older, so will be deleted too (because b remains # unchanged): ie we then delete all the rest. # # If we don't delete c, we look at the gap between a and c. If this # gap is not too long (according to any of the minimum/extent pairs) # then it is OK to delete b. (A gap is too long if it's longer than a # relevant pair's minimum, but a pair isn't relevant if c is older # than the extent.) If we delete b then current c becomes the new b. # # If we don't delete either then b and c become the new a and b. - because b is clearly sufficient to # satisfy the # if we delete # {l,a,b,c}{s,n,a} = seconds, name of a,b,c where # c is one we're looking at now and # b is previous one # a is one before that # l is last (most recent) # where a, b, c have not been removed as='' an='' bs='' bn='' remove () { echo "expire $1 (have $2)" } l=$(sort -nr <