#! @BASH@ ### ### Backup script ### ### (c) 2012 Mark Wooding ### ###----- Licensing notice --------------------------------------------------- ### ### This file is part of the `rsync-backup' program. ### ### rsync-backup is free software; you can redistribute it and/or modify ### it under the terms of the GNU General Public License as published by ### the Free Software Foundation; either version 2 of the License, or ### (at your option) any later version. ### ### rsync-backup is distributed in the hope that it will be useful, ### but WITHOUT ANY WARRANTY; without even the implied warranty of ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ### GNU General Public License for more details. ### ### You should have received a copy of the GNU General Public License ### along with rsync-backup; if not, write to the Free Software Foundation, ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. set -e thishost=$(hostname -s) quis=${0##*/} . @pkgdatadir@/lib.sh verbose=: dryrun=nil ###-------------------------------------------------------------------------- ### Utility functions. RSYNCOPTS="--verbose" do_rsync () { ## Run rsync(1) in an appropriate manner. Configuration should ovrride ## this or set $RSYNCOPTS if it wants to do something weirder. Arguments ## to this function are passed on to rsync. rsync \ --archive --hard-links --numeric-ids --del \ --sparse --compress \ --one-file-system \ --partial \ $RSYNCOPTS \ --filter="dir-merge .rsync-backup" \ "$@" } log () { case $dryrun in t) echo >&2 " *** $*" ;; nil) now=$(date +"%Y-%m-%d %H:%M:%S %z") echo >&9 "$now $*" ;; esac } maybe () { ## Run CMD, if this isn't a dry run. case $dryrun in t) echo >&2 " +++ $*" ;; nil) "$@" ;; esac } copy () { prefix=$1 ## Copy lines from stdin to stdout, adding PREFIX. while IFS= read -r line; do printf "%s %s\n" "$prefix" "$line" done } run () { tag=$1 cmd=$2; shift 2 ## Run CMD, logging its output in a pleasing manner. 
case $dryrun in t) echo >&2 " *** RUN $tag" echo >&2 " +++ $cmd $*" rc=0 ;; nil) log "BEGIN $tag" rc=$( { { { ( set +e "$cmd" "$@" 3>&- 4>&- 5>&- 9>&- echo $? >&5; ) | copy "|" >&4; } 2>&1 | copy "*" >&4; } 4>&1 | cat >&9; } 5>&1 $mnt/.lock mount -oremount,ro $mnt" || return $? ## Done. hostpath $mnt } unsnap_ro () { fs=$1 mnt=$2 ## Check that the filesystem still has our lock marker. hostrun "unsnap-ro $mnt" " case \$(cat $mnt/.lock) in rsync-backup) ;; *) echo unlocked by someone else; exit 31 ;; esac mount -oremount,rw $mnt rm $mnt/.lock" || return $? } ## Snapshot using LVM. SNAPSIZE="-l10%ORIGIN" snap_lvm () { vg=$1 lv=$2 ## Make the snapshot. hostrun "snap-lvm $vg/$lv" " lvcreate --snapshot -n$lv.bkp $SNAPSIZE $vg/$lv mkdir -p $SNAPDIR/$lv mount -oro /dev/$vg/$lv.bkp $SNAPDIR/$lv" || return $? ## Done. hostpath $SNAPDIR/$lv } unsnap_lvm () { vg=$1 lv=$2 ## Remove the snapshot. Sometimes LVM doesn't notice that the snapshot is ## no longer in open immdiately, so try several times. hostrun "unsnap-lvm $vg/$lv" " umount $SNAPDIR/$lv rc=1 for i in 1 2 3 4; do if lvremove -f $vg/$lv.bkp; then rc=0; break; fi sleep 2 done exit $rc" || return $? } ## Complicated snapshot using LVM, where the volume group and filesystem are ## owned by different machines, so they need to be synchronized during the ## snapshot. do_rfreezefs () { lvhost=$1 vg=$2 lv=$3 fshost=$4 fsdir=$5 ## Engage in the rfreezefs protocol with the filesystem host. This ## involves some hairy plumbing. We want to get exit statuses out of both ## halves. set +e ssh $fshost rfreezefs $fsdir | { set -e ## Read the codebook from the remote end. 
ready=nil while read line; do set -- $line case "$1" in PORT) port=$2 ;; TOKEN) eval tok_$2=$3 ;; READY) ready=t; break ;; *) echo >&2 "$quis: unexpected keyword $1 (rfreezefs to $rhost)" exit 1 ;; esac done case $ready in nil) echo >&2 "$quis: unexpected eof (rfreezefs to $rhost)" exit 1 ;; esac ## Connect to the filesystem host's TCP port and get it to freeze its ## filesystem. exec 3<>/dev/tcp/$fshost/$port echo $tok_FREEZE >&3 read tok <&3 case $tok in "$tok_FROZEN") ;; *) echo >&2 "$quis: unexpected token $tok (rfreezefs $fsdir on $fshost)" exit 1 ;; esac ## Get the volume host to create the snapshot. set +e _hostrun >&2 3>&- $userat$lvhost \ "lvcreate --snapshot -n$lv.bkp $SNAPSIZE $vg/$lv" snaprc=$? set -e ## The filesystem can thaw now. echo $tok_THAW >&3 read tok <&3 case $tok in "$tok_THAWED") ;; *) _hostrun >&2 3>&- $userat$lvhost "lvremove -f $vg/$lv.bkp" || : echo >&2 "$quis: unexpected token $tok (rfreezefs $fsdir on $fshost)" exit 1 ;; esac ## Done. exit $snaprc } ## Sift through the wreckage to find out what happened. rc_rfreezefs=${PIPESTATUS[0]} rc_snapshot=${PIPESTATUS[1]} set -e case $rc_rfreezefs:$rc_snapshot in 0:0) ;; 112:*) echo >&2 "$quis: EMERGENCY failed to thaw $fsdir on $fshost!" exit 112 ;; *) echo >&2 "$quis: failed to snapshot $vg/$lv ($fsdir on $fshost)" exit 1 ;; esac ## Mount the snapshot on the volume host. _hostrun >&2 $userat$lvhost " mkdir -p $SNAPDIR/$lv mount -oro /dev/$vg/$lv.bkp $SNAPDIR/$lv" } snap_rfreezefs () { rhost=$1 vg=$2 lv=$3 rfs=$4 set -e run "snap-rfreezefs $host:$vg/$lv $rhost:$rfs" \ do_rfreezefs $host $vg $lv $rhost $rfs || return $? hostpath $SNAPDIR/$lv } unsnap_rfreezefs () { ## Unshapping is the same as for plain LVM. rhost=$1 vg=$2 lv=$3 rfs=$4 unsnap_lvm $vg $lv } ###-------------------------------------------------------------------------- ### Expiry computations. 
expire () { ## Read dates on stdin; write to stdout `EXPIRE date' for dates which ## should be expired and `RETAIN date' for dates which should be retained. ## Get the current date and convert it into useful forms. now=$(date +%Y-%m-%d) parsedate $now now_jdn=$(julian $now) now_year=$year now_month=$month now_day=$day kept=: ## Work through each date in the input. while read date; do keep=nil ## Convert the date into a useful form. jdn=$(julian $date) parsedate $date ## Work through the policy list. if [ $jdn -le $now_jdn ]; then while read ival age; do ## Decide whether the policy entry applies to this date. apply=nil case $age in forever) apply=t ;; year) if [ $year -eq $now_year ] || ([ $year -eq $(( $now_year - 1 )) ] && [ $month -ge $now_month ]) then apply=t; fi ;; month) if ([ $month -eq $now_month ] && [ $year -eq $now_year ]) || ((([ $month -eq $(( $now_month - 1 )) ] && [ $year -eq $now_year ]) || ([ $month -eq 12 ] && [ $now_month -eq 1 ] && [ $year -eq $(( $now_year - 1 )) ])) && [ $day -ge $now_day ]) then apply=t; fi ;; week) if [ $jdn -ge $(( $now_jdn - 7 )) ]; then apply=t; fi ;; *) echo >&2 "$quis: unknown age symbol \`$age'" exit 1 ;; esac case $apply in nil) continue ;; esac ## Find the interval marker for this date. case $ival in daily) marker=$date ;; weekly) ydn=$(julian $year-01-01) wk=$(( ($jdn - $ydn)/7 + 1 )) marker=$year-w$wk ;; monthly) marker=$year-$month ;; annually | yearly) marker=$year ;; *) echo >&2 "$quis: unknown interval symbol \`$ival'" exit 1 ;; esac ## See if we've alredy retained something in this interval. 
case $kept in *:"$marker":*) ;; *) keep=t kept=$kept$marker: ;; esac done <new.fshash } local_fshash () { { echo "*** $host $fs $date"; echo fshash -c$STOREDIR/fshash.cache -H$HASH new/ } >$localmap } expire_backups () { { seen=: for i in *-*-*; do i=${i%%.*} case $i in *[!-0-9]*) continue ;; esac case $seen in *:"$i":*) continue ;; esac seen=$seen$i: echo $i done; } | expire | while read op date; do case $op,$dryrun in RETAIN,t) echo >&2 " --- keep $date" ;; EXPIRE,t) echo >&2 " --- delete $date" ;; RETAIN,nil) echo "keep $date" ;; EXPIRE,nil) echo "delete $date" $verbose -n " expire $date..." rm -rf $date $date.* delete_index $host $fs $date $verbose " done" ;; esac done } ## Backup hooks. defhook setup defhook precommit defhook postcommit backup_precommit_hook () { host=$1 fs=$2 date=$3 ## Compatibility: You can override this hook in the configuration file for ## special effects; but it's better to use `addhook precommit'. : } addhook precommit backup_precommit_hook backup_commit_hook () { host=$1 fs=$2 date=$3 ## Compatibility: You can override this hook in the configuration file for ## special effects; but it's better to use `addhook commit'. : } addhook commit backup_commit_hook do_backup () { date=$1 fs=$2 fsarg=$3 ## Back up FS on the current host. set -e attempt=0 ## Run a hook beforehand. set +e; runhook setup $host $fs $date; rc=$?; set -e case $? in 0) ;; 99) log "BACKUP of $host:$fs SKIPPED by hook"; return 0 ;; *) log "BACKUP of $host:$fs FAILED (hook returns $?)"; return $? ;; esac ## Report the start of this attempt. log "START BACKUP of $host:$fs" ## Maybe we need to retry the backup. while :; do ## Create and mount the remote snapshot. case $dryrun in t) maybe snap_$snap $fs $fsarg snapmnt="" ;; nil) snapmnt=$(snap_$snap $snapargs $fs $fsarg) || return $? ;; esac $verbose " create snapshot" ## Build the list of hardlink sources. 
linkdests="" for i in $host $like; do d=$STOREDIR/$i/$fs/last/ if [ -d $d ]; then linkdests="$linkdests --link-dest=$d"; fi done ## Copy files from the remote snapshot. maybe mkdir -p new/ case $dryrun in t) $verbose " running rsync" ;; nil) $verbose -n " running rsync..." ;; esac set +e run "RSYNC of $host:$fs (snapshot on $snapmnt)" do_rsync \ $linkdests \ $rsyncargs \ $snapmnt/ new/ rc_rsync=$? set -e case $dryrun in nil) $verbose " done" ;; esac ## Collect a map of the snapshot for verification purposes. set +e case $dryrun in t) $verbose " remote fshash" ;; nil) $verbose -n " remote fshash..." ;; esac run "@$host: fshash $fs" remote_fshash rc_fshash=$? set -e case $dryrun in nil) $verbose " done" ;; esac ## Remove the snapshot. maybe unsnap_$snap $snapargs $fs $fsarg $verbose " remove snapshot" ## If we failed to copy, then give up. case $rc_rsync:$rc_fshash in 0:0) ;; 0:*) return $rc_fshash ;; *) return $rc_rsync ;; esac ## Get a matching map of the files received. maybe mkdir -m750 -p $STOREDIR/tmp/ localmap=$STOREDIR/tmp/fshash.$host.$fs.$date case $dryrun in t) $verbose " local fshash" ;; nil) $verbose -n " local fshash..." ;; esac run "local fshash $host:$fs" local_fshash || return $? case $dryrun in nil) $verbose " done" ;; esac ## Compare the two maps. set +e run "compare fshash maps for $host:$fs" diff -u new.fshash $localmap rc_diff=$? set -e case $rc_diff in 0) break ;; 1) if [ $attempt -ge $retry ]; then return $rc; fi $verbose " fshash mismatch; retrying" attempt=$(( $attempt + 1 )) ;; *) return $rc_diff ;; esac done ## Glorious success. maybe rm -f $localmap $verbose " fshash match" ## Commit this backup. case $dryrun in nil) runhook precommit $host $fs $date mv new $date mv new.fshash $date.fshash insert_index $host $fs $date $VOLUME runhook commit $host $fs $date mkdir hack ln -s $date hack/last mv hack/last . rmdir hack ;; esac $verbose " commit" ## Expire old backups. 
case "${expire_policy+t},${default_policy+t}" in ,t) expire_policy=$default_policy ;; esac case "${expire_policy+t},$dryrun" in t,nil) run "expiry for $host:$fs" expire_backups ;; t,t) expire_backups ;; esac clear_policy=t ## Report success. case $dryrun in t) log "END BACKUP of $host:$fs" ;; nil) log "SUCCESSFUL BACKUP of $host:$fs" ;; esac } run_backup_cmd () { fs=$1 date=$2 cmd=$3; shift 3 ## try_backup FS DATE COMMAND ARGS ... ## ## Run COMMAND ARGS to back up filesystem FS on the current host, ## maintaining a log, and checking whether it worked. The caller has ## usually worked out the DATE in order to set up the filesystem, and we ## need it to name the log file properly. ## Find a name for the log file. In unusual circumstances, we may have ## deleted old logs from today, so just checking for an unused sequence ## number is insufficient. Instead, check all of the logfiles for today, ## and use a sequence number that's larger than any of them. case $dryrun in t) log=/dev/null ;; nil) seq=1 for i in "$logdir/$host/$fs.$date#"*; do tail=${i##*#} case "$tail" in [!1-9]* | *[!0-9]*) continue ;; esac if [ -f "$i" -a $tail -ge $seq ]; then seq=$(( tail + 1 )); fi done log="$logdir/$host/$fs.$date#$seq" ;; esac ## Run the backup command. case $dryrun in nil) mkdir -p $logdir/$host ;; esac if ! "$cmd" "$@" 9>$log 1>&9; then echo >&2 echo >&2 "$quis: backup of $host:$fs FAILED!" bkprc=1 fi ## Clear away any old logfiles. remove_old_logfiles "$logdir/$host/$fs" } backup () { ## backup FS[:ARG] ... ## ## Back up the filesystems on the currently selected host using the ## currently selected snapshot type. ## Make sure that there's a store volume. We must do this here rather than ## in the main body of the script, since the configuration file needs a ## chance to override STOREDIR. if ! [ -r $STOREDIR/.rsync-backup-store ]; then echo >&2 "$quis: no backup volume mounted" exit 15 fi ## Read the volume name if we don't have one already. 
Again, this allows ## the configuration file to provide a volume name. case "${VOLUME+t}${VOLUME-nil}" in nil) VOLUME=$(cat $METADIR/volume) ;; esac ## Back up each requested file system in turn. for fs in "$@"; do ## Parse the argument. case $fs in *:*) fsarg=${fs#*:} fs=${fs%%:*} ;; *) fsarg="" ;; esac $verbose " filesystem $fs" ## Move to the store directory and set up somewhere to put this backup. cd $STOREDIR case $dryrun in nil) if [ ! -d $host ]; then mkdir -m755 $host chown root:root $host fi if [ ! -d $host/$fs ]; then mkdir -m750 $host/$fs chown root:backup $host/$fs fi ;; esac cd $host/$fs ## Find out if we've already copied this filesystem today. date=$(date +%Y-%m-%d) if [ $dryrun = nil ] && [ -d $date ]; then $verbose " already dumped" continue fi ## Do the backup of this filesystem. run_backup_cmd $fs $date do_backup $date $fs $fsarg done } ###-------------------------------------------------------------------------- ### Configuration functions. defhook start defhook end done_first_host_p=nil host () { host=$1 like= userat= case $done_first_host_p in nil) runhook start; done_first_host_p=t ;; esac case "${expire_policy+t},${default_policy+t}" in t,) default_policy=$expire_policy ;; esac unset expire_policy $verbose "host $host" } snaptype () { snap=$1; shift; snapargs="$*"; retry=0; } rsyncargs () { rsyncargs="$*"; } like () { like="$*"; } retry () { retry="$*"; } user () { userat="$*@"; } retain () { case $clear_policy in t) unset expire_policy; clear_policy=nil ;; esac expire_policy="${expire_policy+$expire_policy }$*" } ###-------------------------------------------------------------------------- ### Read the configuration and we're done. 
usage () {
  ## Print a one-line synopsis on stdout.  Callers redirect this to stderr
  ## when they are reporting a command-line error.
  echo "usage: $quis [-hVnv] [-c CONF]"
}

version () {
  ## Print the program name and version on stdout.
  echo "$quis version $VERSION"
}

whine () {
  ## Verbose-mode reporter: becomes the value of $verbose when `-v' is given.
  ## Writes its arguments to file descriptor 8, which is a copy of the
  ## original stdout made below.  This must stay as echo(1): callers pass
  ## `-n' as the first argument to suppress the trailing newline.
  echo >&8 "$@"
}

## Parse the command line.
##
##   -h       show usage and exit
##   -V       show version and configuration, then exit
##   -v       verbose progress reports
##   -c CONF  read configuration from CONF
##   -n       dry run
while getopts "hVvc:n" opt; do
  case "$opt" in
    h) usage; exit 0 ;;
    ## NOTE(review): `config' is not defined in the visible part of this
    ## file; presumably it is provided by lib.sh -- confirm.
    V) version; config; exit 0 ;;
    v) verbose=whine ;;
    c) conf=$OPTARG ;;
    n) dryrun=t ;;
    ## getopts has already complained about the bad option; add the synopsis
    ## so that the user can see what was expected.
    *) usage >&2; exit 1 ;;
  esac
done
shift $((OPTIND - 1))

## No positional arguments are accepted.
case $# in 0) ;; *) usage >&2; exit 1 ;; esac

## Keep a copy of the original stdout on fd 8.  Backup commands run with
## stdout redirected into their log file, so verbose reports need their own
## route back to the user.
exec 8>&1

## Make sure we actually have a configuration file to read, rather than
## letting `.' fail with a cryptic message about an empty filename.
case "${conf:+t}" in
  t) ;;
  *) echo >&2 "$quis: no configuration file specified"; exit 1 ;;
esac

## The overall exit status.  Failing backups set this to 1; make sure it
## starts out as success if nothing has initialized it already, so that a
## clean run isn't reported as a failure.
: "${bkprc:=0}"

## The configuration file drives everything: it is a shell fragment which
## selects hosts and calls `backup'.
. "$conf"

## Tell interested hooks how things went, and report the outcome.
runhook end "$bkprc"
case "$bkprc" in
  0) $verbose "All backups successful" ;;
  *) $verbose "Backups FAILED" ;;
esac

###----- That's all, folks --------------------------------------------------

exit "$bkprc"