chiark - git - mdw - distorted-backup/blob - lvm-rmsnap.in

   1 #! @PERL@
   2 ### -*-perl-*-
   3 ###
   4 ### Remove an LVM snapshot, without falling foul of LVM bugs
   5 ###
   6 ### (c) 2011 Mark Wooding
   7 ###
   8
   9 ###----- Licensing notice ---------------------------------------------------
  10 ###
  11 ### This program is free software; you can redistribute it and/or modify
  12 ### it under the terms of the GNU General Public License as published by
  13 ### the Free Software Foundation; either version 2 of the License, or
  14 ### (at your option) any later version.
  15 ###
  16 ### This program is distributed in the hope that it will be useful,
  17 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 ### GNU General Public License for more details.
  20 ###
  21 ### You should have received a copy of the GNU General Public License
  22 ### along with this program; if not, write to the Free Software Foundation,
  23 ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  24
  25 use Cwd qw(realpath);
  26 use Errno qw(:POSIX);
  27 use Fcntl qw(:mode);
  28 use File::stat;
  29 use Getopt::Long qw(:config gnu_compat bundling no_ignore_case);
  30 use IO::Handle;
  31 use Time::HiRes qw(time);
  32
  33 our $VERSION = "@VERSION@";
  34
  35 ###--------------------------------------------------------------------------
  36 ### Utilities.
  37
  38 ## Error handling and reporting.
  39 (our $QUIS = $0) =~ s:^.*/::;
  40 our $DEBUG = 0;
  41 sub whine ($) { my ($msg) = @_; print STDERR "$QUIS: $msg\n"; }
  42 sub burble ($) { my ($msg) = @_; whine $msg if $DEBUG; }
  43 sub fail ($) { my ($msg) = @_; whine $msg; exit $! || ($? >> 8) || 255; }
  44
  45 ## Cleanups.  Call `cleanup BLOCK' to arrange to have BLOCK executed at the
  46 ## end of the program.
  47 our @CLEANUP = ();
  48 sub runcleanups { for my $f (@CLEANUP) { &$f } }
  49 END { runcleanups; }
  50 $SIG{INT} = $SIG{TERM} = sub {
  51   my $sig = shift;
  52   runcleanups;
  53   $SIG{$sig} = 'DEFAULT';
  54   kill $sig => $$;
  55 };
  56 sub cleanup (&) { unshift @CLEANUP, $_[0]; }
  57
  58 sub fixint ($) { my ($x) = @_; return $x =~ /^0/ ? oct $x : $x + 0; }
  59
  60 ###--------------------------------------------------------------------------
  61 ### Device fiddling.
  62
  63 sub devsys ($) {
  64   ## devsys DEV
  65   ##
  66   ## Return a sysfs path for a device DEV.
  67
  68   my ($dev) = @_;
  69   my $st = stat $dev or fail "stat ($dev): $!";
  70   my $kind;
  71   if (S_ISBLK($st->mode)) { $kind = "block"; }
  72   elsif (S_ISCHR($st->mode)) { $kind = "char"; }
  73   else { fail "$dev is not a device"; }
  74   my ($maj, $min) = (($st->rdev >> 8) & 0xff, $st->rdev & 0xff);
  75   (my $whole = realpath "/sys/dev/$kind/$maj:$min") =~ s:^/sys/:/:;
  76   return $whole;
  77 }
  78
  79 our %DMTAB = ();
  80
  81 sub dmtable_update () {
  82   ## dmtable_update
  83   ##
  84   ## Update the device-mapper table in %DMTAB.
  85
  86   burble "re-read device-mapper table";
  87   %DMTAB = ();
  88   open my $dt, "-|", "dmsetup", "table" or fail "open (dm table): $!";
  89   while (my $line = $dt->getline) {
  90     my ($dev, $rest) = split /[:\s]+/, $line, 2;
  91     push @{$DMTAB{$dev}}, [split ' ', $rest];
  92   }
  93   close $dt or fail "dmsetup table failed (rc = $?)";
  94 }
  95
  96 sub dmname ($) {
  97   ## dmname SYSPATH
  98   ##
  99   ## Return the device-mapper node name for the sysfs path SYSPATH.
 100
 101   my ($sys) = @_;
 102   open my $f, "<", "/sys$sys/dm/name" or fail "open ($sys/dm/name): $!";
 103   chomp (my $name = $f->getline);
 104   close $f;
 105   return $name;
 106 }
 107
 108 ###--------------------------------------------------------------------------
 109 ### I/O utilities.
 110
 111 sub sel ($;$$$) {
 112   ## sel TIMEOUT, [READS, WRITES, EXCEPTIONS]
 113   ##
 114   ## Wait for at most TIMEOUT seconds (indefinitely if TIMEOUT is `undef').
 115   ## Each of READS, WRITES and EXCEPTIONS is a listref containing FILE => SUB
 116   ## pairs: if the FILE is readable (writable, has an exceptional condition)
 117   ## then the SUB is invoked.
 118
 119   my ($t, $r, $w, $x) = @_;
 120   my ($vr, $vw, $vx);
 121   my (%r, %w, %x);
 122
 123   ## Read the arguments and build a data structure.
 124   for my $i ([$r, \$vr, \%r], [$w, \$vw, \%w], [$x, \$vx, \%x]) {
 125     my ($a, $v, $h) = @$i;
 126     next unless $a;
 127     my @a = @$a;
 128     while (@a) {
 129       my ($f, $g) = splice @a, 0, 2;
 130       my $fd = $f->fileno;
 131       $h->{$fd} = $g;
 132       vec($$v, $fd, 1) = 1;
 133     }
 134   }
 135
 136   ## Do the wait and sift through the results.
 137   defined select $vr, $vw, $vx, $t or fail "select: $!";
 138   for my $i ([$vr, \%r], [$vw, \%w], [$vx, \%x]) {
 139     my ($v, $h) = @$i;
 140     while (my ($f, $g) = each %$h) {
 141       if (vec $v, $f, 1) { &$g; }
 142     }
 143   }
 144 }
 145
 146 sub doread ($;$) {
 147   ## doread FILE, [LEN]
 148   ##
 149   ## Read LEN bytes (or a default amount) from FILE.  If the file ends,
 150   ## return undef.  If reading would block then return an empty string.
 151   ## Otherwise return he stuff.
 152
 153   my ($f, $n) = @_;
 154   $n = sysread $f, my $buf, $n // 4096;
 155   if (!defined $n) { return "" if $! == EAGAIN; fail "read: $!"; }
 156   elsif (!$n) { return undef; }
 157   else { return $buf; }
 158 }
 159
 160 sub run ($$@) {
 161   ## run WHAT, PROG, ARGS...
 162   ##
 163   ## Run PROG, passing it ARGS.  Fails if PROG exits nonzero.
 164
 165   my ($what, $prog, @args) = @_;
 166   system($prog, @args) == 0 or fail "$prog ($what) failed (rc = $?)";
 167 }
 168
 169 sub capture ($@) {
 170   ## capture PROG, ARGS...
 171   ##
 172   ## Run PROG, passing it ARGS.  Returns exit status, stdout, and stderr, as
 173   ## strings.
 174
 175   my ($prog, @args) = @_;
 176   my ($out, $err) = ("", "");
 177   my ($outpipe_in, $outpipe_out, $errpipe_in, $errpipe_out);
 178   pipe $outpipe_in, $outpipe_out or fail "pipe ($prog out): $!";
 179   pipe $errpipe_in, $errpipe_out or fail "pipe ($prog err): $!";
 180   defined (my $kid = fork) or fail "fork ($prog): $!";
 181   if ($kid == 0) {
 182     close $outpipe_in
 183       and close $errpipe_in
 184       and open STDOUT, ">&", $outpipe_out
 185       and open STDERR, ">&", $errpipe_out
 186       and exec $prog, @args
 187       or fail "exec $prog: $!";
 188   }
 189   close $outpipe_out;
 190   close $errpipe_out;
 191   for (;;) {
 192     my @r = ();
 193     for my $i ([\$outpipe_in, \$out, "out"],
 194                [\$errpipe_in, \$err, "err"]) {
 195       my ($p, $b, $w) = @$i;
 196       push @r, $$p => sub {
 197         my $buf = doread $$p;
 198         if (defined $buf) { $$b .= $buf; }
 199         else { close $$p; $$p = undef; }
 200       } if $$p;
 201     }
 202     last unless @r;
 203     sel undef, \@r;
 204   }
 205   waitpid $kid, 0 or fail "waitpid ($prog): $!";
 206   return $?, $out, $err;
 207 }
 208
 209 ###--------------------------------------------------------------------------
 210 ### Monitoring udev events.
 211
 212 sub umon_create (@) {
 213   ## umon_create ARGS...
 214   ##
 215   ## Create a udev monitor, with the given `udevadm monitor' arguments, and
 216   ## return an object.  We always select only kernel events.  We try to wait
 217   ## for the monitor to start up before returning.  Don't trust this: use
 218   ## `umon_sync' anyway.
 219
 220   my @args = @_;
 221   my $u = {};
 222
 223   ## Start the monitor process.
 224   $u->{KID} = open($u->{PIPE}, "-|",
 225                    "stdbuf", "-o0",
 226                    "udevadm", "monitor", "--kernel", "--property", @args)
 227     or fail "open (umon): $!";
 228   cleanup { kill 9, $u->{KID} };
 229   $u->{PIPE}->blocking(0) or fail "set non-blocking (umon): $!";
 230
 231   ## Wait for the end of the preamble, indicated by the first blank line.
 232   ## From observation with strace(1), this means that the monitor has
 233   ## successfully attached itself to its netlink socket and is ready to fetch
 234   ## events.
 235   my $ok = 0;
 236   my $buf = "";
 237   my $now = time;
 238   my $end = $now + 5;
 239   while (!$ok) {
 240     sel
 241       $end - $now,
 242       [ $u->{PIPE} => sub {
 243           defined (my $b = doread $u->{PIPE}) or fail "read (umon): eof";
 244           $buf .= $b;
 245           if ($buf =~ /\n\n(.*)$/) { $ok = 1; $buf = $1; }
 246         }
 247       ];
 248     $now = time;
 249     if ($now >= $end) { fail "umon timeout"; }
 250   }
 251   $u->{BUF} = $buf;
 252
 253   ## Done.
 254   return $u;
 255 }
 256
 257 sub umon_read ($) {
 258   ## umon_read UMON
 259   ##
 260   ## Read events from UMON, as a list of hash references mapping properties
 261   ## to their values.
 262
 263   my ($u) = @_;
 264   my @s = ();
 265   for (;;) {
 266     defined (my $buf = doread $u->{PIPE}) or fail "read (umon): end of file";
 267     $buf eq "" and last;
 268     $buf = $u->{BUF} . $buf;
 269     my @r = split /\n\n/, $buf, -1;
 270     $u->{BUF} = pop @r;
 271     for my $r (@r) {
 272       push @s, { map { /^(\w+)=(.*)$/ } split /\n/, $r };
 273     }
 274   }
 275   return @s;
 276 }
 277
 278 sub umon_sync ($$) {
 279   ## umon_sync UMON, DEV
 280   ##
 281   ## Wait for UMON to report an event about the device DEV (without its
 282   ## `/dev/' prefix), triggering periodically just in case it missed one.
 283   ## This is useful for synchronizing.  Returns the list of events which
 284   ## weren't interesting.
 285
 286   my ($u, $dev) = @_;
 287   my $now = time;
 288   my $retry = 0;
 289   my $done = 0;
 290   my @ev = ();
 291   burble "sync with udev";
 292
 293   until ($done) {
 294
 295     ## Too late.  Trigger a change event and try again.
 296     if ($now >= $retry) {
 297       $retry = $now + 2;
 298       run "trigger $dev", "udevadm", "trigger", "--sysname-match=$dev";
 299     }
 300
 301     ## Now read events and see what happens.
 302     sel
 303       $retry - $now,
 304       [ $u->{PIPE} => sub {
 305           my @e = umon_read $u;
 306           while (@e) {
 307             my $e = shift @e;
 308             if ($e->{DEVNAME} eq $dev) { $done = 1; push @ev, @e; last; }
 309             else { push @ev, $e; }
 310           }
 311         }
 312       ];
 313     $now = time;
 314   }
 315
 316   return @ev;
 317 }
 318
 319 ###--------------------------------------------------------------------------
 320 ### Main code.
 321
 322 ## Parse the command line.
 323 our $USAGE = "usage: $QUIS VGNAME/LVNAME";
 324 sub version { print "$QUIS, version $VERSION\n"; }
 325 sub help {
 326   print <<EOF;
 327 $USAGE
 328
 329 Options:
 330   -h, --help            Show this help text.
 331   -v, --version         Show the program version number.
 332   -d, --debug           Show debugging information.
 333   -n, --no-act          Don't take corrective actions.
 334 EOF
 335 }
 336
 337 our $NOACT = 0;
 338 GetOptions('help|h|?'           => sub { version; help; exit; },
 339            'version|v'          => sub { version; exit; },
 340            'debug|d'            => \$DEBUG,
 341            'noact|n'            => \$NOACT)
 342   and @ARGV == 1
 343   and @ARGV[0] =~ m:(.+)/(.+):
 344   or do { print STDERR $USAGE, "\n"; exit 1; };
 345 our ($VG, $LV) = ($1, $2);
 346
 347 ## Check that the volume in question actually exists, and is a device-mapper
 348 ## device, before we wheel out the big guns.
 349 dmtable_update;
 350 our $SYS = devsys "/dev/$VG/$LV";
 351 burble "sysfs name is $SYS";
 352 my $t = $DMTAB{dmname $SYS}
 353   or fail "/dev/$VG/$LV isn't a device-mapper device";
 354 if ($DEBUG) {
 355   burble "found table...";
 356   burble "\t" . join " ", @$_ foreach @$t;
 357 }
 358 $t->[0][2] eq "snapshot" or fail "/dev/$VG/$LV isn't a snapshot";
 359
 360 ## Create a udev monitor.  We're only interested in disk-shaped block
 361 ## devices.  (If we use some other device kind for synchronization then this
 362 ## filter will have to be broadened.)
 363 my $u = umon_create "--subsystem-match=block/disk";
 364
 365 ## Prepare for the awful synchronization hack.  We need to make sure, below,
 366 ## that we've read all of the interesting events resulting from an `lvremove'
 367 ## call.  To do this, we wait for an event on a different device -- but we
 368 ## must avoid being fooled by spurious events on this device.  As an attempt
 369 ## to minimize the probability of this going wrong, acquire a pet device
 370 ## which nobody else is using.  The best idea seems to be a loopback device.
 371 open my $lopipe, "-|", "losetup", "--show", "--find", "/etc/motd"
 372   or fail "open (losetup attach)";
 373 chomp (my $lo = $lopipe->getline);
 374 { local $/ = undef; <$lopipe>; }
 375 $lo =~ s:^/dev/::;
 376 $lopipe->close or fail "wait (losetup attach): $!";
 377 cleanup { system "losetup", "--detach", "/dev/$lo" };
 378
 379 ## Initial synchronization, to make sure stuff works.
 380 umon_sync $u, $lo;
 381
 382 ## Try to remove the snapshot.  Capture stdout and stderr, and relay them if
 383 ## nothing serious went wrong.
 384 burble "initial attempt to remove snapshot";
 385 my ($rc, $out, $err) = capture "lvremove", "--force", "$VG/$LV";
 386 if ($rc != 0x500) {
 387   print STDOUT $out;
 388   print STDERR $err;
 389   burble "lvremove didn't explode (rc = $rc): we're done here";
 390   if ($rc >> 8) { $rc >>= 8 }
 391   elsif ($rc & 255) { $rc += 128 }
 392   exit $rc;
 393 }
 394 burble "initial lvremove failed";
 395
 396 ## OK, stuff went wrong.  First see if there was a udev cookie left over, and
 397 ## if so try to release it.  It's important to know that we've read all of
 398 ## the relevant uevents, so synchronize again.
 399 my @e = umon_sync $u, $lo;
 400 my %c = ();
 401 for my $e (@e) {
 402   $c{($e->{DM_COOKIE} & 0xffff) | 0xd4d0000} = 1
 403     if $e->{DEVPATH} eq $SYS && exists $e->{DM_COOKIE};
 404 }
 405 burble "cookies used: " . join ", ", map { sprintf "0x%x", $_ } keys %c;
 406
 407 ## Find the used cookies which are still extant, and release them.
 408 open $uc, "-|", "dmsetup", "udevcookies" or fail "open (cookies): $!";
 409 $uc->getline;
 410 my @leak = ();
 411 while (my $l = $uc->getline) {
 412   my @f = split ' ', $l;
 413   push @leak, $f[0] if $c{fixint $f[0]};
 414 }
 415 close $uc or fail "udevcookies failed (rc = $?)";
 416 for my $c (@leak) {
 417   burble "release leaked cookie $c";
 418   run "release cookie", "dmsetup", "udevreleasecookie", $c unless $NOACT;
 419 }
 420
 421 ## If we're very unlucky, the origin volume may still be suspended.  Resume
 422 ## it now, or the next attempt will get stuck.  (Resuming is idempotent, so
 423 ## we don't need to check whether it's already running.)  Finding the origin
 424 ## is annoying: search the device-mapper table for a device with a
 425 ## `snapshot-origin' table referencing the same backing store as the
 426 ## snapshot.
 427 my $back = $DMTAB{dmname $SYS}[0][3];
 428 my $orig = undef;
 429 burble "backend device $back";
 430 for my $dm (keys %DMTAB) {
 431   my $t = $DMTAB{$dm};
 432   next unless @$t == 1 &&
 433     $t->[0][2] eq "snapshot-origin" &&
 434     $t->[0][3] eq $back;
 435   defined $orig and fail "snapshot appears to have multiple origins";
 436   $orig = $dm;
 437 }
 438 defined $orig or fail "couldn't find snapshot origin device";
 439 burble "found origin volume $orig; resuming...";
 440 run "resume origin $orig", "dmsetup", "resume", $orig unless $NOACT;
 441
 442 ## See whether removing the snapshot again helps any.
 443 burble "retry snapshot removal";
 444 run "retry", "lvremove", "--force", "$VG/$LV" unless $NOACT;
 445
 446 ## OK, we're on the way to recovery.  The origin device may now be not a
 447 ## snapshot-origin any more.  Refresh the device-mapper table and inspect it.
 448 dmtable_update;
 449 if (-d "/sys/dev/block/$back") {
 450   my $backdm = dmname "/dev/block/$back";
 451   if ($DMTAB{$orig}[0][2] ne "snapshot-origin") {
 452     burble "origin released but backend $backdm still exists: remove";
 453     run "remove backend $backdm", "dmsetup", "remove", $backdm
 454       unless $NOACT;
 455   }
 456 }
 457
 458 ## All done.  There, that wasn't so bad, was it?
 459 burble "completed successfully";
 460 exit 0;
 461
 462 ###----- That's all, folks --------------------------------------------------