X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ian/git?p=chiark-utils.git;a=blobdiff_plain;f=scripts%2Fgit-cache-proxy;h=2f38de1154be164d316d7fb06579a22841d521c1;hp=f797e74920cc36822141ad2422ad683d6f8e4281;hb=eede02a315ee2d05bfada0e62f5a9746046d3492;hpb=cb8ee35c18f491d1a9e0b334a0d0e165a52adfe1 diff --git a/scripts/git-cache-proxy b/scripts/git-cache-proxy index f797e74..2f38de1 100755 --- a/scripts/git-cache-proxy +++ b/scripts/git-cache-proxy @@ -2,6 +2,12 @@ # # git caching proxy +# Suitable only for exposing to semi-trusted clients: clients are not +# supposed to be able to take over the server. However, clients can +# probably deny service to each other because the current +# implementation is not very good at handling various out-of-course +# situations (notably, clients which are too slow). + # usage: run it on some port, and then clone or fetch # "git://:/[ ]" # where is http:///... or git:///... @@ -14,11 +20,22 @@ # fetch=try use what is in the cache if the fetch/clone fails # timeout= length of time to allow for fetch/clone +# example inetd.conf line: +# 9419 stream tcp nowait git-cache /usr/bin/git-cache-proxy git-cache-proxy +# you'll need to +# adduser git-cache +# mkdir /var/cache/git-cache-proxy +# chown git-cache /var/cache/git-cache-proxy + +# git-cache-proxy +# Copyright 2010 Tony Finch +# Copyright 2013 Ian Jackson +# # git-cache-proxy is free software; you can redistribute it and/or # modify them under the terms of the GNU General Public License as # published by the Free Software Foundation; either version 3, or (at # your option) any later version. -# +# # git-cache-proxy is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU @@ -40,9 +57,18 @@ use POSIX; use Socket; use Sys::Syslog; use Fcntl qw(:flock SEEK_SET); +use File::Path qw(remove_tree); our $us = 'git-cache-proxy'; +our $debug = 0; +our $housekeepingeverydays = 1; +our $treeexpiredays = 21; +our $fetchtimeout = 1800; +our $maxfetchtimeout = 3600; +our $cachedir = '/var/cache/git-cache-proxy'; +our $housekeepingonly = 0; + #---------- error handling and logging ---------- # This is a bit fiddly, because we want to catch errors sent to stderr @@ -70,9 +96,10 @@ sub ensurelog () { sub logm ($$) { my ($pri, $msg) = @_; + return if $pri eq 'debug' && !$debug; if ($client eq '(local)') { print STDERR "$us: $pri: $msg\n" or die $!; - exit 1; + return; } ensurelog(); my $mainmsg = sprintf "%s-%s: %s", $server, $client, $msg; @@ -80,11 +107,11 @@ sub logm ($$) { my $wholemsg = sprintf("%s [%d] %s: %s\n", strftime("%Y-%m-%d %H:%M:%S Z", gmtime), $$, - $pri, + $pri eq 'err' ? 'error' : $pri, $mainmsg); print $log $wholemsg; } else { - syslog $pri, $mainmsg; + syslog $pri, "%s", "$pri $mainmsg"; } } @@ -126,24 +153,31 @@ sub gitfail ($) { #---------- argument parsing ---------- -our $fetchtimeout = 1800; -our $maxfetchtimeout = 3600; -our $cachedir = '/var/cache/git-cache-proxy'; - for (;;) { last unless @ARGV; last unless $ARGV[0] =~ m/^-/; $_ = shift @ARGV; for (;;) { last unless m/^-./; - if (s/^-L(.*)$//) { + if (s/^-H/-/) { + $housekeepingonly++; + } elsif (s/^-D/-/) { + $debug++; + } elsif (s/^-L(.*)$//) { my $logfile = $_; open STDERR, ">>", $logfile or fail "open $logfile: $!"; $log = \*STDERR; } elsif (s/^-d(.*)$//) { $cachedir = $1; - } elsif (s/^--(maxfetchtimeout|fetchtimeout)=(\d+)$//) { - ${ $::{$1} } = $2; + } elsif (s/^--( max-fetch-timeout + | fetch-timeout + | tree-expire-days + | housekeeping-interval-days + )=(\d+)$//x) { + my $vn = $1; + $vn =~ y/-//d; + die $vn unless defined ${ $::{$vn} }; + ${ $::{$vn} } = $2; } else { fail "bad usage: unknown option `$_'"; } @@ -152,13 +186,36 @@ for (;;) { !@ARGV or fail "bad usage: no non-option arguments permitted"; -#---------- main program ---------- - -chdir $cachedir or fail "chdir $cachedir: $!"; +#---------- utility functions ---------- -our ($service,$specpath,$spechost,$subdir); -our ($tmpd,$gitd,$lock); -our ($fetch,$url); +sub lockfile ($$$) { + my ($fh, $fn, $flockmode) = @_; + my $what = $fn.(($flockmode & ~LOCK_NB) == LOCK_SH ? " (shared)" : ""); + for (;;) { + close $fh; + open $fh, '+>', $fn or fail "open/create $fn for lock: $!"; + logm 'debug', "lock $what: acquiring"; + if (!flock $fh, $flockmode) { + if ($flockmode & LOCK_NB && $! == EWOULDBLOCK) { + return 0; # ok then + } + fail "lock $what: $!"; + } + stat $fh or fail "stat opened $fn: $!"; + my $fh_ino = ((stat _)[1]); + if (!stat $fn) { + $! == ENOENT or fail "stat $fn: $!"; + next; + } + my $fn_ino = ((stat _)[1]); + if ($fn_ino == $fh_ino) { + logm 'debug', "lock $what: acquired"; + return 1; + } + logm 'debug', "lock $what: deleted, need to loop again"; + # oh dear + } +} sub xread { my $length = shift; @@ -172,6 +229,19 @@ sub xread { return $buffer; } +#---------- main program ---------- + +chdir $cachedir or fail "chdir $cachedir: $!"; + +our ($service,$specpath,$spechost,$subdir); +our ($tmpd,$gitd,$lock); +our ($fetch,$url); + +sub servinfo ($) { + my ($msg) = @_; + logm 'info', "service `$specpath': $msg"; +} + sub readcommand () { $SIG{ALRM} = sub { fail "timeout" }; alarm 30; @@ -203,7 +273,7 @@ sub readcommand () { $fetch = 0; } elsif (m/^fetch=must$/) { $fetch = 2; # the default - } elsif (m/^timeout=(\d+)$/) { + } elsif (m/^timeout=(\d+)$/ && $1 >= 1) { $fetchtimeout = $1 <= $maxfetchtimeout ? $1 : $maxfetchtimeout; } elsif ($must) { gitfail "unknown/unsupported option `$_'"; @@ -218,19 +288,18 @@ sub readcommand () { $subdir =~ s|,|\\,|g; $subdir =~ s|/|,|g; - logm 'info', "$specpath locking"; - $tmpd= "$subdir\\.tmp"; $gitd= "$subdir\\.git"; $lock = "$subdir\\.lock"; + + servinfo "locking"; } sub clonefetch () { - open LOCK, "+>", $lock or fail "open/create $lock: $!"; - flock LOCK, LOCK_EX or fail "lock exclusive $lock: $!"; + lockfile \*LOCK, $lock, LOCK_EX; - my $exists = stat $gitd; - $exists or $!==ENOENT or fail "stat $gitd: $!"; + my $exists = lstat $gitd; + $exists or $!==ENOENT or fail "lstat $gitd: $!"; our $fetchfail = ''; @@ -241,10 +310,10 @@ sub clonefetch () { if (!$exists) { system qw(rm -rf --), $tmpd; @cmd = (qw(git clone -q --mirror), $url, $tmpd); - logm 'info', "$specpath cloning @cmd"; + servinfo "cloning"; } else { @cmd = (qw(git remote update --prune)); - logm 'info', "$specpath fetching @cmd"; + servinfo "fetching"; } my $cmd = "@cmd[0..1]"; @@ -263,10 +332,9 @@ sub clonefetch () { my $timedout = 0; { local $SIG{ALRM} = sub { - logm 'info', "$specpath fetch/clone timeout"; + servinfo "fetch/clone timeout"; $timedout=1; kill 9, -$child; }; -logm 'info', "timeout=$fetchtimeout"; alarm($fetchtimeout); $!=0; { local $/=undef; $fetcherr = ; } !FETCHERR->error or fail "read pipe from fetch/clone: $!"; @@ -289,7 +357,7 @@ logm 'info', "timeout=$fetchtimeout"; if ($fetch >= 2) { gitfail $fetchfail; } else { - logm 'info', "$specpath fetch/clone failed: $fetchfail"; + servinfo "fetch/clone failed: $fetchfail"; } } @@ -305,24 +373,102 @@ logm 'info', "timeout=$fetchtimeout"; gitfail "no cached data, and not cloned: $fetchfail"; } - logm 'info', "$specpath sharing"; - flock LOCK, LOCK_UN or fail "unlock $lock: $!"; - flock LOCK, LOCK_SH or fail "lock shared $lock: $!"; - # actually, just relocking as shared would have the same semantics - # but it's best to be explicit + servinfo "sharing"; + lockfile \*LOCK, $lock, LOCK_SH; # NB releases and relocks - if (chdir $gitd) { + if (stat $gitd) { return 1; } - $!==ENOENT or fail "chdir $gitd: $!"; + $!==ENOENT or fail "stat $gitd: $!"; # Well, err, someone must have taken the lock in between # and garbage collected it. How annoying. return 0; } +sub hkfail ($) { my ($msg) = @_; fail "housekeeping: $msg"; } + +sub housekeeping () { + logm 'info', "housekeeping started"; + foreach $lock (<[a-z]*\\.lock>) { + my $subdir = $lock; $subdir =~ s/\\.lock$//; + if (!lstat $lock) { + $! == ENOENT or hkfail "$lock: lstat: $!"; + next; + } + if (-M _ <= $treeexpiredays) { + logm 'debug', "housekeeping: subdirs $subdir: touched recently"; + next; + } + if (!lockfile \*LOCK, $lock, LOCK_EX|LOCK_NB) { + logm 'info', "housekeeping: subdirs $subdir: lock busy, skipping"; + next; + } + logm 'info', "housekeeping: subdirs $subdir: cleaning"; + eval { + foreach my $suffix (qw(tmp git)) { + my $dir = "${subdir}\\.$suffix"; + my $errs; + remove_tree($dir, { safe=>1, error=>\$errs }); + if (stat $dir) { + foreach my $err (@$errs) { + my ($file, $message) = %$err; + logm 'info', "problem deleting: $file: $message"; + } + die "$dir: problem deleting file(s)\n"; + } + } + }; + if (length $@) { + chomp $@; + logm 'warning', "housekeeping: $subdir: cleanup prevented: $@"; + } else { + unlink $lock or hkfail "remove $lock: $!"; + } + } + open HS, ">", "Housekeeping.stamp" or hkfail "touch Housekeeping.stamp: $!"; + close HS or hkfail "close Housekeeping.stamp: $!"; + logm 'info', "housekeeping finished"; +} + +sub housekeepingcheck ($$) { + my ($dofork, $force) = @_; + if (!$force) { + if (!lockfile \*HLOCK, "Housekeeping.lock", LOCK_EX|LOCK_NB) { + logm 'debug', "housekeeping lock taken, not running"; + close HLOCK; + return 0; + } + } + if ($force) { + logm 'info', "housekeeping forced"; + } elsif (!lstat "Housekeeping.stamp") { + $! == ENOENT or fail "lstat Housekeeping.stamp: $!"; + logm 'info', "housekeeping not done yet, will run"; + } elsif (-M _ <= $housekeepingeverydays) { + logm 'debug', "housekeeping done recently"; + close HLOCK; + return 0; + } + if ($dofork) { + my $child = fork; + defined $child or hkfail "fork: $!"; + if (!$child) { + housekeeping(); + exit 0; + } + } else { + housekeeping(); + } + close HLOCK; + return 1; +} + sub runcommand () { - logm 'info', "$specpath servicing"; + servinfo "serving"; + + chdir $gitd or fail "chdir $gitd: $!"; + exec qw(git-upload-pack --strict --timeout=1000 .) or fail "exec git-upload-pack: $!"; } @@ -330,7 +476,12 @@ sub runcommand () { sub daemonservice () { readcommand(); while (!clonefetch()) { } + housekeepingcheck(1,0); runcommand(); } -daemonservice(); +if ($housekeepingonly) { + housekeepingcheck(0, $housekeepingonly>=2); +} else { + daemonservice(); +}