X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ian/git?p=chiark-utils.git;a=blobdiff_plain;f=scripts%2Fgit-cache-proxy;h=3e57a4f5530e2971794dd3a82612590421624c8e;hp=12a4f52cdff44f3f412b6ce0cc4ca32574609393;hb=c80b877026deff9a58854e9e06bdcef9f6427fcb;hpb=fa641e4c6e88702eb71dfd6b489e3f8592730e42 diff --git a/scripts/git-cache-proxy b/scripts/git-cache-proxy index 12a4f52..3e57a4f 100755 --- a/scripts/git-cache-proxy +++ b/scripts/git-cache-proxy @@ -2,6 +2,12 @@ # # git caching proxy +# Suitable only for exposing to semi-trusted clients: clients are not +# supposed to be able to take over the server. However, clients can +# probably deny service to each other because the current +# implementation is not very good at handling various out-of-course +# situations (notably, clients which are too slow). + # usage: run it on some port, and then clone or fetch # "git://:/[ ]" # where is http:///... or git:///... @@ -14,11 +20,22 @@ # fetch=try use what is in the cache if the fetch/clone fails # timeout= length of time to allow for fetch/clone +# example inetd.conf line: +# 9419 stream tcp nowait git-cache /usr/bin/git-cache-proxy git-cache-proxy +# you'll need to +# adduser git-cache +# mkdir /var/cache/git-cache-proxy +# chown git-cache /var/cache/git-cache-proxy + +# git-cache-proxy +# Copyright 2010 Tony Finch +# Copyright 2013,2014 Ian Jackson +# # git-cache-proxy is free software; you can redistribute it and/or # modify them under the terms of the GNU General Public License as # published by the Free Software Foundation; either version 3, or (at # your option) any later version. -# +# # git-cache-proxy is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU @@ -44,6 +61,14 @@ use File::Path qw(remove_tree); our $us = 'git-cache-proxy'; +our $debug = 0; +our $housekeepingeverydays = 1; +our $treeexpiredays = 21; +our $fetchtimeout = 1800; +our $maxfetchtimeout = 3600; +our $cachedir = '/var/cache/git-cache-proxy'; +our $housekeepingonly = 0; + #---------- error handling and logging ---------- # This is a bit fiddly, because we want to catch errors sent to stderr @@ -71,9 +96,10 @@ sub ensurelog () { sub logm ($$) { my ($pri, $msg) = @_; + return if $pri eq 'debug' && !$debug; if ($client eq '(local)') { print STDERR "$us: $pri: $msg\n" or die $!; - exit 1; + return; } ensurelog(); my $mainmsg = sprintf "%s-%s: %s", $server, $client, $msg; @@ -81,11 +107,11 @@ sub logm ($$) { my $wholemsg = sprintf("%s [%d] %s: %s\n", strftime("%Y-%m-%d %H:%M:%S Z", gmtime), $$, - $pri, + $pri eq 'err' ? 'error' : $pri, $mainmsg); print $log $wholemsg; } else { - syslog $pri, $mainmsg; + syslog $pri, "%s", "$pri $mainmsg"; } } @@ -127,26 +153,31 @@ sub gitfail ($) { #---------- argument parsing ---------- -our $housekeepingthreshdays = 1; -our $treeexpiredays = 21; -our $fetchtimeout = 1800; -our $maxfetchtimeout = 3600; -our $cachedir = '/var/cache/git-cache-proxy'; - for (;;) { last unless @ARGV; last unless $ARGV[0] =~ m/^-/; $_ = shift @ARGV; for (;;) { last unless m/^-./; - if (s/^-L(.*)$//) { + if (s/^-H/-/) { + $housekeepingonly++; + } elsif (s/^-D/-/) { + $debug++; + } elsif (s/^-L(.*)$//) { my $logfile = $_; open STDERR, ">>", $logfile or fail "open $logfile: $!"; $log = \*STDERR; } elsif (s/^-d(.*)$//) { $cachedir = $1; - } elsif (s/^--(maxfetchtimeout|fetchtimeout)=(\d+)$//) { - ${ $::{$1} } = $2; + } elsif (s/^--( max-fetch-timeout + | fetch-timeout + | tree-expire-days + | housekeeping-interval-days + )=(\d+)$//x) { + my $vn = $1; + $vn =~ y/-//d; + die $vn unless defined ${ $::{$vn} }; + ${ $::{$vn} } = $2; } else { fail "bad usage: unknown option `$_'"; } @@ -155,13 +186,36 @@ for (;;) { !@ARGV or fail "bad usage: no non-option arguments permitted"; -#---------- main program ---------- +#---------- utility functions ---------- -chdir $cachedir or fail "chdir $cachedir: $!"; - -our ($service,$specpath,$spechost,$subdir); -our ($tmpd,$gitd,$lock); -our ($fetch,$url); +sub lockfile ($$$) { + my ($fh, $fn, $flockmode) = @_; + my $what = $fn.(($flockmode & ~LOCK_NB) == LOCK_SH ? " (shared)" : ""); + for (;;) { + close $fh; + open $fh, '+>', $fn or fail "open/create $fn for lock: $!"; + logm 'debug', "lock $what: acquiring"; + if (!flock $fh, $flockmode) { + if ($flockmode & LOCK_NB && $! == EWOULDBLOCK) { + return 0; # ok then + } + fail "lock $what: $!"; + } + stat $fh or fail "stat opened $fn: $!"; + my $fh_ino = ((stat _)[1]); + if (!stat $fn) { + $! == ENOENT or fail "stat $fn: $!"; + next; + } + my $fn_ino = ((stat _)[1]); + if ($fn_ino == $fh_ino) { + logm 'debug', "lock $what: acquired"; + return 1; + } + logm 'debug', "lock $what: deleted, need to loop again"; + # oh dear + } +} sub xread { my $length = shift; @@ -175,6 +229,14 @@ sub xread { return $buffer; } +#---------- main program ---------- + +chdir $cachedir or fail "chdir $cachedir: $!"; + +our ($service,$specpath,$spechost,$subdir); +our ($tmpd,$gitd,$lock); +our ($fetch,$url); + sub servinfo ($) { my ($msg) = @_; logm 'info', "service `$specpath': $msg"; @@ -211,7 +273,7 @@ sub readcommand () { $fetch = 0; } elsif (m/^fetch=must$/) { $fetch = 2; # the default - } elsif (m/^timeout=(\d+)$/) { + } elsif (m/^timeout=(\d+)$/ && $1 >= 1) { $fetchtimeout = $1 <= $maxfetchtimeout ? $1 : $maxfetchtimeout; } elsif ($must) { gitfail "unknown/unsupported option `$_'"; @@ -234,11 +296,10 @@ sub readcommand () { } sub clonefetch () { - open LOCK, "+>", $lock or fail "open/create $lock: $!"; - flock LOCK, LOCK_EX or fail "lock exclusive $lock: $!"; + lockfile \*LOCK, $lock, LOCK_EX; - my $exists = stat $gitd; - $exists or $!==ENOENT or fail "stat $gitd: $!"; + my $exists = lstat $gitd; + $exists or $!==ENOENT or fail "lstat $gitd: $!"; our $fetchfail = ''; @@ -313,51 +374,71 @@ sub clonefetch () { } servinfo "sharing"; - flock LOCK, LOCK_UN or fail "unlock $lock: $!"; - flock LOCK, LOCK_SH or fail "lock shared $lock: $!"; - # actually, just relocking as shared would have the same semantics - # but it's best to be explicit + lockfile \*LOCK, $lock, LOCK_SH; # NB releases and relocks - if (chdir $gitd) { + if (stat $gitd) { return 1; } - $!==ENOENT or fail "chdir $gitd: $!"; + $!==ENOENT or fail "stat $gitd: $!"; # Well, err, someone must have taken the lock in between # and garbage collected it. How annoying. return 0; } +sub hkfail ($) { my ($msg) = @_; fail "housekeeping: $msg"; } + sub housekeeping () { + logm 'info', "housekeeping started"; foreach $lock (<[a-z]*\\.lock>) { + my $subdir = $lock; $subdir =~ s/\\.lock$//; if (!lstat $lock) { - $! == ENOENT or fail "housekeeping: $lock: stat: $!"; + $! == ENOENT or hkfail "$lock: lstat: $!"; next; } if (-M _ <= $treeexpiredays) { - logm 'debug', "housekeeping: $lock: not too old"; + logm 'debug', "housekeeping: subdirs $subdir: touched recently"; next; } - my $subdir = $lock; $subdir =~ s/\\.lock$//; - my $ok = 1; - foreach my $suffix (qw(tmp git)) { - my $dir = "${subdir}\\.$suffix"; - my $errs; - remove_tree($dir, { safe=>1, error=>\$errs }); - $ok = 0 if @$errs; - foreach my $err (@$errs) { - logm 'warning', "problem deleting: $err[0]: $err[1]"; + if (!lockfile \*LOCK, $lock, LOCK_EX|LOCK_NB) { + logm 'info', "housekeeping: subdirs $subdir: lock busy, skipping"; + next; + } + logm 'info', "housekeeping: subdirs $subdir: cleaning"; + eval { + foreach my $suffix (qw(tmp git)) { + my $dir = "${subdir}\\.$suffix"; + my $tdir = "${subdir}\\.tmp"; + if ($dir ne $tdir) { + if (!rename $dir,$tdir) { + next if $! == ENOENT; + die "$dir: cannot rename to $tdir: $!\n"; + } + } + system qw(rm -rf --), $tdir; + if (stat $tdir) { + die "$dir: problem deleting file(s), rm exited $?\n"; + } elsif ($! != ENOENT) { + die "$tdir: cannot stat after deletion: $!\n"; + } } + }; + if (length $@) { + chomp $@; + logm 'warning', "housekeeping: $subdir: cleanup prevented: $@"; + } else { + unlink $lock or hkfail "remove $lock: $!"; } - if ($ok) { - + } + open HS, ">", "Housekeeping.stamp" or hkfail "touch Housekeeping.stamp: $!"; + close HS or hkfail "close Housekeeping.stamp: $!"; + logm 'info', "housekeeping finished"; +} sub housekeepingcheck ($$) { my ($dofork, $force) = @_; - open HLOCK, "+>", "Housekeeping.lock" - or fail "open/create Housekeeping.lock: $!"; if (!$force) { - if (flock HLOCK, LOCK_EX|LOCK_NB) { + if (!lockfile \*HLOCK, "Housekeeping.lock", LOCK_EX|LOCK_NB) { logm 'debug', "housekeeping lock taken, not running"; close HLOCK; return 0; @@ -366,29 +447,34 @@ sub housekeepingcheck ($$) { if ($force) { logm 'info', "housekeeping forced"; } elsif (!lstat "Housekeeping.stamp") { - $! == ENOENT or fail "stat housekeeping.stamp: $!"; - logm 'info', "housekeeping stamp missing, will run"; - } elsif (-M _ <= $housekeepingthreshdays) { + $! == ENOENT or fail "lstat Housekeeping.stamp: $!"; + logm 'info', "housekeeping not done yet, will run"; + } elsif (-M _ <= $housekeepingeverydays) { logm 'debug', "housekeeping done recently"; close HLOCK; return 0; } if ($dofork) { my $child = fork; - defined $child or fail "fork for housekeeping: $!"; + defined $child or fail "fork: $!"; if (!$child) { + open STDERR, "|logger -p daemon.warning -t '$us(housekeeping)'" + or die "fork: logger $!"; housekeeping(); exit 0; } - return 1; } else { housekeeping(); - return 1; } + close HLOCK; + return 1; } sub runcommand () { - servinfo "servicing"; + servinfo "serving"; + + chdir $gitd or fail "chdir $gitd: $!"; + exec qw(git-upload-pack --strict --timeout=1000 .) or fail "exec git-upload-pack: $!"; } @@ -396,7 +482,12 @@ sub runcommand () { sub daemonservice () { readcommand(); while (!clonefetch()) { } + housekeepingcheck(1,0); runcommand(); } -daemonservice(); +if ($housekeepingonly) { + housekeepingcheck(0, $housekeepingonly>=2); +} else { + daemonservice(); +}