X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ian/git?p=chiark-utils.git;a=blobdiff_plain;f=scripts%2Fgit-cache-proxy;h=450a9f21e6d4fc7c6427da563dc79503087d36d5;hp=b3cf41293082d40077ce58b69336ad7e3c819ed2;hb=3b070901e08a7446746d0e2a43fc3ab19d6162fd;hpb=d0def44170245b1d235e5e081f62b091ec89f85b diff --git a/scripts/git-cache-proxy b/scripts/git-cache-proxy index b3cf412..450a9f2 100755 --- a/scripts/git-cache-proxy +++ b/scripts/git-cache-proxy @@ -1,7 +1,7 @@ #!/usr/bin/perl -w # # git caching proxy -# + # usage: run it on some port, and then clone or fetch # "git://:/[ ]" # where is http:///... or git:///... @@ -14,6 +14,25 @@ # fetch=try use what is in the cache if the fetch/clone fails # timeout= length of time to allow for fetch/clone +# git-cache-proxy is free software; you can redistribute it and/or +# modify them under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 3, or (at +# your option) any later version. +# +# git-cache-proxy is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, consult the Free Software Foundation's +# website at www.fsf.org, or the GNU Project website at www.gnu.org. +# +# (Some code taken from userv-utils's git-daemon.in and git-service.in +# which were written by Tony Finch and subsequently +# heavily modified by Ian Jackson +# and were released under CC0 1.0. The whole program is now GPLv3+.) + use strict; use warnings; @@ -21,62 +40,73 @@ use POSIX; use Socket; use Sys::Syslog; use Fcntl qw(:flock SEEK_SET); +use File::Path qw(remove_tree); our $us = 'git-cache-proxy'; + +#---------- error handling and logging ---------- + +# This is a bit fiddly, because we want to catch errors sent to stderr +# and dump them to syslog if we can, but only if we are running as an +# inetd service. + our $log; # filehandle (ref), or "1" meaning syslog -BEGIN { - open STDERR, ">/dev/null" or exit 255; - open TEMPERR, "+>", undef or exit 255; - open STDERR, ">&TEMPERR" or exit 255; +sub ntoa { + my $sockaddr = shift; + return ('(local)') unless defined $sockaddr; + my ($port,$addr) = sockaddr_in $sockaddr; + $addr = inet_ntoa $addr; + return ("[$addr]:$port",$addr,$port); +} - sub ntoa { - my $sockaddr = shift; - return ('(local)') unless defined $sockaddr; - my ($port,$addr) = sockaddr_in $sockaddr; - $addr = inet_ntoa $addr; - return ("[$addr]:$port",$addr,$port); - } +our ($client) = ntoa getpeername STDIN; +our ($server) = ntoa getsockname STDIN; - our ($client,$client_addr,$client_port) = ntoa getpeername STDIN; - our ($server,$server_addr,$server_port) = ntoa getsockname STDIN; +sub ensurelog () { + return if $log; + openlog $us, qw(pid), 'daemon'; + $log = 1; +} - sub ensurelog () { - return if $log; - openlog $us, qw(pid), 'daemon'; - $log = 1; +sub logm ($$) { + my ($pri, $msg) = @_; + if ($client eq '(local)') { + print STDERR "$us: $pri: $msg\n" or die $!; + exit 1; } - - sub logm ($$) { - my ($pri, $msg) = @_; - ensurelog(); - my $mainmsg = sprintf "%s-%s: %s", $server, $client, $msg; - if (ref $log) { - my $wholemsg = sprintf("%s [%d] %s: %s\n", - strftime("%Y-%m-%d %H:%M:%S Z", gmtime), - $$, - $pri, - $mainmsg); - print $log $wholemsg; - } else { - syslog $pri, $mainmsg; - } + ensurelog(); + my $mainmsg = sprintf "%s-%s: %s", $server, $client, $msg; + if (ref $log) { + my $wholemsg = sprintf("%s [%d] %s: %s\n", + strftime("%Y-%m-%d %H:%M:%S Z", gmtime), + $$, + $pri, + $mainmsg); + print $log $wholemsg; + } else { + syslog $pri, $mainmsg; } +} + +if ($client ne '(local)') { + open STDERR, ">/dev/null" or exit 255; + open TEMPERR, "+>", undef or exit 255; + open STDERR, ">&TEMPERR" or exit 255; +} - END { +END { + if ($client ne '(local)') { if ($?) { logm 'crit', "crashing ($?)"; } seek TEMPERR, 0, SEEK_SET; while () { chomp; logm 'crit', $_; } - exit $?; } + exit $?; } -our $fetchtimeout = 1800; -our $maxfetchtimeout = 3600; - sub fail ($) { my ($msg) = @_; logm 'err', $msg; @@ -95,6 +125,12 @@ sub gitfail ($) { exit 0; } +#---------- argument parsing ---------- + +our $housekeepingthreshdays = 1; +our $treeexpiredays = 21; +our $fetchtimeout = 1800; +our $maxfetchtimeout = 3600; our $cachedir = '/var/cache/git-cache-proxy'; for (;;) { @@ -119,77 +155,89 @@ for (;;) { !@ARGV or fail "bad usage: no non-option arguments permitted"; -chdir $cachedir or fail "chdir $cachedir: $!"; +#---------- main program ---------- -our ($service,$specpath,$spechost); +chdir $cachedir or fail "chdir $cachedir: $!"; -$SIG{ALRM} = sub { fail "timeout" }; -alarm 30; +our ($service,$specpath,$spechost,$subdir); +our ($tmpd,$gitd,$lock); +our ($fetch,$url); sub xread { my $length = shift; my $buffer = ""; while ($length > length $buffer) { my $ret = sysread STDIN, $buffer, $length, length $buffer; - fail "Expected $length bytes, got ".length $buffer + fail "expected $length bytes, got ".length $buffer if defined $ret and $ret == 0; fail "read: $!" if not defined $ret and $! != EINTR and $! != EAGAIN; } return $buffer; } -my $hex_len = xread 4; -fail "Bad hex in packet length" unless $hex_len =~ m|^[0-9a-fA-F]{4}$|; -my $line = xread -4 + hex $hex_len; -unless (($service,$specpath,$spechost) = $line =~ - m|^(git-[a-z-]+) /*([!-~ ]+)\0host=([!-~]+)\0$|) { - $line =~ s|[^ -~]+| |g; - gitfail "unknown/unsupported instruction `$line'" + +sub servinfo ($) { + my ($msg) = @_; + logm 'info', "service `$specpath': $msg"; } -alarm 0; - -$service eq 'git-upload-pack' - or gitfail "unknown/unsupported service `$service'"; - -my $fetch = 2; # 0:don't; 1:try; 2:force -my $url = $specpath; - -while ($url =~ s#\s+(\[)([^][{}]+)\]$## || - $url =~ s#\s+(\{)([^][{}]+)\}$##) { - $_ = $2; - my $must = $1 eq '{'; - if (m/^fetch=try$/) { - $fetch = 1; - } elsif (m/^fetch=no$/) { - $fetch = 0; - } elsif (m/^fetch=must$/) { - $fetch = 2; # the default - } elsif (m/^timeout=(\d+)$/) { - $fetchtimeout = $1 <= $maxfetchtimeout ? $1 : $maxfetchtimeout; - } elsif ($must) { - gitfail "unknown/unsupported option `$_'"; +sub readcommand () { + $SIG{ALRM} = sub { fail "timeout" }; + alarm 30; + + my $hex_len = xread 4; + fail "Bad hex in packet length" unless $hex_len =~ m|^[0-9a-fA-F]{4}$|; + my $line = xread -4 + hex $hex_len; + unless (($service,$specpath,$spechost) = $line =~ + m|^(git-[a-z-]+) /*([!-~ ]+)\0host=([!-~]+)\0$|) { + $line =~ s|[^ -~]+| |g; + gitfail "unknown/unsupported instruction `$line'" + } + + alarm 0; + + $service eq 'git-upload-pack' + or gitfail "unknown/unsupported service `$service'"; + + $fetch = 2; # 0:don't; 1:try; 2:force + $url = $specpath; + + while ($url =~ s#\s+(\[)([^][{}]+)\]$## || + $url =~ s#\s+(\{)([^][{}]+)\}$##) { + $_ = $2; + my $must = $1 eq '{'; + if (m/^fetch=try$/) { + $fetch = 1; + } elsif (m/^fetch=no$/) { + $fetch = 0; + } elsif (m/^fetch=must$/) { + $fetch = 2; # the default + } elsif (m/^timeout=(\d+)$/) { + $fetchtimeout = $1 <= $maxfetchtimeout ? $1 : $maxfetchtimeout; + } elsif ($must) { + gitfail "unknown/unsupported option `$_'"; + } } -} -$url =~ m{^(?:https?|git)://[-.0-9a-z]+/} - or gitfail "unknown/unsupported url scheme or format `$url'"; + $url =~ m{^(?:https?|git)://[-.0-9a-z]+/} + or gitfail "unknown/unsupported url scheme or format `$url'"; -our $subdir = $url; -$subdir =~ s|\\|\\\\|g; -$subdir =~ s|,|\\,|g; -$subdir =~ s|/|,|g; + $subdir = $url; + $subdir =~ s|\\|\\\\|g; + $subdir =~ s|,|\\,|g; + $subdir =~ s|/|,|g; -logm 'info', "$specpath locking"; + $tmpd= "$subdir\\.tmp"; + $gitd= "$subdir\\.git"; + $lock = "$subdir\\.lock"; -my $tmpd= "$subdir\\.tmp"; -my $gitd= "$subdir\\.git"; -my $lock = "$subdir\\.lock"; + servinfo "locking"; +} -for (;;) { +sub clonefetch () { open LOCK, "+>", $lock or fail "open/create $lock: $!"; flock LOCK, LOCK_EX or fail "lock exclusive $lock: $!"; - my $exists = stat $gitd; + my $exists = lstat $gitd; $exists or $!==ENOENT or fail "stat $gitd: $!"; our $fetchfail = ''; @@ -201,10 +249,10 @@ for (;;) { if (!$exists) { system qw(rm -rf --), $tmpd; @cmd = (qw(git clone -q --mirror), $url, $tmpd); - logm 'info', "$specpath cloning @cmd"; + servinfo "cloning"; } else { @cmd = (qw(git remote update --prune)); - logm 'info', "$specpath fetching @cmd"; + servinfo "fetching"; } my $cmd = "@cmd[0..1]"; @@ -223,10 +271,9 @@ for (;;) { my $timedout = 0; { local $SIG{ALRM} = sub { - logm 'info', "$specpath fetch/clone timeout"; + servinfo "fetch/clone timeout"; $timedout=1; kill 9, -$child; }; -logm 'info', "timeout=$fetchtimeout"; alarm($fetchtimeout); $!=0; { local $/=undef; $fetcherr = ; } !FETCHERR->error or fail "read pipe from fetch/clone: $!"; @@ -249,7 +296,7 @@ logm 'info', "timeout=$fetchtimeout"; if ($fetch >= 2) { gitfail $fetchfail; } else { - logm 'info', "$specpath fetch/clone failed: $fetchfail"; + servinfo "fetch/clone failed: $fetchfail"; } } @@ -265,21 +312,91 @@ logm 'info', "timeout=$fetchtimeout"; gitfail "no cached data, and not cloned: $fetchfail"; } - logm 'info', "$specpath sharing"; + servinfo "sharing"; flock LOCK, LOCK_UN or fail "unlock $lock: $!"; flock LOCK, LOCK_SH or fail "lock shared $lock: $!"; # actually, just relocking as shared would have the same semantics # but it's best to be explicit if (chdir $gitd) { - last; + return 1; } $!==ENOENT or fail "chdir $gitd: $!"; # Well, err, someone must have taken the lock in between # and garbage collected it. How annoying. + return 0; +} + +sub housekeeping () { + foreach $lock (<[a-z]*\\.lock>) { + if (!lstat $lock) { + $! == ENOENT or fail "housekeeping: $lock: stat: $!"; + next; + } + if (-M _ <= $treeexpiredays) { + logm 'debug', "housekeeping: $lock: not too old"; + next; + } + my $subdir = $lock; $subdir =~ s/\\.lock$//; + my $ok = 1; + foreach my $suffix (qw(tmp git)) { + my $dir = "${subdir}\\.$suffix"; + my $errs; + remove_tree($dir, { safe=>1, error=>\$errs }); + $ok = 0 if @$errs; + foreach my $err (@$errs) { + logm 'warning', "problem deleting: $err[0]: $err[1]"; + } + } + if ($ok) { + + +sub housekeepingcheck ($$) { + my ($dofork, $force) = @_; + open HLOCK, "+>", "Housekeeping.lock" + or fail "open/create Housekeeping.lock: $!"; + if (!$force) { + if (flock HLOCK, LOCK_EX|LOCK_NB) { + logm 'debug', "housekeeping lock taken, not running"; + close HLOCK; + return 0; + } + } + if ($force) { + logm 'info', "housekeeping forced"; + } elsif (!lstat "Housekeeping.stamp") { + $! == ENOENT or fail "stat housekeeping.stamp: $!"; + logm 'info', "housekeeping stamp missing, will run"; + } elsif (-M _ <= $housekeepingthreshdays) { + logm 'debug', "housekeeping done recently"; + close HLOCK; + return 0; + } + if ($dofork) { + my $child = fork; + defined $child or fail "fork for housekeeping: $!"; + if (!$child) { + housekeeping(); + exit 0; + } + return 1; + } else { + housekeeping(); + return 1; + } +} + +sub runcommand () { + servinfo "servicing"; + exec qw(git-upload-pack --strict --timeout=1000 .) + or fail "exec git-upload-pack: $!"; +} + +sub daemonservice () { + readcommand(); + while (!clonefetch()) { } + runcommand(); } -logm 'info', "$specpath servicing"; -exec qw(git-upload-pack --strict --timeout=1000 .) - or fail "exec git-upload-pack: $!"; +daemonservice();