X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ian/git?p=chiark-utils.git;a=blobdiff_plain;f=scripts%2Fgit-cache-proxy;h=647f6b60d3c152dae98a16240f0bf9181dcee86b;hp=df522a0126238d40d97a0c342ea7e22910269bd5;hb=a14cb06ac8fff27e407a39993b2b20f9a7109f7c;hpb=18cb26fc56f3384564491dc155c104639695d550 diff --git a/scripts/git-cache-proxy b/scripts/git-cache-proxy index df522a0..647f6b6 100755 --- a/scripts/git-cache-proxy +++ b/scripts/git-cache-proxy @@ -2,6 +2,12 @@ # # git caching proxy +# Suitable only for exposing to semi-trusted clients: clients are not +# supposed to be able to take over the server. However, clients can +# probably deny service to each other because the current +# implementation is not very good at handling various out-of-course +# situations (notably, clients which are too slow). + # usage: run it on some port, and then clone or fetch # "git://:/[ ]" # where is http:///... or git:///... @@ -14,11 +20,23 @@ # fetch=try use what is in the cache if the fetch/clone fails # timeout= length of time to allow for fetch/clone +# example inetd.conf line: +# 9419 stream tcp nowait git-cache /usr/bin/git-cache-proxy git-cache-proxy +# you'll need to +# adduser git-cache +# mkdir /var/cache/git-cache-proxy +# chown git-cache /var/cache/git-cache-proxy + +# git-cache-proxy +# Copyright 2010 Tony Finch +# Copyright 2013,2014 Ian Jackson +# Copyright 2017 Citrix +# # git-cache-proxy is free software; you can redistribute it and/or # modify them under the terms of the GNU General Public License as # published by the Free Software Foundation; either version 3, or (at # your option) any later version. -# +# # git-cache-proxy is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU @@ -40,9 +58,18 @@ use POSIX; use Socket; use Sys::Syslog; use Fcntl qw(:flock SEEK_SET); +use File::Path qw(remove_tree); our $us = 'git-cache-proxy'; +our $debug = 0; +our $housekeepingeverydays = 1; +our $treeexpiredays = 21; +our $fetchtimeout = 1800; +our $maxfetchtimeout = 3600; +our $cachedir = '/var/cache/git-cache-proxy'; +our $housekeepingonly = 0; + #---------- error handling and logging ---------- # This is a bit fiddly, because we want to catch errors sent to stderr @@ -70,9 +97,10 @@ sub ensurelog () { sub logm ($$) { my ($pri, $msg) = @_; + return if $pri eq 'debug' && !$debug; if ($client eq '(local)') { print STDERR "$us: $pri: $msg\n" or die $!; - exit 1; + return; } ensurelog(); my $mainmsg = sprintf "%s-%s: %s", $server, $client, $msg; @@ -80,11 +108,11 @@ sub logm ($$) { my $wholemsg = sprintf("%s [%d] %s: %s\n", strftime("%Y-%m-%d %H:%M:%S Z", gmtime), $$, - $pri, + $pri eq 'err' ? 'error' : $pri, $mainmsg); print $log $wholemsg; } else { - syslog $pri, $mainmsg; + syslog $pri, "%s", "$pri $mainmsg"; } } @@ -126,24 +154,31 @@ sub gitfail ($) { #---------- argument parsing ---------- -our $fetchtimeout = 1800; -our $maxfetchtimeout = 3600; -our $cachedir = '/var/cache/git-cache-proxy'; - for (;;) { last unless @ARGV; last unless $ARGV[0] =~ m/^-/; $_ = shift @ARGV; for (;;) { last unless m/^-./; - if (s/^-L(.*)$//) { + if (s/^-H/-/) { + $housekeepingonly++; + } elsif (s/^-D/-/) { + $debug++; + } elsif (s/^-L(.*)$//) { my $logfile = $_; open STDERR, ">>", $logfile or fail "open $logfile: $!"; $log = \*STDERR; } elsif (s/^-d(.*)$//) { $cachedir = $1; - } elsif (s/^--(maxfetchtimeout|fetchtimeout)=(\d+)$//) { - ${ $::{$1} } = $2; + } elsif (s/^--( max-fetch-timeout + | fetch-timeout + | tree-expire-days + | housekeeping-interval-days + )=(\d+)$//x) { + my $vn = $1; + $vn =~ y/-//d; + die $vn unless defined ${ $::{$vn} }; + ${ $::{$vn} } = $2; } else { fail "bad usage: unknown option `$_'"; } @@ -152,80 +187,120 @@ for (;;) { !@ARGV or fail "bad usage: no non-option arguments permitted"; -#---------- main program ---------- - -chdir $cachedir or fail "chdir $cachedir: $!"; - -our ($service,$specpath,$spechost); +#---------- utility functions ---------- -$SIG{ALRM} = sub { fail "timeout" }; -alarm 30; +sub lockfile ($$$) { + my ($fh, $fn, $flockmode) = @_; + my $what = $fn.(($flockmode & ~LOCK_NB) == LOCK_SH ? " (shared)" : ""); + for (;;) { + close $fh; + open $fh, '+>', $fn or fail "open/create $fn for lock: $!"; + logm 'debug', "lock $what: acquiring"; + if (!flock $fh, $flockmode) { + if ($flockmode & LOCK_NB && $! == EWOULDBLOCK) { + return 0; # ok then + } + fail "lock $what: $!"; + } + stat $fh or fail "stat opened $fn: $!"; + my $fh_ino = ((stat _)[1]); + if (!stat $fn) { + $! == ENOENT or fail "stat $fn: $!"; + next; + } + my $fn_ino = ((stat _)[1]); + if ($fn_ino == $fh_ino) { + logm 'debug', "lock $what: acquired"; + return 1; + } + logm 'debug', "lock $what: deleted, need to loop again"; + # oh dear + } +} sub xread { my $length = shift; my $buffer = ""; while ($length > length $buffer) { my $ret = sysread STDIN, $buffer, $length, length $buffer; - fail "Expected $length bytes, got ".length $buffer + fail "expected $length bytes, got ".length $buffer if defined $ret and $ret == 0; fail "read: $!" if not defined $ret and $! != EINTR and $! != EAGAIN; } return $buffer; } -my $hex_len = xread 4; -fail "Bad hex in packet length" unless $hex_len =~ m|^[0-9a-fA-F]{4}$|; -my $line = xread -4 + hex $hex_len; -unless (($service,$specpath,$spechost) = $line =~ - m|^(git-[a-z-]+) /*([!-~ ]+)\0host=([!-~]+)\0$|) { - $line =~ s|[^ -~]+| |g; - gitfail "unknown/unsupported instruction `$line'" + +#---------- main program ---------- + +chdir $cachedir or fail "chdir $cachedir: $!"; + +our ($service,$specpath,$spechost,$subdir); +our ($tmpd,$gitd,$lock); +our ($fetch,$url); + +sub servinfo ($) { + my ($msg) = @_; + logm 'info', "service `$specpath': $msg"; } -alarm 0; - -$service eq 'git-upload-pack' - or gitfail "unknown/unsupported service `$service'"; - -my $fetch = 2; # 0:don't; 1:try; 2:force -my $url = $specpath; - -while ($url =~ s#\s+(\[)([^][{}]+)\]$## || - $url =~ s#\s+(\{)([^][{}]+)\}$##) { - $_ = $2; - my $must = $1 eq '{'; - if (m/^fetch=try$/) { - $fetch = 1; - } elsif (m/^fetch=no$/) { - $fetch = 0; - } elsif (m/^fetch=must$/) { - $fetch = 2; # the default - } elsif (m/^timeout=(\d+)$/) { - $fetchtimeout = $1 <= $maxfetchtimeout ? $1 : $maxfetchtimeout; - } elsif ($must) { - gitfail "unknown/unsupported option `$_'"; +sub readcommand () { + $SIG{ALRM} = sub { fail "timeout" }; + alarm 30; + + my $hex_len = xread 4; + fail "Bad hex in packet length" unless $hex_len =~ m|^[0-9a-fA-F]{4}$|; + my $line = xread -4 + hex $hex_len; + unless (($service,$specpath,$spechost) = $line =~ + m|^(git-[a-z-]+) /*([!-~ ]+)\0host=([!-~]+)\0$|) { + $line =~ s|[^ -~]+| |g; + gitfail "unknown/unsupported instruction `$line'" } -} -$url =~ m{^(?:https?|git)://[-.0-9a-z]+/} - or gitfail "unknown/unsupported url scheme or format `$url'"; + alarm 0; + + $service eq 'git-upload-pack' + or gitfail "unknown/unsupported service `$service'"; + + $fetch = 2; # 0:don't; 1:try; 2:force + $url = $specpath; + + while ($url =~ s#\s+(\[)([^][{}]+)\]$## || + $url =~ s#\s+(\{)([^][{}]+)\}$##) { + $_ = $2; + my $must = $1 eq '{'; + if (m/^fetch=try$/) { + $fetch = 1; + } elsif (m/^fetch=no$/) { + $fetch = 0; + } elsif (m/^fetch=must$/) { + $fetch = 2; # the default + } elsif (m/^timeout=(\d+)$/ && $1 >= 1) { + $fetchtimeout = $1 <= $maxfetchtimeout ? $1 : $maxfetchtimeout; + } elsif ($must) { + gitfail "unknown/unsupported option `$_'"; + } + } -our $subdir = $url; -$subdir =~ s|\\|\\\\|g; -$subdir =~ s|,|\\,|g; -$subdir =~ s|/|,|g; + $url =~ m{^(?:https?|git)://[-.0-9a-z]+/} + or gitfail "unknown/unsupported url scheme or format `$url'"; -logm 'info', "$specpath locking"; + $subdir = $url; + $subdir =~ s|\\|\\\\|g; + $subdir =~ s|,|\\,|g; + $subdir =~ s|/|,|g; -my $tmpd= "$subdir\\.tmp"; -my $gitd= "$subdir\\.git"; -my $lock = "$subdir\\.lock"; + $tmpd= "$subdir\\.tmp"; + $gitd= "$subdir\\.git"; + $lock = "$subdir\\.lock"; -for (;;) { - open LOCK, "+>", $lock or fail "open/create $lock: $!"; - flock LOCK, LOCK_EX or fail "lock exclusive $lock: $!"; + servinfo "locking"; +} + +sub clonefetch () { + lockfile \*LOCK, $lock, LOCK_EX; - my $exists = stat $gitd; - $exists or $!==ENOENT or fail "stat $gitd: $!"; + my $exists = lstat $gitd; + $exists or $!==ENOENT or fail "lstat $gitd: $!"; our $fetchfail = ''; @@ -236,10 +311,10 @@ for (;;) { if (!$exists) { system qw(rm -rf --), $tmpd; @cmd = (qw(git clone -q --mirror), $url, $tmpd); - logm 'info', "$specpath cloning @cmd"; + servinfo "cloning"; } else { @cmd = (qw(git remote update --prune)); - logm 'info', "$specpath fetching @cmd"; + servinfo "fetching"; } my $cmd = "@cmd[0..1]"; @@ -258,10 +333,9 @@ for (;;) { my $timedout = 0; { local $SIG{ALRM} = sub { - logm 'info', "$specpath fetch/clone timeout"; + servinfo "fetch/clone timeout"; $timedout=1; kill 9, -$child; }; -logm 'info', "timeout=$fetchtimeout"; alarm($fetchtimeout); $!=0; { local $/=undef; $fetcherr = ; } !FETCHERR->error or fail "read pipe from fetch/clone: $!"; @@ -284,9 +358,10 @@ logm 'info', "timeout=$fetchtimeout"; if ($fetch >= 2) { gitfail $fetchfail; } else { - logm 'info', "$specpath fetch/clone failed: $fetchfail"; + servinfo "fetch/clone failed: $fetchfail"; } } + alarm 0; if (!$exists) { rename $tmpd, $gitd or fail "rename fresh $tmpd to $gitd: $!"; @@ -300,21 +375,121 @@ logm 'info', "timeout=$fetchtimeout"; gitfail "no cached data, and not cloned: $fetchfail"; } - logm 'info', "$specpath sharing"; - flock LOCK, LOCK_UN or fail "unlock $lock: $!"; - flock LOCK, LOCK_SH or fail "lock shared $lock: $!"; - # actually, just relocking as shared would have the same semantics - # but it's best to be explicit + servinfo "sharing"; + lockfile \*LOCK, $lock, LOCK_SH; # NB releases and relocks - if (chdir $gitd) { - last; + if (stat $gitd) { + return 1; } - $!==ENOENT or fail "chdir $gitd: $!"; + $!==ENOENT or fail "stat $gitd: $!"; # Well, err, someone must have taken the lock in between # and garbage collected it. How annoying. + return 0; +} + +sub hkfail ($) { my ($msg) = @_; fail "housekeeping: $msg"; } + +sub housekeeping () { + logm 'info', "housekeeping started"; + foreach $lock (<[a-z]*\\.lock>) { + my $subdir = $lock; $subdir =~ s/\\.lock$//; + if (!lstat $lock) { + $! == ENOENT or hkfail "$lock: lstat: $!"; + next; + } + if (-M _ <= $treeexpiredays) { + logm 'debug', "housekeeping: subdirs $subdir: touched recently"; + next; + } + if (!lockfile \*LOCK, $lock, LOCK_EX|LOCK_NB) { + logm 'info', "housekeeping: subdirs $subdir: lock busy, skipping"; + next; + } + logm 'info', "housekeeping: subdirs $subdir: cleaning"; + eval { + foreach my $suffix (qw(tmp git)) { + my $dir = "${subdir}\\.$suffix"; + my $tdir = "${subdir}\\.tmp"; + if ($dir ne $tdir) { + if (!rename $dir,$tdir) { + next if $! == ENOENT; + die "$dir: cannot rename to $tdir: $!\n"; + } + } + system qw(rm -rf --), $tdir; + if (stat $tdir) { + die "$dir: problem deleting file(s), rm exited $?\n"; + } elsif ($! != ENOENT) { + die "$tdir: cannot stat after deletion: $!\n"; + } + } + }; + if (length $@) { + chomp $@; + logm 'warning', "housekeeping: $subdir: cleanup prevented: $@"; + } else { + unlink $lock or hkfail "remove $lock: $!"; + } + } + open HS, ">", "Housekeeping.stamp" or hkfail "touch Housekeeping.stamp: $!"; + close HS or hkfail "close Housekeeping.stamp: $!"; + logm 'info', "housekeeping finished"; } -logm 'info', "$specpath servicing"; -exec qw(git-upload-pack --strict --timeout=1000 .) - or fail "exec git-upload-pack: $!"; +sub housekeepingcheck ($$) { + my ($dofork, $force) = @_; + if (!$force) { + if (!lockfile \*HLOCK, "Housekeeping.lock", LOCK_EX|LOCK_NB) { + logm 'debug', "housekeeping lock taken, not running"; + close HLOCK; + return 0; + } + } + if ($force) { + logm 'info', "housekeeping forced"; + } elsif (!lstat "Housekeeping.stamp") { + $! == ENOENT or fail "lstat Housekeeping.stamp: $!"; + logm 'info', "housekeeping not done yet, will run"; + } elsif (-M _ <= $housekeepingeverydays) { + logm 'debug', "housekeeping done recently"; + close HLOCK; + return 0; + } + if ($dofork) { + my $child = fork; + defined $child or fail "fork: $!"; + if (!$child) { + open STDERR, "|logger -p daemon.warning -t '$us(housekeeping)'" + or die "fork: logger $!"; + housekeeping(); + exit 0; + } + } else { + housekeeping(); + } + close HLOCK; + return 1; +} + +sub runcommand () { + servinfo "serving"; + + chdir $gitd or fail "chdir $gitd: $!"; + + exec qw(git-upload-pack --strict --timeout=1000 .) + or fail "exec git-upload-pack: $!"; +} + +sub daemonservice () { + readcommand(); + while (!clonefetch()) { } + housekeepingcheck(1,0); + runcommand(); +} + +if ($housekeepingonly) { + housekeepingcheck(0, $housekeepingonly>=2); +} else { + daemonservice(); +}