X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ian/git?p=chiark-utils.git;a=blobdiff_plain;f=scripts%2Fgit-cache-proxy;h=2f38de1154be164d316d7fb06579a22841d521c1;hp=d8ef34df617349c50e5dec41e29770e9ede32928;hb=eede02a315ee2d05bfada0e62f5a9746046d3492;hpb=4d1cbdd9af99eeeefa99c2f3ef9a9bd1db52f8c2 diff --git a/scripts/git-cache-proxy b/scripts/git-cache-proxy old mode 100644 new mode 100755 index d8ef34d..2f38de1 --- a/scripts/git-cache-proxy +++ b/scripts/git-cache-proxy @@ -1,10 +1,54 @@ -#!/usr/bin/perl +#!/usr/bin/perl -w # # git caching proxy -# + +# Suitable only for exposing to semi-trusted clients: clients are not +# supposed to be able to take over the server. However, clients can +# probably deny service to each other because the current +# implementation is not very good at handling various out-of-course +# situations (notably, clients which are too slow). + # usage: run it on some port, and then clone or fetch -# git://:/ +# "git://:/[ ]" # where is http:///... or git:///... +# and is zero or more (whitespace-separated) of +# [] will be ignored if not recognised +# {} error if not recognised +# options currently known: +# fetch=must fail if the fetch/clone from upstream fails +# fetch=no just use what is in the cache +# fetch=try use what is in the cache if the fetch/clone fails +# timeout= length of time to allow for fetch/clone + +# example inetd.conf line: +# 9419 stream tcp nowait git-cache /usr/bin/git-cache-proxy git-cache-proxy +# you'll need to +# adduser git-cache +# mkdir /var/cache/git-cache-proxy +# chown git-cache /var/cache/git-cache-proxy + +# git-cache-proxy +# Copyright 2010 Tony Finch +# Copyright 2013 Ian Jackson +# +# git-cache-proxy is free software; you can redistribute it and/or +# modify them under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 3, or (at +# your option) any later version. +# +# git-cache-proxy is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, consult the Free Software Foundation's +# website at www.fsf.org, or the GNU Project website at www.gnu.org. +# +# (Some code taken from userv-utils's git-daemon.in and git-service.in +# which were written by Tony Finch and subsequently +# heavily modified by Ian Jackson +# and were released under CC0 1.0. The whole program is now GPLv3+.) use strict; use warnings; @@ -12,11 +56,26 @@ use warnings; use POSIX; use Socket; use Sys::Syslog; +use Fcntl qw(:flock SEEK_SET); +use File::Path qw(remove_tree); our $us = 'git-cache-proxy'; -our $log; # filehandle (ref), or "1" meaning syslog + +our $debug = 0; +our $housekeepingeverydays = 1; +our $treeexpiredays = 21; our $fetchtimeout = 1800; our $maxfetchtimeout = 3600; +our $cachedir = '/var/cache/git-cache-proxy'; +our $housekeepingonly = 0; + +#---------- error handling and logging ---------- + +# This is a bit fiddly, because we want to catch errors sent to stderr +# and dump them to syslog if we can, but only if we are running as an +# inetd service. + +our $log; # filehandle (ref), or "1" meaning syslog sub ntoa { my $sockaddr = shift; @@ -26,8 +85,8 @@ sub ntoa { return ("[$addr]:$port",$addr,$port); } -our ($client,$client_addr,$client_port) = ntoa getpeername STDIN; -our ($server,$server_addr,$server_port) = ntoa getsockname STDIN; +our ($client) = ntoa getpeername STDIN; +our ($server) = ntoa getsockname STDIN; sub ensurelog () { return if $log; @@ -35,39 +94,64 @@ sub ensurelog () { $log = 1; } -sub log ($) { +sub logm ($$) { my ($pri, $msg) = @_; + return if $pri eq 'debug' && !$debug; + if ($client eq '(local)') { + print STDERR "$us: $pri: $msg\n" or die $!; + return; + } ensurelog(); my $mainmsg = sprintf "%s-%s: %s", $server, $client, $msg; if (ref $log) { my $wholemsg = sprintf("%s [%d] %s: %s\n", strftime("%Y-%m-%d %H:%M:%S Z", gmtime), $$, - $pri, + $pri eq 'err' ? 'error' : $pri, $mainmsg); print $log $wholemsg; } else { - syslog $pri, $mainmsg; + syslog $pri, "%s", "$pri $mainmsg"; + } +} + +if ($client ne '(local)') { + open STDERR, ">/dev/null" or exit 255; + open TEMPERR, "+>", undef or exit 255; + open STDERR, ">&TEMPERR" or exit 255; +} + +END { + if ($client ne '(local)') { + if ($?) { logm 'crit', "crashing ($?)"; } + seek TEMPERR, 0, SEEK_SET; + while () { + chomp; + logm 'crit', $_; + } } + exit $?; } sub fail ($) { my ($msg) = @_; - log 'error', $msg; - exit 1; + logm 'err', $msg; + exit 0; } sub gitfail ($) { my ($msg) = @_; - log 'warning', $msg; + close LOCK; + alarm 60; + logm 'notice', $msg; my $gitmsg = "ERR $us: $msg"; $gitmsg = substr($gitmsg,0,65535); # just in case printf "%04x%s", length($gitmsg)+4, $gitmsg; flush STDOUT; - exit 1; + exit 0; } -our $cachedir = '/var/cache/git-cache-proxy'; +#---------- argument parsing ---------- for (;;) { last unless @ARGV; @@ -75,14 +159,25 @@ for (;;) { $_ = shift @ARGV; for (;;) { last unless m/^-./; - if (s/^-L(.*)$//) { + if (s/^-H/-/) { + $housekeepingonly++; + } elsif (s/^-D/-/) { + $debug++; + } elsif (s/^-L(.*)$//) { my $logfile = $_; open STDERR, ">>", $logfile or fail "open $logfile: $!"; $log = \*STDERR; } elsif (s/^-d(.*)$//) { $cachedir = $1; - } elsif (s/^--(maxfetchtimeout|fetchtimeout)=(\d+)$//) { - ${ $::{$1} } = $2; + } elsif (s/^--( max-fetch-timeout + | fetch-timeout + | tree-expire-days + | housekeeping-interval-days + )=(\d+)$//x) { + my $vn = $1; + $vn =~ y/-//d; + die $vn unless defined ${ $::{$vn} }; + ${ $::{$vn} } = $2; } else { fail "bad usage: unknown option `$_'"; } @@ -91,140 +186,302 @@ for (;;) { !@ARGV or fail "bad usage: no non-option arguments permitted"; -chdir $cachedir or fail "chdir $cachedir: $!"; - -our ($service,$specpath,$spechost); +#---------- utility functions ---------- -$SIG{ALRM} = sub { fail "timeout" }; -alarm 30; +sub lockfile ($$$) { + my ($fh, $fn, $flockmode) = @_; + my $what = $fn.(($flockmode & ~LOCK_NB) == LOCK_SH ? " (shared)" : ""); + for (;;) { + close $fh; + open $fh, '+>', $fn or fail "open/create $fn for lock: $!"; + logm 'debug', "lock $what: acquiring"; + if (!flock $fh, $flockmode) { + if ($flockmode & LOCK_NB && $! == EWOULDBLOCK) { + return 0; # ok then + } + fail "lock $what: $!"; + } + stat $fh or fail "stat opened $fn: $!"; + my $fh_ino = ((stat _)[1]); + if (!stat $fn) { + $! == ENOENT or fail "stat $fn: $!"; + next; + } + my $fn_ino = ((stat _)[1]); + if ($fn_ino == $fh_ino) { + logm 'debug', "lock $what: acquired"; + return 1; + } + logm 'debug', "lock $what: deleted, need to loop again"; + # oh dear + } +} sub xread { my $length = shift; my $buffer = ""; while ($length > length $buffer) { my $ret = sysread STDIN, $buffer, $length, length $buffer; - fail "Expected $length bytes, got ".length $buffer + fail "expected $length bytes, got ".length $buffer if defined $ret and $ret == 0; fail "read: $!" if not defined $ret and $! != EINTR and $! != EAGAIN; } return $buffer; } -my $hex_len = xread 4; -fail "Bad hex in packet length" unless $hex_len =~ m|^[0-9a-fA-F]{4}$|; -my $line = xread -4 + hex $hex_len; -unless (($service,$specpath,$spechost) = $line =~ - m|^(git-[a-z-]+) /*([!-~]+)\0host=([!-~]+)\0$|) { - $line =~ s|[^ -~]+| |g; - fail "Could not parse \"$line\"" -} - -alarm 0; - -$service eq 'git-upload-pack' - or gitfail "unknown/unsupported service `$service'"; - -my $fetch = 2; # 0:don't; 1:try; 2:force -my $url = $specpath; - -while ($url =~ s#^(\[)([^][{}])+\]## || - $url =~ s#^(\{)([^][{}])+\}##) { - $_ = $2; - my $must = $1 eq '{'; - if (m/^relaxed$/) { - $fetch = 1; - } elsif (m/^nofetch$/) { - $fetch = 0; - } elsif (m/^timeout=(\d+)$/) { - $fetchtimeout = $1 <= $maxfetchtimeout ? $1 : $maxfetchtimeout; - } elsif ($must) { - gitfail "unknown/unsupported option `$_'"; - } + +#---------- main program ---------- + +chdir $cachedir or fail "chdir $cachedir: $!"; + +our ($service,$specpath,$spechost,$subdir); +our ($tmpd,$gitd,$lock); +our ($fetch,$url); + +sub servinfo ($) { + my ($msg) = @_; + logm 'info', "service `$specpath': $msg"; } -$url =~ m{^(?:https?|git)://[-.0-9a-z]+/} - or gitfail "unknown/unsupported url scheme or format `$url'"; +sub readcommand () { + $SIG{ALRM} = sub { fail "timeout" }; + alarm 30; + + my $hex_len = xread 4; + fail "Bad hex in packet length" unless $hex_len =~ m|^[0-9a-fA-F]{4}$|; + my $line = xread -4 + hex $hex_len; + unless (($service,$specpath,$spechost) = $line =~ + m|^(git-[a-z-]+) /*([!-~ ]+)\0host=([!-~]+)\0$|) { + $line =~ s|[^ -~]+| |g; + gitfail "unknown/unsupported instruction `$line'" + } -our $subdir = $url; -$subdir =~ s|\\|\\\\|g; -$subdir =~ s|,|\\,|g; -$subdir =~ s|/|,|g; + alarm 0; + + $service eq 'git-upload-pack' + or gitfail "unknown/unsupported service `$service'"; + + $fetch = 2; # 0:don't; 1:try; 2:force + $url = $specpath; + + while ($url =~ s#\s+(\[)([^][{}]+)\]$## || + $url =~ s#\s+(\{)([^][{}]+)\}$##) { + $_ = $2; + my $must = $1 eq '{'; + if (m/^fetch=try$/) { + $fetch = 1; + } elsif (m/^fetch=no$/) { + $fetch = 0; + } elsif (m/^fetch=must$/) { + $fetch = 2; # the default + } elsif (m/^timeout=(\d+)$/ && $1 >= 1) { + $fetchtimeout = $1 <= $maxfetchtimeout ? $1 : $maxfetchtimeout; + } elsif ($must) { + gitfail "unknown/unsupported option `$_'"; + } + } -log 'info', "$specpath locking"; + $url =~ m{^(?:https?|git)://[-.0-9a-z]+/} + or gitfail "unknown/unsupported url scheme or format `$url'"; -my $tmpd= "$subdir\\.tmp"; -my $gitd= "$subdir\\.git"; -my $lock = "$subdir\\.lock"; + $subdir = $url; + $subdir =~ s|\\|\\\\|g; + $subdir =~ s|,|\\,|g; + $subdir =~ s|/|,|g; -open LOCK, "+>", $lock or fail "open/create $lock: $!"; -flock LOCK, LOCK_EX or fail "lock $lock: $!"; + $tmpd= "$subdir\\.tmp"; + $gitd= "$subdir\\.git"; + $lock = "$subdir\\.lock"; -$exists = stat $gitd; -$exists or $!==ENOENT or FAIL "stat $gitd: $!"; + servinfo "locking"; +} -if ($fetch) { +sub clonefetch () { + lockfile \*LOCK, $lock, LOCK_EX; - our @cmd; + my $exists = lstat $gitd; + $exists or $!==ENOENT or fail "lstat $gitd: $!"; - if (!$exists) { - system qw(rm -rf --), $tmpd; - @cmd = qw(git clone -q --mirror), $url; - log 'info', "$specpath cloning"; - } else { - @cmd = qw(git remote update --prune), $url; - log 'info', "$specpath fetching"; - } + our $fetchfail = ''; - my $child = open P, "-|"; - defined $child or fail "fork: $!"; - if (!$child) { - if ($exists) { - chdir $gitd or fail "chdir $gitd: $!"; - } - setpgrp or fail "setpgrp: $!"; - open STDERR, ">&STDOUT" or fail "redirect stderr: $!"; - exec @cmd or fail "exec $cmd[0]: $!"; - } + if ($fetch) { - local $SIG{ALARM} = sub { - kill 9, -$child; - - } or log 'crit', - }; + our @cmd; - alarm($fetchtimeout); - + if (!$exists) { + system qw(rm -rf --), $tmpd; + @cmd = (qw(git clone -q --mirror), $url, $tmpd); + servinfo "cloning"; + } else { + @cmd = (qw(git remote update --prune)); + servinfo "fetching"; + } + my $cmd = "@cmd[0..1]"; + + my $child = open FETCHERR, "-|"; + defined $child or fail "fork: $!"; + if (!$child) { + if ($exists) { + chdir $gitd or fail "chdir $gitd: $!"; + } + setpgrp or fail "setpgrp: $!"; + open STDERR, ">&STDOUT" or fail "redirect stderr: $!"; + exec @cmd or fail "exec $cmd[0]: $!"; + } -printf STDERR "%s [$$] %s %s\n", - strftime("%Y-%m-%d %H:%M:%S %Z", localtime), $server, $client; + my $fetcherr = ''; + my $timedout = 0; + { + local $SIG{ALRM} = sub { + servinfo "fetch/clone timeout"; + $timedout=1; kill 9, -$child; + }; + alarm($fetchtimeout); + $!=0; { local $/=undef; $fetcherr = ; } + !FETCHERR->error or fail "read pipe from fetch/clone: $!"; + alarm(10); + } + kill -9, $child or fail "kill fetch/clone: $!"; + $!=0; $?=0; if (!close FETCHERR) { + fail "reap fetch/clone: $!" if $!; + my $fetchfail = + !($? & 255) ? "$cmd died with error exit code ".($? >> 8) : + $? != 9 ? "$cmd died due to fatal signa, status $?" : + $timedout ? "$cmd timed out (${fetchtimeout}s)" : + "$cmd died due to unexpected SIGKILL"; + if (length $fetcherr) { + $fetchfail .= "\n$fetcherr"; + $fetchfail =~ s/\n$//; + $fetchfail =~ s{\n}{ // }g; + } + if ($fetch >= 2) { + gitfail $fetchfail; + } else { + servinfo "fetch/clone failed: $fetchfail"; + } + } + if (!$exists) { + rename $tmpd, $gitd or fail "rename fresh $tmpd to $gitd: $!"; + $exists = 1; + } + } else { + $fetchfail = 'not attempted'; + } + if (!$exists) { + gitfail "no cached data, and not cloned: $fetchfail"; + } + servinfo "sharing"; + lockfile \*LOCK, $lock, LOCK_SH; # NB releases and relocks -$service eq ' + if (stat $gitd) { + return 1; + } + $!==ENOENT or fail "stat $gitd: $!"; -@@READ_URLMAP@@ + # Well, err, someone must have taken the lock in between + # and garbage collected it. How annoying. + return 0; +} -fail "No global mapping for $uri" unless defined $serve_user; +sub hkfail ($) { my ($msg) = @_; fail "housekeeping: $msg"; } -my ($hn,$ha,$at,$naddrs,@addrs) = gethostbyname $spechost; -fail "hostname/address mismatch ($spechost $server_addr)" unless grep { - $server_addr eq inet_ntoa $_ - } @addrs; +sub housekeeping () { + logm 'info', "housekeeping started"; + foreach $lock (<[a-z]*\\.lock>) { + my $subdir = $lock; $subdir =~ s/\\.lock$//; + if (!lstat $lock) { + $! == ENOENT or hkfail "$lock: lstat: $!"; + next; + } + if (-M _ <= $treeexpiredays) { + logm 'debug', "housekeeping: subdirs $subdir: touched recently"; + next; + } + if (!lockfile \*LOCK, $lock, LOCK_EX|LOCK_NB) { + logm 'info', "housekeeping: subdirs $subdir: lock busy, skipping"; + next; + } + logm 'info', "housekeeping: subdirs $subdir: cleaning"; + eval { + foreach my $suffix (qw(tmp git)) { + my $dir = "${subdir}\\.$suffix"; + my $errs; + remove_tree($dir, { safe=>1, error=>\$errs }); + if (stat $dir) { + foreach my $err (@$errs) { + my ($file, $message) = %$err; + logm 'info', "problem deleting: $file: $message"; + } + die "$dir: problem deleting file(s)\n"; + } + } + }; + if (length $@) { + chomp $@; + logm 'warning', "housekeeping: $subdir: cleanup prevented: $@"; + } else { + unlink $lock or hkfail "remove $lock: $!"; + } + } + open HS, ">", "Housekeeping.stamp" or hkfail "touch Housekeeping.stamp: $!"; + close HS or hkfail "close Housekeeping.stamp: $!"; + logm 'info', "housekeeping finished"; +} -our @opts; +sub housekeepingcheck ($$) { + my ($dofork, $force) = @_; + if (!$force) { + if (!lockfile \*HLOCK, "Housekeeping.lock", LOCK_EX|LOCK_NB) { + logm 'debug', "housekeeping lock taken, not running"; + close HLOCK; + return 0; + } + } + if ($force) { + logm 'info', "housekeeping forced"; + } elsif (!lstat "Housekeeping.stamp") { + $! == ENOENT or fail "lstat Housekeeping.stamp: $!"; + logm 'info', "housekeeping not done yet, will run"; + } elsif (-M _ <= $housekeepingeverydays) { + logm 'debug', "housekeeping done recently"; + close HLOCK; + return 0; + } + if ($dofork) { + my $child = fork; + defined $child or hkfail "fork: $!"; + if (!$child) { + housekeeping(); + exit 0; + } + } else { + housekeeping(); + } + close HLOCK; + return 1; +} -push @opts, "-D$_=${$::{$_}}" - for qw(service specpath spechost - client client_addr client_port - server server_addr server_port); +sub runcommand () { + servinfo "serving"; -fail "no user $serve_user" unless getpwnam($serve_user); + chdir $gitd or fail "chdir $gitd: $!"; -syslog 'notice', "$client $service $uri $serve_user"; + exec qw(git-upload-pack --strict --timeout=1000 .) + or fail "exec git-upload-pack: $!"; +} -my @cmd = ('userv', '-t300', @opts, $serve_user, $service); -no warnings; # suppress errors to stderr -exec @cmd or fail "exec userv: $!"; +sub daemonservice () { + readcommand(); + while (!clonefetch()) { } + housekeepingcheck(1,0); + runcommand(); +} -# end +if ($housekeepingonly) { + housekeepingcheck(0, $housekeepingonly>=2); +} else { + daemonservice(); +}