#
# git caching proxy
+# Suitable only for exposing to semi-trusted clients: clients are not
+# supposed to be able to take over the server. However, clients can
+# probably deny service to each other because the current
+# implementation is not very good at handling various out-of-course
+# situations (notably, clients which are too slow).
+
# usage: run it on some port, and then clone or fetch
# "git://<realhost>:<realport>/<real-git-url>[ <options>]"
# where <real-git-url> is http://<host>/... or git://<host>/...
# fetch=no just use what is in the cache
# fetch=try use what is in the cache if the fetch/clone fails
# timeout=<seconds> length of time to allow for fetch/clone
-
+# housekeeping-interval-days=<integer> } housekeeping tuning parameters
+# tree-expire-days=<integer> }
+# gc-interval-days=<integer> }
+
+# example inetd.conf line:
+# 9419 stream tcp nowait git-cache /usr/bin/git-cache-proxy git-cache-proxy
+# you'll need to
+# adduser git-cache
+# mkdir /var/cache/git-cache-proxy
+# chown git-cache /var/cache/git-cache-proxy
+
+# git-cache-proxy
+# Copyright 2010 Tony Finch
+# Copyright 2013,2014 Ian Jackson
+# Copyright 2017 Citrix
+#
# git-cache-proxy is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 3, or (at
# your option) any later version.
-#
+#
# git-cache-proxy is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
our $us = 'git-cache-proxy';   # program name: prefix of stderr messages, tag for logger
+our $debug = 0;                # -D increments; 'debug' priority messages are dropped while 0
+our $housekeepingeverydays = 1; # housekeeping is skipped if Housekeeping.stamp is at most this many days old
+# The option parser turns "--housekeeping-interval-days" into the variable
+# name "housekeepingintervaldays" (hyphens deleted) and looks that name up
+# in the main symbol table.  No such variable existed, so the documented
+# option always died.  Alias the derived name to the real variable so the
+# option sets $housekeepingeverydays.
+our $housekeepingintervaldays;
+*housekeepingintervaldays = \$housekeepingeverydays;
+our $gcintervaldays = 10;       # a recently-used tree is gc'd when its gc stamp is older than this (days)
+our $treeexpiredays = 21;       # trees whose lock file is older than this (days) are deleted
+our $fetchtimeout = 3600;       # fetch/clone timeout in seconds; a client's timeout= option replaces it
+our $maxfetchtimeout = 7200;    # upper bound applied to a client-supplied timeout=
+our $servetimeout = 3600;       # passed to git-upload-pack as --timeout=
+our $cachedir = '/var/cache/git-cache-proxy'; # overridden by -d<dir>
+our $housekeepingonly = 0;      # -H increments; nonzero means run housekeeping instead of serving
+
#---------- error handling and logging ----------
# This is a bit fiddly, because we want to catch errors sent to stderr
sub logm ($$) {
my ($pri, $msg) = @_;
+ return if $pri eq 'debug' && !$debug;
if ($client eq '(local)') {
print STDERR "$us: $pri: $msg\n" or die $!;
- exit 1;
+ return;
}
ensurelog();
my $mainmsg = sprintf "%s-%s: %s", $server, $client, $msg;
my $wholemsg = sprintf("%s [%d] %s: %s\n",
strftime("%Y-%m-%d %H:%M:%S Z", gmtime),
$$,
- $pri,
+ $pri eq 'err' ? 'error' : $pri,
$mainmsg);
print $log $wholemsg;
} else {
- syslog $pri, $mainmsg;
+ syslog $pri, "%s", "$pri $mainmsg";
}
}
exit 0;
}
+$SIG{ALRM} = sub { fail "timeout" };  # an alarm() that expires calls fail with the message "timeout"
+
sub gitfail ($) {
my ($msg) = @_;
close LOCK;
#---------- argument parsing ----------
-our $housekeepingthreshdays = 1;
-our $treeexpiredays = 21;
-our $fetchtimeout = 1800;
-our $maxfetchtimeout = 3600;
-our $cachedir = '/var/cache/git-cache-proxy';
-
for (;;) {
last unless @ARGV;
last unless $ARGV[0] =~ m/^-/;
$_ = shift @ARGV;
for (;;) {
last unless m/^-./;
- if (s/^-L(.*)$//) {
+ if (s/^-H/-/) {
+ $housekeepingonly++;
+ } elsif (s/^-D/-/) {
+ $debug++;
+ } elsif (s/^-L(.*)$//) {
my $logfile = $_;
open STDERR, ">>", $logfile or fail "open $logfile: $!";
$log = \*STDERR;
} elsif (s/^-d(.*)$//) {
$cachedir = $1;
- } elsif (s/^--(maxfetchtimeout|fetchtimeout)=(\d+)$//) {
- ${ $::{$1} } = $2;
+ } elsif (s/^--( max-fetch-timeout
+ | fetch-timeout
+ | serve-timeout
+ | tree-expire-days
+ | housekeeping-interval-days
+ | gc-interval-days
+ )=(\d+)$//x) {
+ my $vn = $1;
+ $vn =~ y/-//d;
+ die $vn unless defined ${ $::{$vn} };
+ ${ $::{$vn} } = $2;
} else {
fail "bad usage: unknown option `$_'";
}
#---------- utility functions ----------
-sub lockfile ($$) {
+sub lockfile ($$$) {
my ($fh, $fn, $flockmode) = @_;
+ my $what = $fn.(($flockmode & ~LOCK_NB) == LOCK_SH ? " (shared)" : "");
for (;;) {
close $fh;
open $fh, '+>', $fn or fail "open/create $fn for lock: $!";
- if (!flock $fh, $lockmode) {
- if ($lockmode & LOCK_NB and $! == EWOULDBLOCK) {
+ logm 'debug', "lock $what: acquiring";
+ if (!flock $fh, $flockmode) {
+ if ($flockmode & LOCK_NB && $! == EWOULDBLOCK) {
return 0; # ok then
}
- fail "lock $fn".
- (($flockmode & ~LOCK_NB) == LOCK_SH ? " (shared)" : "").
- ": $!";
+ fail "lock $what: $!";
}
stat $fh or fail "stat opened $fn: $!";
my $fh_ino = ((stat _)[1]);
next;
}
my $fn_ino = ((stat _)[1]);
- return 1 if $fn_ino == $fh_ino;
+ if ($fn_ino == $fh_ino) {
+ logm 'debug', "lock $what: acquired";
+ return 1;
+ }
+ logm 'debug', "lock $what: deleted, need to loop again";
# oh dear
}
}
}
sub readcommand () {
- $SIG{ALRM} = sub { fail "timeout" };
alarm 30;
my $hex_len = xread 4;
$fetch = 0;
} elsif (m/^fetch=must$/) {
$fetch = 2; # the default
- } elsif (m/^timeout=(\d+)$/) {
+ } elsif (m/^timeout=(\d+)$/ && $1 >= 1) {
$fetchtimeout = $1 <= $maxfetchtimeout ? $1 : $maxfetchtimeout;
} elsif ($must) {
gitfail "unknown/unsupported option `$_'";
servinfo "locking";
}
+# update_gcstamp GITDIR
+# Create (or truncate) GITDIR/cache-proxy-gc.stamp; housekeeping reads the
+# age of that file to decide whether the tree needs another gc.
+# If the open fails with ENOENT the call is a silent no-op; any other
+# open failure is reported through fail.
+sub update_gcstamp ($) {
+ my $gitdir = $_[0];
+ my $gcdone = "$gitdir/cache-proxy-gc.stamp";
+ open GCSTAMP, '>', $gcdone
+ and return close GCSTAMP;
+ return $!==ENOENT || fail "create $gcdone: $!";
+}
+
sub clonefetch () {
lockfile \*LOCK, $lock, LOCK_EX;
if ($fetch) {
+ my $rbits = '';
+ vec($rbits,0,1) = 1;
+ my $ebits = $rbits;
+ my $r=select $rbits,undef,$ebits,0;
+ $r>=0 or fail "select recheck STDOUT failed: $!";
+ if ($r) {
+ servinfo 'client disconnected (stdin unexpectedly'.
+ (vec($rbits,0,1) ? ' readable' : '').
+ (vec($ebits,0,1) ? ' exception' : '').
+ ')';
+ exit 0;
+ }
+
our @cmd;
if (!$exists) {
servinfo "fetch/clone failed: $fetchfail";
}
}
+ alarm 0;
if (!$exists) {
+ update_gcstamp($tmpd);
rename $tmpd, $gitd or fail "rename fresh $tmpd to $gitd: $!";
$exists = 1;
}
servinfo "sharing";
lockfile \*LOCK, $lock, LOCK_SH; # NB releases and relocks
- if (chdir $gitd) {
+ if (stat $gitd) {
return 1;
}
- $!==ENOENT or fail "chdir $gitd: $!";
+ $!==ENOENT or fail "stat $gitd: $!";
# Well, err, someone must have taken the lock in between
# and garbage collected it. How annoying.
return 0;
}
+# hkfail MSG
+# Report a housekeeping error: hands MSG to fail with the prefix
+# "housekeeping: " prepended.
+sub hkfail ($) {
+ my $msg = shift;
+ fail "housekeeping: $msg";
+}
+
+# housekeeping
+# Walks every <name>\.lock file matched by the glob in the current
+# directory and handles the corresponding tree:
+#  - lock file at most $treeexpiredays days old: run "git gc" in
+#    <name>\.git, unless its cache-proxy-gc.stamp is at most
+#    $gcintervaldays days old (takes the lock, blocking);
+#  - otherwise: delete <name>\.tmp and <name>\.git and then the lock
+#    file itself (non-blocking lock attempt; a busy tree is skipped).
+# Finally (re)creates Housekeeping.stamp.  Unexpected errors go through
+# hkfail; a failed gc or a failed deletion is only logged.
sub housekeeping () {
+ logm 'info', "housekeeping started";
foreach $lock (<[a-z]*\\.lock>) {
+ my $subdir = $lock; $subdir =~ s/\\.lock$//;
+ my $gcdone = "$subdir\\.git/cache-proxy-gc.stamp";
if (!lstat $lock) {
- $! == ENOENT or fail "housekeeping: $lock: lstat: $!";
+ $! == ENOENT or hkfail "$lock: lstat: $!";
next;
}
+ # What to do with this tree: a description for the log, extra flock
+ # flags (0 or LOCK_NB), and the closure to run once the lock is held.
+ my ($mode_what,$mode_locknb,$mode_action);
if (-M _ <= $treeexpiredays) {
- logm 'debug', "housekeeping: $lock: not too old";
- next;
+ # Returns (needs-gc flag, message), judged by the age of the gc stamp.
+ my $gccheck = sub {
+ if (!lstat "$gcdone") {
+ $! == ENOENT or hkfail "$gcdone: lstat: $!";
+ return 1, "touched recently, never gc'd!";
+ } elsif (-M _ <= $gcintervaldays) {
+ return 0, "touched recently, gc'd recently";
+ } else {
+ return 1, "touched recently, needs gc";
+ }
+ };
+ my ($needsgc, $gcmsg) = $gccheck->();
+ logm 'debug', "housekeeping: subdirs $subdir: $gcmsg";
+ next unless $needsgc;
+ $mode_what = 'garbage collecting';
+ $mode_locknb = 0;
+ $mode_action = sub {
+ # Check again now that the lock is held: the stamp may have been
+ # refreshed while we were waiting for the lock.
+ my ($needsgc, $gcmsg) = $gccheck->();
+ if (!$needsgc) {
+ logm 'info',
+ "housekeeping: subdirs $subdir: someone else has gc'd";
+ return;
+ }
+ logm 'debug', "housekeeping: subdirs $subdir: $gcmsg (2)";
+ # The repository is "$subdir\.git", as everywhere else in this
+ # function; the previous path "$subdir/gc.log" named a directory
+ # this function never uses, so a leftover gc.log was never removed.
+ my $gclog = "$subdir\\.git/gc.log";
+ unlink $gclog or $!==ENOENT or hkfail "remove $gclog: $!";
+ my $child = fork // hkfail "fork (for $subdir): $!";
+ if (!$child) {
+ # Child: enter the repository and become "git gc".  A repository
+ # that no longer exists is not treated as an error.
+ if (!chdir "$subdir\\.git") {
+ exit 0 if $!==ENOENT;
+ die "for gc: chdir $subdir: $!\n";
+ }
+ exec qw(git gc --quiet);
+ die "exec git gc (for $subdir): $!\n";
+ }
+ waitpid($child, 0) == $child or hkfail "waitpid failed! $!";
+ if ($?) {
+ logm 'err',
+ "housekeeping: subdirs $subdir: gc failed (wait status $?)";
+ } else {
+ update_gcstamp("$subdir\\.git");
+ logm 'debug',
+ "housekeeping: subdirs $subdir: gc done";
+ }
+ };
+ } else {
+ $mode_what = 'cleaning';
+ $mode_locknb = LOCK_NB;
+ $mode_action = sub {
+ eval {
+ # Handle .tmp first, then rename .git onto .tmp and delete that.
+ foreach my $suffix (qw(tmp git)) {
+ my $dir = "${subdir}\\.$suffix";
+ my $tdir = "${subdir}\\.tmp";
+ if ($dir ne $tdir) {
+ if (!rename $dir,$tdir) {
+ next if $! == ENOENT;
+ die "$dir: cannot rename to $tdir: $!\n";
+ }
+ }
+ system qw(rm -rf --), $tdir;
+ # Success is judged by the directory being gone, not by rm's status.
+ if (stat $tdir) {
+ die "$dir: problem deleting file(s), rm exited $?\n";
+ } elsif ($! != ENOENT) {
+ die "$tdir: cannot stat after deletion: $!\n";
+ }
+ }
+ };
+ if (length $@) {
+ chomp $@;
+ logm 'warning', "housekeeping: $subdir: cleanup prevented: $@";
+ } else {
+ # Both directories are gone, so the lock file can go too.
+ unlink $lock or hkfail "remove $lock: $!";
+ }
+ };
}
- my $subdir = $lock; $subdir =~ s/\\.lock$//;
- my $ok = 1;
- foreach my $suffix (qw(tmp git)) {
- my $dir = "${subdir}\\.$suffix";
- my $errs;
- remove_tree($dir, { safe=>1, error=>\$errs });
- $ok = 0 if @$errs;
- foreach my $err (@$errs) {
- logm 'warning', "problem deleting: $err[0]: $err[1]";
- }
+ if (!lockfile \*LOCK, $lock, LOCK_EX|$mode_locknb) {
+ # lockfile returning false is only expected for a non-blocking attempt.
+ die $! unless $mode_locknb;
+ logm 'info', "housekeeping: subdirs $subdir: lock busy, skipping";
+ next;
}
- if ($ok) {
-
+ logm 'info', "housekeeping: subdirs $subdir: $mode_what";
+ $mode_action->();
+ }
+ # Record completion; housekeepingcheck looks at the age of this file.
+ open HS, ">", "Housekeeping.stamp" or hkfail "touch Housekeeping.stamp: $!";
+ close HS or hkfail "close Housekeeping.stamp: $!";
+ logm 'info', "housekeeping finished";
+}
sub housekeepingcheck ($$) {
my ($dofork, $force) = @_;
- or fail "open/create Housekeeping.lock: $!";
if (!$force) {
if (!lockfile \*HLOCK, "Housekeeping.lock", LOCK_EX|LOCK_NB) {
logm 'debug', "housekeeping lock taken, not running";
logm 'info', "housekeeping forced";
} elsif (!lstat "Housekeeping.stamp") {
$! == ENOENT or fail "lstat Housekeeping.stamp: $!";
- logm 'info', "housekeeping stamp missing, will run";
- } elsif (-M _ <= $housekeepingthreshdays) {
+ logm 'info', "housekeeping not done yet, will run";
+ } elsif (-M _ <= $housekeepingeverydays) {
logm 'debug', "housekeeping done recently";
close HLOCK;
return 0;
}
if ($dofork) {
my $child = fork;
- defined $child or fail "fork for housekeeping: $!";
+ defined $child or fail "fork: $!";
if (!$child) {
+ open STDERR, "|logger -p daemon.warning -t '$us(housekeeping)'"
+ or die "fork: logger $!";
housekeeping();
exit 0;
}
- return 1;
} else {
housekeeping();
- return 1;
}
+ close HLOCK;
+ return 1;
}
sub runcommand () {  # Last step of serving a client: replace this process with git-upload-pack.
- servinfo "servicing";
- exec qw(git-upload-pack --strict --timeout=1000 .)
+ servinfo "serving";
+
+ chdir $gitd or fail "chdir $gitd: $!";  # git-upload-pack is given "." below, so enter $gitd first
+
+ exec qw(git-upload-pack --strict), "--timeout=$servetimeout", qw(.)  # timeout taken from $servetimeout
or fail "exec git-upload-pack: $!";  # only reached when exec itself failed
}
sub daemonservice () {  # Handle one client: read the request, get the tree ready, then serve it.
readcommand();
while (!clonefetch()) { }  # clonefetch returns 0 when the tree vanished while it was relocking; try again
+ housekeepingcheck(1,0);  # dofork=1, force=0: housekeeping, if due, runs in a forked child
runcommand();
}
-daemonservice();
+if ($housekeepingonly) {  # -H was given: do housekeeping instead of serving a client
+ housekeepingcheck(0, $housekeepingonly>=2);  # dofork=0: run in this process; force when -H was given at least twice
+} else {
+ daemonservice();
+}