#!/usr/bin/perl -w # # git caching proxy # usage: run it on some port, and then clone or fetch # "git://:/[ ]" # where is http:///... or git:///... # and is zero or more (whitespace-separated) of # [] will be ignored if not recognised # {} error if not recognised # options currently known: # fetch=must fail if the fetch/clone from upstream fails # fetch=no just use what is in the cache # fetch=try use what is in the cache if the fetch/clone fails # timeout= length of time to allow for fetch/clone # example inetd.conf line: # 9419 stream tcp nowait git-cache /usr/bin/git-cache-proxy git-cache-proxy # you'll need to # adduser git-cache # mkdir /var/cache/git-cache-proxy # chown git-cache /var/cache/git-cache-proxy # git-cache-proxy # Copyright 2010 Tony Finch # Copyright 2013 Ian Jackson # # git-cache-proxy is free software; you can redistribute it and/or # modify them under the terms of the GNU General Public License as # published by the Free Software Foundation; either version 3, or (at # your option) any later version. # # git-cache-proxy is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, consult the Free Software Foundation's # website at www.fsf.org, or the GNU Project website at www.gnu.org. # # (Some code taken from userv-utils's git-daemon.in and git-service.in # which were written by Tony Finch and subsequently # heavily modified by Ian Jackson # and were released under CC0 1.0. The whole program is now GPLv3+.) use strict; use warnings; use POSIX; use Socket; use Sys::Syslog; use Fcntl qw(:flock SEEK_SET); use File::Path qw(remove_tree); our $us = 'git-cache-proxy'; our $debug = 0; our $housekeepingeverydays = 1; our $treeexpiredays = 21; our $fetchtimeout = 1800; our $maxfetchtimeout = 3600; our $cachedir = '/var/cache/git-cache-proxy'; our $housekeepingonly = 0; #---------- error handling and logging ---------- # This is a bit fiddly, because we want to catch errors sent to stderr # and dump them to syslog if we can, but only if we are running as an # inetd service. our $log; # filehandle (ref), or "1" meaning syslog sub ntoa { my $sockaddr = shift; return ('(local)') unless defined $sockaddr; my ($port,$addr) = sockaddr_in $sockaddr; $addr = inet_ntoa $addr; return ("[$addr]:$port",$addr,$port); } our ($client) = ntoa getpeername STDIN; our ($server) = ntoa getsockname STDIN; sub ensurelog () { return if $log; openlog $us, qw(pid), 'daemon'; $log = 1; } sub logm ($$) { my ($pri, $msg) = @_; return if $pri eq 'debug' && !$debug; if ($client eq '(local)') { print STDERR "$us: $pri: $msg\n" or die $!; return; } ensurelog(); my $mainmsg = sprintf "%s-%s: %s", $server, $client, $msg; if (ref $log) { my $wholemsg = sprintf("%s [%d] %s: %s\n", strftime("%Y-%m-%d %H:%M:%S Z", gmtime), $$, $pri eq 'err' ? 'error' : $pri, $mainmsg); print $log $wholemsg; } else { syslog $pri, "%s", "$pri $mainmsg"; } } if ($client ne '(local)') { open STDERR, ">/dev/null" or exit 255; open TEMPERR, "+>", undef or exit 255; open STDERR, ">&TEMPERR" or exit 255; } END { if ($client ne '(local)') { if ($?) { logm 'crit', "crashing ($?)"; } seek TEMPERR, 0, SEEK_SET; while () { chomp; logm 'crit', $_; } } exit $?; } sub fail ($) { my ($msg) = @_; logm 'err', $msg; exit 0; } sub gitfail ($) { my ($msg) = @_; close LOCK; alarm 60; logm 'notice', $msg; my $gitmsg = "ERR $us: $msg"; $gitmsg = substr($gitmsg,0,65535); # just in case printf "%04x%s", length($gitmsg)+4, $gitmsg; flush STDOUT; exit 0; } #---------- argument parsing ---------- for (;;) { last unless @ARGV; last unless $ARGV[0] =~ m/^-/; $_ = shift @ARGV; for (;;) { last unless m/^-./; if (s/^-H/-/) { $housekeepingonly++; } elsif (s/^-D/-/) { $debug++; } elsif (s/^-L(.*)$//) { my $logfile = $_; open STDERR, ">>", $logfile or fail "open $logfile: $!"; $log = \*STDERR; } elsif (s/^-d(.*)$//) { $cachedir = $1; } elsif (s/^--( max-fetch-timeout | fetch-timeout | tree-expire-days | housekeeping-interval-days )=(\d+)$//x) { my $vn = $1; $vn =~ y/-//d; die $vn unless defined ${ $::{$vn} }; ${ $::{$vn} } = $2; } else { fail "bad usage: unknown option `$_'"; } } } !@ARGV or fail "bad usage: no non-option arguments permitted"; #---------- utility functions ---------- sub lockfile ($$$) { my ($fh, $fn, $flockmode) = @_; my $what = $fn.(($flockmode & ~LOCK_NB) == LOCK_SH ? " (shared)" : ""); for (;;) { close $fh; open $fh, '+>', $fn or fail "open/create $fn for lock: $!"; logm 'debug', "lock $what: acquiring"; if (!flock $fh, $flockmode) { if ($flockmode & LOCK_NB && $! == EWOULDBLOCK) { return 0; # ok then } fail "lock $what: $!"; } stat $fh or fail "stat opened $fn: $!"; my $fh_ino = ((stat _)[1]); if (!stat $fn) { $! == ENOENT or fail "stat $fn: $!"; next; } my $fn_ino = ((stat _)[1]); if ($fn_ino == $fh_ino) { logm 'debug', "lock $what: acquired"; return 1; } logm 'debug', "lock $what: deleted, need to loop again"; # oh dear } } sub xread { my $length = shift; my $buffer = ""; while ($length > length $buffer) { my $ret = sysread STDIN, $buffer, $length, length $buffer; fail "expected $length bytes, got ".length $buffer if defined $ret and $ret == 0; fail "read: $!" if not defined $ret and $! != EINTR and $! != EAGAIN; } return $buffer; } #---------- main program ---------- chdir $cachedir or fail "chdir $cachedir: $!"; our ($service,$specpath,$spechost,$subdir); our ($tmpd,$gitd,$lock); our ($fetch,$url); sub servinfo ($) { my ($msg) = @_; logm 'info', "service `$specpath': $msg"; } sub readcommand () { $SIG{ALRM} = sub { fail "timeout" }; alarm 30; my $hex_len = xread 4; fail "Bad hex in packet length" unless $hex_len =~ m|^[0-9a-fA-F]{4}$|; my $line = xread -4 + hex $hex_len; unless (($service,$specpath,$spechost) = $line =~ m|^(git-[a-z-]+) /*([!-~ ]+)\0host=([!-~]+)\0$|) { $line =~ s|[^ -~]+| |g; gitfail "unknown/unsupported instruction `$line'" } alarm 0; $service eq 'git-upload-pack' or gitfail "unknown/unsupported service `$service'"; $fetch = 2; # 0:don't; 1:try; 2:force $url = $specpath; while ($url =~ s#\s+(\[)([^][{}]+)\]$## || $url =~ s#\s+(\{)([^][{}]+)\}$##) { $_ = $2; my $must = $1 eq '{'; if (m/^fetch=try$/) { $fetch = 1; } elsif (m/^fetch=no$/) { $fetch = 0; } elsif (m/^fetch=must$/) { $fetch = 2; # the default } elsif (m/^timeout=(\d+)$/ && $1 >= 1) { $fetchtimeout = $1 <= $maxfetchtimeout ? $1 : $maxfetchtimeout; } elsif ($must) { gitfail "unknown/unsupported option `$_'"; } } $url =~ m{^(?:https?|git)://[-.0-9a-z]+/} or gitfail "unknown/unsupported url scheme or format `$url'"; $subdir = $url; $subdir =~ s|\\|\\\\|g; $subdir =~ s|,|\\,|g; $subdir =~ s|/|,|g; $tmpd= "$subdir\\.tmp"; $gitd= "$subdir\\.git"; $lock = "$subdir\\.lock"; servinfo "locking"; } sub clonefetch () { lockfile \*LOCK, $lock, LOCK_EX; my $exists = lstat $gitd; $exists or $!==ENOENT or fail "lstat $gitd: $!"; our $fetchfail = ''; if ($fetch) { our @cmd; if (!$exists) { system qw(rm -rf --), $tmpd; @cmd = (qw(git clone -q --mirror), $url, $tmpd); servinfo "cloning"; } else { @cmd = (qw(git remote update --prune)); servinfo "fetching"; } my $cmd = "@cmd[0..1]"; my $child = open FETCHERR, "-|"; defined $child or fail "fork: $!"; if (!$child) { if ($exists) { chdir $gitd or fail "chdir $gitd: $!"; } setpgrp or fail "setpgrp: $!"; open STDERR, ">&STDOUT" or fail "redirect stderr: $!"; exec @cmd or fail "exec $cmd[0]: $!"; } my $fetcherr = ''; my $timedout = 0; { local $SIG{ALRM} = sub { servinfo "fetch/clone timeout"; $timedout=1; kill 9, -$child; }; alarm($fetchtimeout); $!=0; { local $/=undef; $fetcherr = ; } !FETCHERR->error or fail "read pipe from fetch/clone: $!"; alarm(10); } kill -9, $child or fail "kill fetch/clone: $!"; $!=0; $?=0; if (!close FETCHERR) { fail "reap fetch/clone: $!" if $!; my $fetchfail = !($? & 255) ? "$cmd died with error exit code ".($? >> 8) : $? != 9 ? "$cmd died due to fatal signa, status $?" : $timedout ? "$cmd timed out (${fetchtimeout}s)" : "$cmd died due to unexpected SIGKILL"; if (length $fetcherr) { $fetchfail .= "\n$fetcherr"; $fetchfail =~ s/\n$//; $fetchfail =~ s{\n}{ // }g; } if ($fetch >= 2) { gitfail $fetchfail; } else { servinfo "fetch/clone failed: $fetchfail"; } } if (!$exists) { rename $tmpd, $gitd or fail "rename fresh $tmpd to $gitd: $!"; $exists = 1; } } else { $fetchfail = 'not attempted'; } if (!$exists) { gitfail "no cached data, and not cloned: $fetchfail"; } servinfo "sharing"; lockfile \*LOCK, $lock, LOCK_SH; # NB releases and relocks if (stat $gitd) { return 1; } $!==ENOENT or fail "stat $gitd: $!"; # Well, err, someone must have taken the lock in between # and garbage collected it. How annoying. return 0; } sub hkfail ($) { my ($msg) = @_; fail "housekeeping: $msg"; } sub housekeeping () { logm 'info', "housekeeping started"; foreach $lock (<[a-z]*\\.lock>) { my $subdir = $lock; $subdir =~ s/\\.lock$//; if (!lstat $lock) { $! == ENOENT or hkfail "$lock: lstat: $!"; next; } if (-M _ <= $treeexpiredays) { logm 'debug', "housekeeping: subdirs $subdir: touched recently"; next; } if (!lockfile \*LOCK, $lock, LOCK_EX|LOCK_NB) { logm 'info', "housekeeping: subdirs $subdir: lock busy, skipping"; next; } logm 'info', "housekeeping: subdirs $subdir: cleaning"; my $ok = 1; foreach my $suffix (qw(tmp git)) { my $dir = "${subdir}\\.$suffix"; my $errs; remove_tree($dir, { safe=>1, error=>\$errs }); if (stat $dir) { $ok = 0; logm 'warning', "housekeeping: $dir: problems with". "deletion prevent cleanup:"; foreach my $err (@$errs) { logm 'info', "problem deleting: $err->[0]: $err->[1]"; } } } if ($ok) { unlink $lock or hkfail "remove $lock: $!"; } } open HS, ">", "Housekeeping.stamp" or hkfail "touch Housekeeping.stamp: $!"; close HS or hkfail "close Housekeeping.stamp: $!"; logm 'info', "housekeeping finished"; } sub housekeepingcheck ($$) { my ($dofork, $force) = @_; if (!$force) { if (!lockfile \*HLOCK, "Housekeeping.lock", LOCK_EX|LOCK_NB) { logm 'debug', "housekeeping lock taken, not running"; close HLOCK; return 0; } } if ($force) { logm 'info', "housekeeping forced"; } elsif (!lstat "Housekeeping.stamp") { $! == ENOENT or fail "lstat Housekeeping.stamp: $!"; logm 'info', "housekeeping not done yet, will run"; } elsif (-M _ <= $housekeepingeverydays) { logm 'debug', "housekeeping done recently"; close HLOCK; return 0; } if ($dofork) { my $child = fork; defined $child or hkfail "fork: $!"; if (!$child) { housekeeping(); exit 0; } } else { housekeeping(); } close HLOCK; return 1; } sub runcommand () { servinfo "serving"; chdir $gitd or fail "chdir $gitd: $!"; exec qw(git-upload-pack --strict --timeout=1000 .) or fail "exec git-upload-pack: $!"; } sub daemonservice () { readcommand(); while (!clonefetch()) { } housekeepingcheck(1,0); runcommand(); } if ($housekeepingonly) { housekeepingcheck(0, $housekeepingonly>=2); } else { daemonservice(); }