#!/usr/bin/perl # # git caching proxy # # usage: run it on some port, and then clone or fetch # git://:/OPTIONS # where is http:///... or git:///... # and OPTIONS is zero or more of # [] will be ignored if not recognised # {} error if not recognised # options currently known: # fetch=must fail if the fetch/clone from upstream fails # fetch=no just use what is in the cache # fetch=try use what is in the cache if the fetch/clone fails # timeout= length of time to allow for fetch/clone use strict; use warnings; use POSIX; use Socket; use Sys::Syslog; our $us = 'git-cache-proxy'; our $log; # filehandle (ref), or "1" meaning syslog our $fetchtimeout = 1800; our $maxfetchtimeout = 3600; sub ntoa { my $sockaddr = shift; return ('(local)') unless defined $sockaddr; my ($port,$addr) = sockaddr_in $sockaddr; $addr = inet_ntoa $addr; return ("[$addr]:$port",$addr,$port); } our ($client,$client_addr,$client_port) = ntoa getpeername STDIN; our ($server,$server_addr,$server_port) = ntoa getsockname STDIN; sub ensurelog () { return if $log; openlog $us, qw(pid), 'daemon'; $log = 1; } sub log ($) { my ($pri, $msg) = @_; ensurelog(); my $mainmsg = sprintf "%s-%s: %s", $server, $client, $msg; if (ref $log) { my $wholemsg = sprintf("%s [%d] %s: %s\n", strftime("%Y-%m-%d %H:%M:%S Z", gmtime), $$, $pri, $mainmsg); print $log $wholemsg; } else { syslog $pri, $mainmsg; } } sub fail ($) { my ($msg) = @_; log 'error', $msg; exit 1; } sub gitfail ($) { my ($msg) = @_; close LOCK; alarm 60; log 'notice', $msg; my $gitmsg = "ERR $us: $msg"; $gitmsg = substr($gitmsg,0,65535); # just in case printf "%04x%s", length($gitmsg)+4, $gitmsg; flush STDOUT; exit 1; } our $cachedir = '/var/cache/git-cache-proxy'; for (;;) { last unless @ARGV; last unless $ARGV[0] =~ m/^-/; $_ = shift @ARGV; for (;;) { last unless m/^-./; if (s/^-L(.*)$//) { my $logfile = $_; open STDERR, ">>", $logfile or fail "open $logfile: $!"; $log = \*STDERR; } elsif (s/^-d(.*)$//) { $cachedir = $1; } elsif (s/^--(maxfetchtimeout|fetchtimeout)=(\d+)$//) { ${ $::{$1} } = $2; } else { fail "bad usage: unknown option `$_'"; } } } !@ARGV or fail "bad usage: no non-option arguments permitted"; chdir $cachedir or fail "chdir $cachedir: $!"; our ($service,$specpath,$spechost); $SIG{ALRM} = sub { fail "timeout" }; alarm 30; sub xread { my $length = shift; my $buffer = ""; while ($length > length $buffer) { my $ret = sysread STDIN, $buffer, $length, length $buffer; fail "Expected $length bytes, got ".length $buffer if defined $ret and $ret == 0; fail "read: $!" if not defined $ret and $! != EINTR and $! != EAGAIN; } return $buffer; } my $hex_len = xread 4; fail "Bad hex in packet length" unless $hex_len =~ m|^[0-9a-fA-F]{4}$|; my $line = xread -4 + hex $hex_len; unless (($service,$specpath,$spechost) = $line =~ m|^(git-[a-z-]+) /*([!-~]+)\0host=([!-~]+)\0$|) { $line =~ s|[^ -~]+| |g; fail "Could not parse \"$line\"" } alarm 0; $service eq 'git-upload-pack' or gitfail "unknown/unsupported service `$service'"; my $fetch = 2; # 0:don't; 1:try; 2:force my $url = $specpath; while ($url =~ s#^(\[)([^][{}])+\]## || $url =~ s#^(\{)([^][{}])+\}##) { $_ = $2; my $must = $1 eq '{'; if (m/^fetch=try$/) { $fetch = 1; } elsif (m/^fetch=no$/) { $fetch = 0; } elsif (m/^fetch=must$/) { $fetch = 2; # the default } elsif (m/^timeout=(\d+)$/) { $fetchtimeout = $1 <= $maxfetchtimeout ? $1 : $maxfetchtimeout; } elsif ($must) { gitfail "unknown/unsupported option `$_'"; } } $url =~ m{^(?:https?|git)://[-.0-9a-z]+/} or gitfail "unknown/unsupported url scheme or format `$url'"; our $subdir = $url; $subdir =~ s|\\|\\\\|g; $subdir =~ s|,|\\,|g; $subdir =~ s|/|,|g; log 'info', "$specpath locking"; my $tmpd= "$subdir\\.tmp"; my $gitd= "$subdir\\.git"; my $lock = "$subdir\\.lock"; for (;;) { open LOCK, "+>", $lock or fail "open/create $lock: $!"; flock LOCK, LOCK_EX or fail "lock exclusive $lock: $!"; my $exists = stat $gitd; $exists or $!==ENOENT or FAIL "stat $gitd: $!"; our $fetchfail = ''; if ($fetch) { our @cmd; if (!$exists) { system qw(rm -rf --), $tmpd; @cmd = qw(git clone -q --mirror), $url; log 'info', "$specpath cloning"; } else { @cmd = qw(git remote update --prune), $url; log 'info', "$specpath fetching"; } my $child = open FETCHERR, "-|"; defined $child or fail "fork: $!"; if (!$child) { if ($exists) { chdir $gitd or fail "chdir $gitd: $!"; } setpgrp or fail "setpgrp: $!"; open STDERR, ">&STDOUT" or fail "redirect stderr: $!"; exec @cmd or fail "exec $cmd[0]: $!"; } my $timedout = 0; { local $SIG{ALARM} = sub { $timedout=1; kill 9, -$child; }; alarm($fetchtimeout); my $fetcherr = ''; $!=0; { local $/=undef; $fetcherr = ; } !FETCHERR->error or fail "read pipe from fetch/clone: $!"; alarm(10); } kill -9, $child or fail "kill fetch/clone: $!"; $!=0; $?=0; if (!close FETCHERR) { fail "reap fetch/clone: $!" if $!; my $fetchfail = !($? & 255) ? "$cmd[0] died with error exit code ".($? >> 8) : $? != 9 ? "$cmd[0] died due to fatal signa, status $?" : $timedout ? "$cmd[0] timed out (${fetchtimeout}s)" : "$cmd[0] died due to unexpected SIGKILL"; if (length $fetcherr) { $fetchfail .= "\n$fetcherr"; $fetchfail =~ s{\n}{ // }g; } if ($fetch >= 2) { gitfail $fetchfail; } else { log 'info', "$specpatch fetch failed: $fetchfail"; } } if (!$exists) { rename $tmpd, $gitd or fail "rename fresh $tmpd to $gitd: $!"; $exists = 1; } } else { $fetchfail = 'not attempted'; } if (!$exists) { gitfail "no cached data, and not cloned: $fetchfail"; } flock LOCK, LOCK_UN for fail "unlock $lock: $!"; flock LOCK, LOCK_SH for fail "lock shared $lock: $!"; # actually, just relocking as shared would have the same semantics # but it's best to be explicit if (chdir $gitd) { last; } $!==ENOENT or fail "chdir $gitd: $!"; # Well, err, someone must have taken the lock in between # and garbage collected it. How annoying. } exec qw(git-upload-pack --strict --timeout=1000 .) or fail "exec git-upload-pack: $!";