#!/usr/bin/perl -w
#
# git caching proxy
#
# usage: run it on some port, and then clone or fetch
#  "git://<realgitcacheproxy>:<port>/<real-git-url>[ <options>]"
# where <real-git-url> is http://<host>/... or git://<host>/...
# and <options> is zero or more (whitespace-separated) of
#    [<some-option>]      will be ignored if not recognised
#    {<some-option>}      error if not recognised
# options currently known:
#    fetch=must         fail if the fetch/clone from upstream fails
#    fetch=no           just use what is in the cache
#    fetch=try          use what is in the cache if the fetch/clone fails
#    timeout=<seconds>  length of time to allow for fetch/clone
#
# command line options (parsed below):
#    -L<logfile>                  append log messages to <logfile>
#                                 instead of sending them to syslog
#    -d<cachedir>                 cache directory (default
#                                 /var/cache/git-cache-proxy)
#    --fetchtimeout=<seconds>     default fetch/clone timeout
#    --maxfetchtimeout=<seconds>  upper bound for a client's timeout=

use strict;
use warnings;

use POSIX;
use Socket;
use Sys::Syslog;
use Fcntl qw(:flock SEEK_SET);

our $us = 'git-cache-proxy';

our $log; # filehandle (ref), or "1" meaning syslog

BEGIN {
    # Capture anything written to stderr (including perl's own
    # diagnostics) in an anonymous temporary file, so that the END
    # block below can replay it through logm.
    open STDERR, ">/dev/null" or exit 255;
    open TEMPERR, "+>", undef or exit 255;
    open STDERR, ">&TEMPERR" or exit 255;

    # Converts a packed sockaddr_in into ("[addr]:port", addr, port);
    # returns just ('(local)') when no socket address is available.
    sub ntoa {
        my $sockaddr = shift;
        return ('(local)') unless defined $sockaddr;
        my ($port,$addr) = sockaddr_in $sockaddr;
        $addr = inet_ntoa $addr;
        return ("[$addr]:$port",$addr,$port);
    }

    # The connection is on stdin.
    our ($client,$client_addr,$client_port) = ntoa getpeername STDIN;
    our ($server,$server_addr,$server_port) = ntoa getsockname STDIN;

    # Falls back to syslog if no log destination has been chosen yet.
    sub ensurelog () {
        return if $log;
        openlog $us, qw(pid), 'daemon';
        $log = 1;
    }

    # logm PRIORITY, MESSAGE
    # Logs MESSAGE, prefixed with "server-client: ", either to the log
    # filehandle (with a UTC timestamp, pid and priority) or to syslog.
    sub logm ($$) {
        my ($pri, $msg) = @_;
        ensurelog();
        my $mainmsg = sprintf "%s-%s: %s", $server, $client, $msg;
        if (ref $log) {
            my $wholemsg = sprintf("%s [%d] %s: %s\n",
                                   strftime("%Y-%m-%d %H:%M:%S Z", gmtime),
                                   $$,
                                   $pri, $mainmsg);
            print $log $wholemsg;
        } else {
            syslog $pri, $mainmsg;
        }
    }

    END {
        if ($?) { logm 'crit', "crashing ($?)"; }
        # Replay whatever ended up in the captured stderr, one log
        # message per line.
        seek TEMPERR, 0, SEEK_SET;
        while (<TEMPERR>) {
            chomp;
            logm 'crit', $_;
        }
        exit $?;
    }
}

our $fetchtimeout = 1800;
our $maxfetchtimeout = 3600;

# fail MESSAGE
# Logs MESSAGE at priority 'err' and exits; nothing is sent to the client.
sub fail ($) {
    my ($msg) = @_;
    logm 'err', $msg;
    exit 0;
}

# gitfail MESSAGE
# Logs MESSAGE at priority 'notice', reports it to the client as a
# single "ERR" pkt-line, and exits.
sub gitfail ($) {
    my ($msg) = @_;
    # Drop the lock if we hold it.  gitfail can be called before LOCK
    # has ever been opened; closing an unopened handle would emit a
    # warning which the END block would then log as 'crit'.
    close LOCK if defined fileno LOCK;
    alarm 60;
    logm 'notice', $msg;
    my $gitmsg = "ERR $us: $msg";
    # The length prefix is four hex digits and counts itself, and git
    # limits a pkt-line to 65520 bytes in total, so cap the payload at
    # 65516 bytes (the fetch/clone output can make the message long).
    $gitmsg = substr($gitmsg,0,65516);
    printf "%04x%s", length($gitmsg)+4, $gitmsg;
    flush STDOUT;
    exit 0;
}

our $cachedir = '/var/cache/git-cache-proxy';

# Command line option parsing.
for (;;) {
    last unless @ARGV;
    last unless $ARGV[0] =~ m/^-/;
    $_ = shift @ARGV;
    for (;;) {
        last unless m/^-./;
        if (s/^-L(.*)$//) {
            # The substitution has emptied $_, so the file name must
            # come from the capture group.
            my $logfile = $1;
            open STDERR, ">>", $logfile or fail "open $logfile: $!";
            $log = \*STDERR;
        } elsif (s/^-d(.*)$//) {
            $cachedir = $1;
        } elsif (s/^--(maxfetchtimeout|fetchtimeout)=(\d+)$//) {
            # Assigns to the package variable of the same name via the
            # main:: symbol table.
            ${ $::{$1} } = $2;
        } else {
            fail "bad usage: unknown option `$_'";
        }
    }
}

!@ARGV or fail "bad usage: no non-option arguments permitted";

chdir $cachedir or fail "chdir $cachedir: $!";

our ($service,$specpath,$spechost);

# Give the client 30 seconds to send its request.
$SIG{ALRM} = sub { fail "timeout" };
alarm 30;

# xread LENGTH
# Reads exactly LENGTH bytes from stdin and returns them; fails on
# end of file or on any read error other than EINTR/EAGAIN.
sub xread {
    my $length = shift;
    my $buffer = "";
    while ($length > length $buffer) {
        my $ret = sysread STDIN, $buffer, $length, length $buffer;
        fail "Expected $length bytes, got ".length $buffer
            if defined $ret and $ret == 0;
        fail "read: $!"
            if not defined $ret and $! != EINTR and $! != EAGAIN;
    }
    return $buffer;
}

# The request is one pkt-line: four hex digits of length (which
# includes those four bytes), then the payload.
my $hex_len = xread 4;
fail "Bad hex in packet length" unless $hex_len =~ m|^[0-9a-fA-F]{4}$|;
my $line = xread -4 + hex $hex_len;

unless (($service,$specpath,$spechost) =
        $line =~ m|^(git-[a-z-]+) /*([!-~ ]+)\0host=([!-~]+)\0$|) {
    # Replace non-printing characters before echoing the request back.
    $line =~ s|[^ -~]+| |g;
    gitfail "unknown/unsupported instruction `$line'"
}

alarm 0;

$service eq 'git-upload-pack'
    or gitfail "unknown/unsupported service `$service'";

my $fetch = 2; # 0:don't; 1:try; 2:force
my $url = $specpath;

# Strip trailing [option] / {option} words off the url, last one first.
while ($url =~ s#\s+(\[)([^][{}]+)\]$## ||
       $url =~ s#\s+(\{)([^][{}]+)\}$##) {
    $_ = $2;
    my $must = $1 eq '{';
    if (m/^fetch=try$/) {
        $fetch = 1;
    } elsif (m/^fetch=no$/) {
        $fetch = 0;
    } elsif (m/^fetch=must$/) {
        $fetch = 2; # the default
    } elsif (m/^timeout=(\d+)$/) {
        # A client may ask for any timeout up to $maxfetchtimeout.
        $fetchtimeout = $1 <= $maxfetchtimeout ? $1 : $maxfetchtimeout;
    } elsif ($must) {
        gitfail "unknown/unsupported option `$_'";
    }
}

$url =~ m{^(?:https?|git)://[-.0-9a-z]+/}
    or gitfail "unknown/unsupported url scheme or format `$url'";

# Turn the url into a single path component: escape backslashes and
# commas with a backslash, then turn each slash into a comma.
our $subdir = $url;
$subdir =~ s|\\|\\\\|g;
$subdir =~ s|,|\\,|g;
$subdir =~ s|/|,|g;

logm 'info', "$specpath locking";

# The suffixes start with a literal backslash.
my $tmpd= "$subdir\\.tmp";
my $gitd= "$subdir\\.git";
my $lock = "$subdir\\.lock";

for (;;) {
    open LOCK, "+>", $lock or fail "open/create $lock: $!";
    flock LOCK, LOCK_EX or fail "lock exclusive $lock: $!";

    my $exists = stat $gitd;
    $exists or $!==ENOENT or fail "stat $gitd: $!";

    # Empty while no fetch/clone has failed; otherwise the reason.
    our $fetchfail = '';

    if ($fetch) {

        our @cmd;

        if (!$exists) {
            system qw(rm -rf --), $tmpd;
            @cmd = (qw(git clone -q --mirror), $url, $tmpd);
            logm 'info', "$specpath cloning @cmd";
        } else {
            @cmd = (qw(git remote update --prune));
            logm 'info', "$specpath fetching @cmd";
        }
        my $cmd = "@cmd[0..1]";

        # Fork, with the child's stdout (and stderr) piped back to us.
        my $child = open FETCHERR, "-|";
        defined $child or fail "fork: $!";
        if (!$child) {
            if ($exists) {
                chdir $gitd or fail "chdir $gitd: $!";
            }
            # Own process group, so the parent can kill the child and
            # all its descendants together.
            setpgrp or fail "setpgrp: $!";
            open STDERR, ">&STDOUT" or fail "redirect stderr: $!";
            exec @cmd or fail "exec $cmd[0]: $!";
        }

        my $fetcherr = '';
        my $timedout = 0;
        {
            local $SIG{ALRM} = sub {
                logm 'info', "$specpath fetch/clone timeout";
                $timedout=1; kill 9, -$child;
            };
            logm 'info', "timeout=$fetchtimeout";
            alarm($fetchtimeout);
            # Collect all of the child's output; this returns once the
            # pipe is closed.
            $!=0; { local $/=undef; $fetcherr = <FETCHERR>; }
            !FETCHERR->error or fail "read pipe from fetch/clone: $!";
            alarm(10);
        }

        # A negative signal number kills the process group.
        kill -9, $child or fail "kill fetch/clone: $!";
        $!=0; $?=0;
        if (!close FETCHERR) {
            fail "reap fetch/clone: $!" if $!;
            # Assign the file-level $fetchfail (rather than a fresh
            # lexical), so the checks after this block can see that
            # the fetch/clone failed.
            $fetchfail =
                !($? & 255) ? "$cmd died with error exit code ".($? >> 8) :
                $? != 9 ? "$cmd died due to fatal signal, status $?" :
                $timedout ? "$cmd timed out (${fetchtimeout}s)" :
                "$cmd died due to unexpected SIGKILL";
            if (length $fetcherr) {
                # Fold the child's output into a single line.
                $fetchfail .= "\n$fetcherr";
                $fetchfail =~ s/\n$//;
                $fetchfail =~ s{\n}{ // }g;
            }
            if ($fetch >= 2) {
                gitfail $fetchfail;
            } else {
                logm 'info', "$specpath fetch/clone failed: $fetchfail";
            }
        }

        # Only install the fresh clone if the clone actually worked; a
        # failed or killed clone must not be renamed into place.
        if (!$exists && !length($fetchfail)) {
            rename $tmpd, $gitd or fail "rename fresh $tmpd to $gitd: $!";
            $exists = 1;
        }
    } else {
        $fetchfail = 'not attempted';
    }

    if (!$exists) {
        gitfail "no cached data, and not cloned: $fetchfail";
    }

    logm 'info', "$specpath sharing";

    flock LOCK, LOCK_UN or fail "unlock $lock: $!";
    flock LOCK, LOCK_SH or fail "lock shared $lock: $!";
    # actually, just relocking as shared would have the same semantics
    # but it's best to be explicit

    if (chdir $gitd) {
        last;
    }
    $!==ENOENT or fail "chdir $gitd: $!";

    # Well, err, someone must have taken the lock in between
    # and garbage collected it.  How annoying.
}

logm 'info', "$specpath servicing";

exec qw(git-upload-pack --strict --timeout=1000 .)
    or fail "exec git-upload-pack: $!";