X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ian/git?a=blobdiff_plain;f=hippotat;h=6aa3121c7ad68a72367930015817a9d91955e460;hb=e13eca8e1facf849a7825b815adc72ef142b7ca8;hp=fc0a8b66f2c39b99470562d2ad2b5bddd0843e2b;hpb=6b92614170718685ed8e92275282895f1e417ee9;p=hippotat.git

diff --git a/hippotat b/hippotat
index fc0a8b6..6aa3121 100755
--- a/hippotat
+++ b/hippotat
@@ -5,35 +5,36 @@
 #
 # Copyright 2017 Ian Jackson
 #
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version, with the "CAF Login
-# Exception" as published by Ian Jackson (version 2, or at your option
-# any later version) as an Additional Permission.
+# GPLv3+
 #
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
 #
-# You should have received a copy of the GNU Affero General Public
-# License and the CAF Login Exception along with this program, in the
-# file AGPLv3+CAFv2. If not, email Ian Jackson
-# <ijackson@chiark.greenend.org.uk>.
-
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program, in the file GPLv3. If not,
+# see <http://www.gnu.org/licenses/>.
+#
+#@ import sys; sys.path.append('@PYBUILD_INSTALL_DIR@')
 from hippotatlib import *
 
 import twisted.web
 import twisted.web.client
+import urllib.parse
 
 import io
 
 
 class GeneralResponseConsumer(twisted.internet.protocol.Protocol):
-  def __init__(self, cl, req, desc):
+  def __init__(self, cl, req, resp, desc):
     self._cl = cl
     self._req = req
+    self._resp = resp
     self._desc = desc
 
   def _log(self, dflag, msg, **kwargs):
@@ -42,9 +43,17 @@ class GeneralResponseConsumer(twisted.internet.protocol.Protocol):
   def connectionMade(self):
     self._log(DBG.HTTP_CTRL, 'connectionMade')
 
+  def connectionLostOK(self, reason):
+    return (reason.check(twisted.web.client.ResponseDone) or
+            reason.check(twisted.web.client.PotentialDataLoss))
+    # twisted.web.client.PotentialDataLoss is an entirely daft
+    # exception. It will occur every time if the origin server does
+    # not provide a Content-Length. (hippotatd does, of course, but
+    # the HTTP transaction might be proxied.)
+
 class ResponseConsumer(GeneralResponseConsumer):
-  def __init__(self, cl, req):
-    super().__init__(cl, req, 'RC')
+  def __init__(self, cl, req, resp):
+    super().__init__(cl, req, resp, 'RC')
     ssddesc = '[%s] %s' % (id(req), self._desc)
     self._ssd = SlipStreamDecoder(ssddesc, partial(queue_inbound, cl.ipif))
     self._log(DBG.HTTP_CTRL, '__init__')
@@ -57,9 +66,10 @@ class ResponseConsumer(GeneralResponseConsumer):
       self._handleexception()
 
   def connectionLost(self, reason):
-    self._log(DBG.HTTP_CTRL, 'connectionLost ' + str(reason))
-    if not reason.check(twisted.web.client.ResponseDone):
-      self.latefailure()
+    reason_msg = 'connectionLost ' + str(reason)
+    self._log(DBG.HTTP_CTRL, reason_msg)
+    if not self.connectionLostOK(reason):
+      self._latefailure(reason_msg)
       return
     try:
       self._log(DBG.HTTP, 'ResponseDone')
@@ -78,8 +88,7 @@ class ErrorResponseConsumer(GeneralResponseConsumer):
   def __init__(self, cl, req, resp):
-    super().__init__(cl, req, 'ERROR-RC')
-    self._resp = resp
+    super().__init__(cl, req, resp, 'ERROR-RC')
     self._m = b''
     try:
       self._phrase = resp.phrase.decode('utf-8')
@@ -96,7 +105,7 @@
       mbody = self._m.decode('utf-8')
     except Exception:
       mbody = repr(self._m)
-    if not reason.check(twisted.web.client.ResponseDone):
+    if not self.connectionLostOK(reason):
       mbody += ' || ' + str(reason)
     self._cl.req_err(self._req,
                      "FAILED %d %s | %s"
@@ -139,7 +148,7 @@ class Client():
            'req_ok %d %s %s' % (resp.code, repr(resp.phrase),
                                 str(resp)), idof=req)
     if resp.code == 200:
-      rc = ResponseConsumer(cl, req)
+      rc = ResponseConsumer(cl, req, resp)
     else:
       rc = ErrorResponseConsumer(cl, req, resp)
 
@@ -151,9 +160,11 @@ class Client():
     # later, by ResponsConsumer or ErrorResponsConsumer
     try:
      cl.log(DBG.HTTP_CTRL, 'req_err ' + str(err), idof=req)
+      cl.running_reported = False
       if isinstance(err, twisted.python.failure.Failure):
         err = err.getTraceback()
-      print('[%#x] %s' % (id(req), err), file=sys.stderr)
+      print('%s[%#x] %s' % (cl.desc, id(req), err.strip('\n').replace('\n',' / ')),
+            file=sys.stderr)
       if not isinstance(cl.outstanding[req], int):
         raise RuntimeError('[%#x] previously %s'
                            % (id(req), cl.outstanding[req]))
@@ -185,13 +196,15 @@ class Client():
 
     d = mime_translate(d)
 
+    token = authtoken_make(cl.c.secret)
+
     crlf = b'\r\n'
     lf = b'\n'
     mime = (b'--b' + crlf +
             b'Content-Type: text/plain; charset="utf-8"' + crlf +
             b'Content-Disposition: form-data; name="m"' + crlf + crlf +
             str(cl.c.client) .encode('ascii') + crlf +
-            cl.c.password + crlf +
+            token + crlf +
             str(cl.c.target_requests_outstanding) .encode('ascii') + crlf +
             str(cl.c.http_timeout) .encode('ascii') + crlf +
@@ -211,8 +224,7 @@ class Client():
     cl.log(DBG.HTTP_FULL, 'requesting: ' + str(mime))
 
     hh = { 'User-Agent': ['hippotat'],
-           'Content-Type': ['multipart/form-data; boundary="b"'],
-           'Content-Length': [str(len(mime))] }
+           'Content-Type': ['multipart/form-data; boundary="b"'] }
 
     bytesreader = io.BytesIO(mime)
     producer = twisted.web.client.FileBodyProducer(bytesreader)
@@ -234,7 +246,18 @@ class Client():
 
 clients = [ ]
 
-def process_cfg(putative_servers, putative_clients):
+def encode_url(urlstr):
+  # Oh, this is a disaster. We're given a URL as a `str', but the underlying
+  # machinery insists on having `bytes'. Assume we've been given a sensible
+  # URL, with escaping in all of the necessary places, except that it may
+  # contain non-ASCII characters: then encode as UTF-8 and squash the top-
+  # bit-set bytes down to percent escapes.
+  #
+  # This conses like it's going out of fashion, but it gets the job done.
+  return b''.join(bytes([b]) if b < 128 else '%%%02X' % b
+                  for b in urlstr.encode('utf-8'))
+
+def process_cfg(_opts, putative_servers, putative_clients):
   global clients
 
   for ss in putative_servers.values():
@@ -244,6 +267,8 @@ def process_cfg(putative_servers, putative_clients):
       sections = cfg_process_client_common(c,ss,cs,ci)
       if not sections: continue
 
+      log_debug_config('processing client [%s %s]' % (ss, cs))
+
       def srch(getter,key): return cfg_search(getter,key,sections)
 
       c.http_timeout += srch(cfg.getint, 'http_timeout_grace')
@@ -253,7 +278,10 @@ def process_cfg(putative_servers, putative_clients):
       c.max_queue_time = srch(cfg.getint, 'max_queue_time')
       c.vroutes = srch(cfg.get, 'vroutes')
 
-      try: c.url = srch(cfg.get,'url')
+      try: c.ifname = srch(cfg_get_raw, 'ifname_client')
+      except NoOptionError: pass
+
+      try: c.url = encode_url(srch(cfg.get,'url'))
       except NoOptionError:
         cfg_process_saddrs(c, ss)
         c.url = c.saddrs[0].url()
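
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the diff above). The connectionLostOK()
# hunk treats twisted.web.client.PotentialDataLoss the same as ResponseDone:
# a response delivered without a Content-Length (for instance because it was
# proxied) can only signal end-of-body by closing the connection. A minimal
# body-collecting protocol built on the same idea might look like this; the
# class and attribute names are invented for illustration.

from twisted.internet import defer, protocol
from twisted.web.client import PotentialDataLoss, ResponseDone

class BodyCollector(protocol.Protocol):
  def __init__(self):
    self.finished = defer.Deferred()   # fires with the collected body bytes
    self._buf = b''

  def dataReceived(self, data):
    self._buf += data

  def connectionLost(self, reason):
    # PotentialDataLoss only means "no Content-Length was supplied", so the
    # connection closing is the normal end-of-body marker; treat it as
    # success rather than as an error.
    if reason.check(ResponseDone) or reason.check(PotentialDataLoss):
      self.finished.callback(self._buf)
    else:
      self.finished.errback(reason)

# Typical use with twisted.web.client.Agent: once the request Deferred fires
# with a response, call response.deliverBody() on a BodyCollector instance
# and wait on its .finished Deferred.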
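
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the diff above). encode_url() leaves the
# ASCII bytes of an already-escaped URL alone and turns each top-bit-set byte
# of its UTF-8 encoding into a %XX escape, producing the `bytes' the HTTP
# machinery wants. An equivalent helper is written out here with bytes
# %-formatting (PEP 461, Python 3.5+) so that every piece handed to b''.join()
# is itself bytes; the function name is invented for illustration.

def encode_url_sketch(urlstr):
  return b''.join(bytes([b]) if b < 0x80 else b'%%%02X' % b
                  for b in urlstr.encode('utf-8'))

# For example:
#   encode_url_sketch('http://example.net/héllo')  -> b'http://example.net/h%C3%A9llo'
#   encode_url_sketch('http://example.net/plain')  -> b'http://example.net/plain'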