X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~mdw/git/hippotat/blobdiff_plain/4224dc197474ed0a2535cdb100193fffaa697d0e..refs/heads/mdw/fixes:/hippotat

diff --git a/hippotat b/hippotat
index 997996d..6aa3121 100755
--- a/hippotat
+++ b/hippotat
@@ -26,13 +26,15 @@ from hippotatlib import *
 
 import twisted.web
 import twisted.web.client
+import urllib.parse
 
 import io
 
 class GeneralResponseConsumer(twisted.internet.protocol.Protocol):
-  def __init__(self, cl, req, desc):
+  def __init__(self, cl, req, resp, desc):
     self._cl = cl
     self._req = req
+    self._resp = resp
     self._desc = desc
 
   def _log(self, dflag, msg, **kwargs):
@@ -41,9 +43,17 @@ class GeneralResponseConsumer(twisted.internet.protocol.Protocol):
   def connectionMade(self):
     self._log(DBG.HTTP_CTRL, 'connectionMade')
 
+  def connectionLostOK(self, reason):
+    return (reason.check(twisted.web.client.ResponseDone) or
+            reason.check(twisted.web.client.PotentialDataLoss))
+    # twisted.web.client.PotentialDataLoss is an entirely daft
+    # exception.  It will occur every time if the origin server does
+    # not provide a Content-Length.  (hippotatd does, of course, but
+    # the HTTP transaction might be proxied.)
+
 class ResponseConsumer(GeneralResponseConsumer):
   def __init__(self, cl, req, resp):
-    super().__init__(cl, req, 'RC')
+    super().__init__(cl, req, resp, 'RC')
     ssddesc = '[%s] %s' % (id(req), self._desc)
     self._ssd = SlipStreamDecoder(ssddesc, partial(queue_inbound, cl.ipif))
     self._log(DBG.HTTP_CTRL, '__init__')
@@ -58,7 +68,7 @@ class ResponseConsumer(GeneralResponseConsumer):
   def connectionLost(self, reason):
     reason_msg = 'connectionLost ' + str(reason)
     self._log(DBG.HTTP_CTRL, reason_msg)
-    if not reason.check(twisted.web.client.ResponseDone):
+    if not self.connectionLostOK(reason):
      self._latefailure(reason_msg)
      return
    try:
@@ -78,8 +88,7 @@ class ResponseConsumer(GeneralResponseConsumer):
 
 class ErrorResponseConsumer(GeneralResponseConsumer):
   def __init__(self, cl, req, resp):
-    super().__init__(cl, req, 'ERROR-RC')
-    self._resp = resp
+    super().__init__(cl, req, resp, 'ERROR-RC')
     self._m = b''
     try:
       self._phrase = resp.phrase.decode('utf-8')
@@ -96,7 +105,7 @@
       mbody = self._m.decode('utf-8')
     except Exception:
       mbody = repr(self._m)
-    if not reason.check(twisted.web.client.ResponseDone):
+    if not self.connectionLostOK(reason):
       mbody += ' || ' + str(reason)
     self._cl.req_err(self._req, "FAILED %d %s | %s"
                      % (self._resp.code, self._phrase, mbody))
@@ -215,8 +224,7 @@ class Client():
     cl.log(DBG.HTTP_FULL, 'requesting: ' + str(mime))
 
     hh = { 'User-Agent': ['hippotat'],
-           'Content-Type': ['multipart/form-data; boundary="b"'],
-           'Content-Length': [str(len(mime))] }
+           'Content-Type': ['multipart/form-data; boundary="b"'] }
 
     bytesreader = io.BytesIO(mime)
     producer = twisted.web.client.FileBodyProducer(bytesreader)
@@ -238,6 +246,17 @@ class Client():
 
 clients = [ ]
 
+def encode_url(urlstr):
+  # Oh, this is a disaster. We're given a URL as a `str', but the underlying
+  # machinery insists on having `bytes'. Assume we've been given a sensible
+  # URL, with escaping in all of the necessary places, except that it may
+  # contain non-ASCII characters: then encode as UTF-8 and squash the top-
+  # bit-set bytes down to percent escapes.
+  #
+  # This conses like it's going out of fashion, but it gets the job done.
+  return b''.join(bytes([b]) if b < 128 else b'%%%02X' % b
+                  for b in urlstr.encode('utf-8'))
+
 def process_cfg(_opts, putative_servers, putative_clients):
   global clients
 
@@ -262,7 +281,7 @@ def process_cfg(_opts, putative_servers, putative_clients):
     try: c.ifname = srch(cfg_get_raw, 'ifname_client')
     except NoOptionError: pass
 
-    try: c.url = srch(cfg.get,'url')
+    try: c.url = encode_url(srch(cfg.get,'url'))
     except NoOptionError:
      cfg_process_saddrs(c, ss)
      c.url = c.saddrs[0].url()
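
Note on the connectionLostOK() hunk above: twisted.web reports the normal end of a
response body as ResponseDone, but as PotentialDataLoss whenever the response carried
no Content-Length (which can happen once a proxy sits in front of hippotatd, even
though hippotatd itself always sends one).  The sketch below is illustrative only and
not part of hippotat; it assumes nothing beyond an installed Twisted, and shows how
Failure.check() distinguishes the two cases the new helper accepts.

    # Illustration: Failure.check() returns the matching exception class, or None.
    from twisted.python.failure import Failure
    from twisted.web.client import ResponseDone, PotentialDataLoss

    done  = Failure(ResponseDone())
    lossy = Failure(PotentialDataLoss())

    print(done.check(ResponseDone, PotentialDataLoss))   # -> ResponseDone
    print(lossy.check(ResponseDone, PotentialDataLoss))  # -> PotentialDataLoss
    print(lossy.check(ResponseDone))                     # -> None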
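
The encode_url() helper added above is self-contained enough to try on its own.  The
following standalone copy (with hypothetical URLs; it assumes only Python 3.5+, where
%-formatting of bytes is available) shows the intended behaviour: ASCII bytes pass
through unchanged, and each top-bit-set byte of the UTF-8 encoding becomes a %XX
escape.

    # Standalone copy of encode_url() for illustration.
    def encode_url(urlstr):
      # Keep ASCII bytes as-is; percent-escape anything >= 0x80.
      return b''.join(bytes([b]) if b < 128 else b'%%%02X' % b
                      for b in urlstr.encode('utf-8'))

    print(encode_url('http://t.example/tunnel'))      # b'http://t.example/tunnel'
    print(encode_url('http://t.example/t\u00fcnnel'))
    # -> b'http://t.example/t%C3%BCnnel'

A urllib.parse.quote() call would do something similar, but it would also re-escape
any '%' already present in the configured URL unless every reserved character were
listed as safe, which is presumably why the helper touches only the non-ASCII bytes.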