X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ian/git?a=blobdiff_plain;f=hippotat;h=6aa3121c7ad68a72367930015817a9d91955e460;hb=e13eca8e1facf849a7825b815adc72ef142b7ca8;hp=fc0a8b66f2c39b99470562d2ad2b5bddd0843e2b;hpb=6b92614170718685ed8e92275282895f1e417ee9;p=hippotat.git

diff --git a/hippotat b/hippotat
index fc0a8b6..6aa3121 100755
--- a/hippotat
+++ b/hippotat
@@ -5,35 +5,36 @@
 #
 # Copyright 2017 Ian Jackson
 #
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version, with the "CAF Login
-# Exception" as published by Ian Jackson (version 2, or at your option
-# any later version) as an Additional Permission.
+# GPLv3+
 #
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
 #
-# You should have received a copy of the GNU Affero General Public
-# License and the CAF Login Exception along with this program, in the
-# file AGPLv3+CAFv2. If not, email Ian Jackson
-# <ijackson@chiark.greenend.org.uk>.
-
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program, in the file GPLv3. If not,
+# see <http://www.gnu.org/licenses/>.
+#
+#@ import sys; sys.path.append('@PYBUILD_INSTALL_DIR@')
 from hippotatlib import *
 
 import twisted.web
 import twisted.web.client
+import urllib.parse
 
 import io
 
 
 class GeneralResponseConsumer(twisted.internet.protocol.Protocol):
-  def __init__(self, cl, req, desc):
+  def __init__(self, cl, req, resp, desc):
     self._cl = cl
     self._req = req
+    self._resp = resp
     self._desc = desc
 
   def _log(self, dflag, msg, **kwargs):
@@ -42,9 +43,17 @@ class GeneralResponseConsumer(twisted.internet.protocol.Protocol):
   def connectionMade(self):
     self._log(DBG.HTTP_CTRL, 'connectionMade')
 
+  def connectionLostOK(self, reason):
+    return (reason.check(twisted.web.client.ResponseDone) or
+            reason.check(twisted.web.client.PotentialDataLoss))
+    # twisted.web.client.PotentialDataLoss is an entirely daft
+    # exception. It will occur every time if the origin server does
+    # not provide a Content-Length. (hippotatd does, of course, but
+    # the HTTP transaction might be proxied.)
+
 class ResponseConsumer(GeneralResponseConsumer):
-  def __init__(self, cl, req):
-    super().__init__(cl, req, 'RC')
+  def __init__(self, cl, req, resp):
+    super().__init__(cl, req, resp, 'RC')
     ssddesc = '[%s] %s' % (id(req), self._desc)
     self._ssd = SlipStreamDecoder(ssddesc, partial(queue_inbound, cl.ipif))
     self._log(DBG.HTTP_CTRL, '__init__')
@@ -57,9 +66,10 @@ class ResponseConsumer(GeneralResponseConsumer):
       self._handleexception()
 
   def connectionLost(self, reason):
-    self._log(DBG.HTTP_CTRL, 'connectionLost ' + str(reason))
-    if not reason.check(twisted.web.client.ResponseDone):
-      self.latefailure()
+    reason_msg = 'connectionLost ' + str(reason)
+    self._log(DBG.HTTP_CTRL, reason_msg)
+    if not self.connectionLostOK(reason):
+      self._latefailure(reason_msg)
       return
     try:
       self._log(DBG.HTTP, 'ResponseDone')
@@ -78,8 +88,7 @@ class ErrorResponseConsumer(GeneralResponseConsumer):
   def __init__(self, cl, req, resp):
-    super().__init__(cl, req, 'ERROR-RC')
-    self._resp = resp
+    super().__init__(cl, req, resp, 'ERROR-RC')
     self._m = b''
     try:
       self._phrase = resp.phrase.decode('utf-8')
@@ -96,7 +105,7 @@
       mbody = self._m.decode('utf-8')
     except Exception:
       mbody = repr(self._m)
-    if not reason.check(twisted.web.client.ResponseDone):
+    if not self.connectionLostOK(reason):
       mbody += ' || ' + str(reason)
     self._cl.req_err(self._req,
                      "FAILED %d %s | %s"
@@ -139,7 +148,7 @@ class Client():
            'req_ok %d %s %s' % (resp.code, repr(resp.phrase),
                                 str(resp)), idof=req)
     if resp.code == 200:
-      rc = ResponseConsumer(cl, req)
+      rc = ResponseConsumer(cl, req, resp)
     else:
       rc = ErrorResponseConsumer(cl, req, resp)
 
@@ -151,9 +160,11 @@ class Client():
     # later, by ResponsConsumer or ErrorResponsConsumer
     try:
      cl.log(DBG.HTTP_CTRL, 'req_err ' + str(err), idof=req)
+      cl.running_reported = False
       if isinstance(err, twisted.python.failure.Failure):
         err = err.getTraceback()
-      print('[%#x] %s' % (id(req), err), file=sys.stderr)
+      print('%s[%#x] %s' % (cl.desc, id(req), err.strip('\n').replace('\n',' / ')),
+            file=sys.stderr)
       if not isinstance(cl.outstanding[req], int):
         raise RuntimeError('[%#x] previously %s'
                            % (id(req), cl.outstanding[req]))
@@ -185,13 +196,15 @@ class Client():
 
     d = mime_translate(d)
 
+    token = authtoken_make(cl.c.secret)
+
     crlf = b'\r\n'
     lf = b'\n'
     mime = (b'--b' + crlf +
             b'Content-Type: text/plain; charset="utf-8"' + crlf +
             b'Content-Disposition: form-data; name="m"' + crlf + crlf +
             str(cl.c.client) .encode('ascii') + crlf +
-            cl.c.password + crlf +
+            token + crlf +
             str(cl.c.target_requests_outstanding) .encode('ascii') + crlf +
             str(cl.c.http_timeout) .encode('ascii') + crlf +
@@ -211,8 +224,7 @@ class Client():
     cl.log(DBG.HTTP_FULL, 'requesting: ' + str(mime))
 
     hh = { 'User-Agent': ['hippotat'],
-           'Content-Type': ['multipart/form-data; boundary="b"'],
-           'Content-Length': [str(len(mime))] }
+           'Content-Type': ['multipart/form-data; boundary="b"'] }
 
     bytesreader = io.BytesIO(mime)
     producer = twisted.web.client.FileBodyProducer(bytesreader)
@@ -234,7 +246,18 @@ class Client():
 
 clients = [ ]
 
-def process_cfg(putative_servers, putative_clients):
+def encode_url(urlstr):
+  # Oh, this is a disaster. We're given a URL as a `str', but the underlying
+  # machinery insists on having `bytes'. Assume we've been given a sensible
+  # URL, with escaping in all of the necessary places, except that it may
+  # contain non-ASCII characters: then encode as UTF-8 and squash the top-
+  # bit-set bytes down to percent escapes.
+  #
+  # This conses like it's going out of fashion, but it gets the job done.
+  return b''.join(bytes([b]) if b < 128 else '%%%02X' % b
+                  for b in urlstr.encode('utf-8'))
+
+def process_cfg(_opts, putative_servers, putative_clients):
   global clients
 
   for ss in putative_servers.values():
@@ -244,6 +267,8 @@ def process_cfg(putative_servers, putative_clients):
       sections = cfg_process_client_common(c,ss,cs,ci)
       if not sections: continue
 
+      log_debug_config('processing client [%s %s]' % (ss, cs))
+
       def srch(getter,key): return cfg_search(getter,key,sections)
 
       c.http_timeout += srch(cfg.getint, 'http_timeout_grace')
@@ -253,7 +278,10 @@ def process_cfg(putative_servers, putative_clients):
       c.max_queue_time = srch(cfg.getint, 'max_queue_time')
       c.vroutes = srch(cfg.get, 'vroutes')
 
-      try: c.url = srch(cfg.get,'url')
+      try: c.ifname = srch(cfg_get_raw, 'ifname_client')
+      except NoOptionError: pass
+
+      try: c.url = encode_url(srch(cfg.get,'url'))
       except NoOptionError:
         cfg_process_saddrs(c, ss)
         c.url = c.saddrs[0].url()
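
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the diff above). The connectionLostOK()
# hunk treats twisted.web.client.PotentialDataLoss the same as ResponseDone:
# a response delivered without a Content-Length (for instance because it was
# proxied) can only signal end-of-body by closing the connection. A minimal
# body-collecting protocol built on the same idea might look like this; the
# class and attribute names are invented for illustration.

from twisted.internet import defer, protocol
from twisted.web.client import PotentialDataLoss, ResponseDone

class BodyCollector(protocol.Protocol):
  def __init__(self):
    self.finished = defer.Deferred()   # fires with the collected body bytes
    self._buf = b''

  def dataReceived(self, data):
    self._buf += data

  def connectionLost(self, reason):
    # PotentialDataLoss only means "no Content-Length was supplied", so the
    # connection closing is the normal end-of-body marker; treat it as
    # success rather than as an error.
    if reason.check(ResponseDone) or reason.check(PotentialDataLoss):
      self.finished.callback(self._buf)
    else:
      self.finished.errback(reason)

# Typical use with twisted.web.client.Agent: once the request Deferred fires
# with a response, call response.deliverBody() on a BodyCollector instance
# and wait on its .finished Deferred.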
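
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the diff above). encode_url() leaves the
# ASCII bytes of an already-escaped URL alone and turns each top-bit-set byte
# of its UTF-8 encoding into a %XX escape, producing the `bytes' the HTTP
# machinery wants. An equivalent helper is written out here with bytes
# %-formatting (PEP 461, Python 3.5+) so that every piece handed to b''.join()
# is itself bytes; the function name is invented for illustration.

def encode_url_sketch(urlstr):
  return b''.join(bytes([b]) if b < 0x80 else b'%%%02X' % b
                  for b in urlstr.encode('utf-8'))

# For example:
#   encode_url_sketch('http://example.net/héllo')  -> b'http://example.net/h%C3%A9llo'
#   encode_url_sketch('http://example.net/plain')  -> b'http://example.net/plain'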