chiark / gitweb /
cgi.py: Escape quote signs in `htmlescape' and `html_quotify'.
[chopwood] / cgi.py
... / ...
CommitLineData
1### -*-python-*-
2###
3### CGI machinery
4###
5### (c) 2013 Mark Wooding
6###
7
8###----- Licensing notice ---------------------------------------------------
9###
10### This file is part of Chopwood: a password-changing service.
11###
12### Chopwood is free software; you can redistribute it and/or modify
13### it under the terms of the GNU Affero General Public License as
14### published by the Free Software Foundation; either version 3 of the
15### License, or (at your option) any later version.
16###
17### Chopwood is distributed in the hope that it will be useful,
18### but WITHOUT ANY WARRANTY; without even the implied warranty of
19### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20### GNU Affero General Public License for more details.
21###
22### You should have received a copy of the GNU Affero General Public
23### License along with Chopwood; if not, see
24### <http://www.gnu.org/licenses/>.
25
26from __future__ import with_statement
27
28import contextlib as CTX
29import os as OS; ENV = OS.environ
30import re as RX
31import sys as SYS
32import time as T
33import traceback as TB
34
35from auto import HOME, PACKAGE, VERSION
36import config as CONF; CFG = CONF.CFG
37import format as F
38import output as O; OUT = O.OUT; PRINT = O.PRINT
39import subcommand as SC
40import util as U
41
42###--------------------------------------------------------------------------
43### Configuration tweaks.
44
## The script name as reported by the web server through the CGI
## environment, with a conventional fallback if it didn't say.
_script_name = ENV.get('SCRIPT_NAME', '/cgi-bin/chpwd')

CONF.DEFAULTS.update(

  ## The URL of this program, when it's run through CGI.
  SCRIPT_NAME = _script_name,

  ## A (maybe relative) URL for static content.  By default this comes from
  ## the main script, but we hope that user agents cache it.
  STATIC = '%s/static' % _script_name)
55
56###--------------------------------------------------------------------------
57### Escaping and encoding.
58
## Regular expressions used by the escaping functions below.
##
## R_URLESC matches one `%XX' escape and captures its two hex digits;
## R_URLBAD matches any single character which must be escaped in a
## form-url-encoded string; R_HTMLBAD matches any single character which must
## be escaped in HTML text or attribute values.
R_URLESC = RX.compile(r'%([0-9a-fA-F]{2})')
R_URLBAD = RX.compile(r'[^-\w,.!]')
R_HTMLBAD = RX.compile(r'''[&<>'"]''')
63
def urldecode(s):
  """
  Decode a single form-url-encoded string S.

  Each `+' is turned into a space, and then each `%XX' escape (exactly two
  hex digits, as matched by `R_URLESC') is replaced by the character with
  that code.  A `%' which isn't followed by two hex digits is left alone.
  Returns the decoded string.
  """
  ## The `+' replacement is done first, so that a `+' produced by decoding
  ## `%2b' isn't then mistaken for an encoded space.
  return R_URLESC.sub(lambda m: chr(int(m.group(1), 16)),
                      s.replace('+', ' '))
69
def urlencode(s):
  """
  Encode a single string S using form-url-encoding.

  Every character matched by `R_URLBAD' is replaced by a `%xx' escape giving
  its character code as (at least) two lower-case hex digits; everything
  else is passed through unchanged.
  """
  def escape(m):
    return '%%%02x' % ord(m.group(0))
  return R_URLBAD.sub(escape, s)
73
def htmlescape(s):
  """
  Escape a literal string S so that HTML doesn't misinterpret it.

  Every character matched by `R_HTMLBAD' is replaced by a hexadecimal
  numeric character reference `&#xNN;'; the rest of S is left alone.
  """
  def charref(m):
    return '&#x%02x;' % ord(m.group(0))
  return R_HTMLBAD.sub(charref, s)
77
## A substitution which maps some standard ASCII character sequences to HTML
## entity references.  The HTML-significant characters are escaped, and the
## conventional plain-text spellings of quotes and dashes are replaced by
## their prettier typographical versions.
html_quotify = U.StringSubst({
  '<': '&lt;',
  '>': '&gt;',
  '&': '&amp;',
  '`': '&lsquo;',
  '\'': '&rsquo;',
  '"': '&quot;',
  '``': '&ldquo;',
  '\'\'': '&rdquo;',
  '--': '&ndash;',
  '---': '&mdash;',
})
92
93###--------------------------------------------------------------------------
94### Output machinery.
95
class HTTPOutput (O.FileOutput):
  """
  Output driver providing an automatic HTTP header.

  The `headerp' attribute is true once a header has been written.  Call the
  `header' method before producing any output to send a custom header;
  otherwise the first `write' sends a default `text/plain' one.
  """

  def __init__(me, *args, **kw):
    """Constructor: pass the arguments on, and clear the `headerp' flag."""
    super(HTTPOutput, me).__init__(*args, **kw)
    me.headerp = False

  def write(me, msg):
    """Output protocol: write MSG, preceded by a header if there's none yet."""
    if not me.headerp:
      me.header('text/plain')
    super(HTTPOutput, me).write(msg)

  def header(me, content_type = 'text/plain', **kw):
    """
    Print a header, if none has yet been printed.

    The CONTENT_TYPE and any other keyword arguments are handed to
    `O.http_headers', which produces the header lines to write; see there for
    the formatting rules.  The header is finished off with an empty line.
    Does nothing if a header has already been written.
    """
    if not me.headerp:
      ## Set the flag before writing anything: presumably `writeln' goes by
      ## way of `write', which would otherwise try to emit a header itself.
      me.headerp = True
      for line in O.http_headers(content_type = content_type, **kw):
        me.writeln(line)
      me.writeln('')
126
def cookie(name, value, **kw):
  """
  Return the text of a HTTP `Set-Cookie' header.

  The NAME and VALUE give the name and value of the cookie; both are
  form-url-encoded to prevent misinterpretation (fortunately, `cgiparse'
  knows to undo this transformation).  The KW are other attributes to
  declare: the names are forced to lower-case and underscores `_' are
  replaced by hyphens `-'.  An attribute whose value is exactly `True' is
  taken to be boolean and written as a bare name; an attribute whose value is
  false is left out altogether; anything else is written as `NAME=VALUE'.

  If a `max-age' attribute is given, then an `expires' attribute is set to
  match it, counting from `U.NOW'.
  """

  ## Canonify the attribute names.
  attr = {}
  for key, val in kw.items():
    attr[key.lower().replace('_', '-')] = val

  ## Provide an `expires' attribute to go with `max-age'.
  if 'max-age' in attr:
    maxage = int(attr['max-age'])
    attr['expires'] = T.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                 T.gmtime(U.NOW + maxage))

  ## Assemble the pieces.
  pieces = ['%s=%s' % (urlencode(name), urlencode(value))]
  for key, val in attr.items():
    if not val: continue
    if val is True: pieces.append(key)
    else: pieces.append('%s=%s' % (key, val))
  return '; '.join(pieces)
150
def action(*v, **kw):
  """
  Build a URL invoking this script.

  The positional arguments V are path components: they are joined, with `/',
  onto the configured script name `CFG.SCRIPT_NAME' (and presumably will be
  read back as `PATH_INFO').  The keyword arguments, if there are any, are
  form-url-encoded and appended as a query string, in sorted order of their
  names and separated by `;'.

  The result is HTML-escaped, ready for inclusion in an attribute value.
  """
  components = [CFG.SCRIPT_NAME]
  components.extend(v)
  url = '/'.join(components)
  if kw:
    query = ['%s=%s' % (urlencode(k), urlencode(kw[k])) for k in sorted(kw)]
    url = url + '?' + ';'.join(query)
  return htmlescape(url)
165
def static(name):
  """
  Build a URL for the static file NAME.

  The NAME is appended to the configured `CFG.STATIC' prefix, and the result
  is HTML-escaped.
  """
  return htmlescape('/'.join([CFG.STATIC, name]))
169
def redirect(where, **kw):
  """
  Write a complete redirection to some other URL.

  WHERE is the target URL: it is given as-is as the `location' of a 302
  response, and HTML-escaped for the link in the (small) HTML body.  Any
  other keyword arguments KW are passed on to `OUT.header' along with the
  status and location.
  """
  OUT.header(content_type = 'text/html',
             status = 302, location = where,
             **kw)

  ## The body is only there for user agents which don't follow the
  ## redirection by themselves.
  PRINT("""\
<html>
<head><title>No, sorry, it's moved again.</title></head>
<body><p>I'm <a href="%s">over here</a> now.</p></body>
</html>""" % htmlescape(where))
182
183###--------------------------------------------------------------------------
184### Templates.
185
## The directory in which we look for template files: this is the `HOME'
## directory reported by the `auto' module.
TMPLDIR = HOME

## Dynamically-bound state.  The `kw' slot holds the keyword arguments which
## are passed to templates when they're formatted.
STATE = U.Fluid()
STATE.kw = dict()
192
## Set some basic keyword arguments.  This is registered as a configuration
## hook, since the values involved come from the configuration `CFG'.
@CONF.hook
def set_template_keywords():
  """Add the package details and script URLs to the template keywords."""
  STATE.kw.update({
    'package': PACKAGE,
    'version': VERSION,
    'script': CFG.SCRIPT_NAME,
    'static': CFG.STATIC,
  })
201
class TemplateFinder (object):
  """
  A magical fake dictionary whose keys are templates.

  Looking up a KEY reads the file of that name in the finder's directory and
  returns its contents as a string.  The contents are cached, so each file is
  read at most once per finder.
  """

  def __init__(me, dir):
    """Initialize a finder which reads its templates from the directory DIR."""
    me._dir = dir
    me._cache = {}

  def __getitem__(me, key):
    """Return the text of the template file KEY, reading it if necessary."""
    if key not in me._cache:
      with open(OS.path.join(me._dir, key)) as f:
        me._cache[key] = f.read()
    return me._cache[key]
## The template finder used by the rest of this module: it reads templates
## from `TMPLDIR'.
TMPL = TemplateFinder(dir = TMPLDIR)
216
@CTX.contextmanager
def tmplkw(**kw):
  """
  Context manager: execute the body with additional keyword arguments.

  For the duration of the body, `STATE.kw' is bound to a fresh dictionary
  consisting of the existing template keywords overlaid with KW.  The
  existing dictionary is not modified.
  """
  merged = dict(STATE.kw)
  merged.update(kw)
  with STATE.bind(kw = merged):
    yield
226
## Our own formatting operations, by directive character.  These are offered
## to the formatter, ahead of the standard ones, by `format_tmpl'.
FORMATOPS = {}

class FormatHTML (F.SimpleFormatOperation):
  """
  ~H: escape output suitable for inclusion in HTML.

  Ordinarily the argument is escaped using `htmlescape'.  With `:', instead
  pass it through `html_quotify', which also prettifies quotes and dashes.
  """
  def _convert(me, arg):
    """Return the converted form of the argument ARG."""
    conv = html_quotify if me.colonp else htmlescape
    return conv(arg)
FORMATOPS['H'] = FormatHTML
239
def format_tmpl(control, **kw):
  """
  Format the template string CONTROL, writing the result to `OUT'.

  The formatting operations in `FORMATOPS' are made available in addition to
  the basic ones.  The template is given the current template keywords
  `STATE.kw', augmented by the keyword arguments KW.
  """
  opmaps = [FORMATOPS, F.BASEOPS]
  with F.COMPILE.bind(opmaps = opmaps):
    with tmplkw(**kw):
      ## Read `STATE.kw' only now, within `tmplkw', so that it includes KW.
      F.format(OUT, control, **STATE.kw)
244
def page(template, header = {}, title = 'Chopwood', **kw):
  """
  Write a complete HTML page built from the template file TEMPLATE.

  A header is written first: HEADER is a dictionary of keyword arguments for
  `OUT.header', except that the content type is always forced to
  `text/html'.  Then the `wrapper.fhtml' template is formatted, with TITLE as
  its `title', the text of TEMPLATE as its `payload', and any further keyword
  arguments KW.
  """
  ## Work on a copy: the caller's dictionary (and the shared default) must
  ## be left alone.
  hdr = dict(header)
  hdr['content_type'] = 'text/html'
  OUT.header(**hdr)
  format_tmpl(TMPL['wrapper.fhtml'],
              title = title, payload = TMPL[template], **kw)
250
251###--------------------------------------------------------------------------
252### Error reporting.
253
@CTX.contextmanager
def cgi_errors(hook = None):
  """
  Context manager: report errors in the body as useful HTML.

  If HOOK is given, then call it before reporting errors.  It may have set up
  useful stuff.

  An `ExpectedError' raised before any header has been written is reported
  using the `error.fhtml' page, with the error's `code' as the HTTP status.
  Any other exception -- or an `ExpectedError' which comes too late for a
  header -- is reported using the `exception.fhtml' template, together with
  the traceback and the CGI parameters, cookies, path and environment: as a
  complete page with status 500 if no header has been written yet, or else
  just formatted into the output already in progress.  Either way, the
  exception is not propagated any further.
  """
  try:
    yield None
  except Exception:
    exty, exval, extb = SYS.exc_info()
    if hook: hook()

    ## The simple case: an anticipated error, and we can still choose the
    ## status.
    if isinstance(exval, U.ExpectedError) and not OUT.headerp:
      page('error.fhtml',
           header = dict(status = exval.code),
           title = 'Chopwood: error', error = exval)
      return

    ## Otherwise provide as much detail as we can find.
    details = dict(exception = TB.format_exception_only(exty, exval),
                   traceback = TB.extract_tb(extb),
                   PARAM = sorted(PARAM),
                   COOKIE = sorted(COOKIE.items()),
                   PATH = PATH,
                   ENV = sorted(ENV.items()))
    with tmplkw(**details):
      if not OUT.headerp:
        page('exception.fhtml',
             header = dict(status = 500),
             title = 'Chopwood: internal error',
             toplevel = True)
      else:
        format_tmpl(TMPL['exception.fhtml'], toplevel = False)
285
286###--------------------------------------------------------------------------
287### CGI input.
288
## Lots of global variables to be filled in by `cgiparse': see its
## documentation for what they all mean.
COOKIE = dict()
SPECIAL = dict()
PARAM = list()
PARAMDICT = dict()
PATH = list()
SSLP = False

## Regular expressions for splitting apart query and cookie strings.  Query
## parameters may be separated by either `;' or `&'; cookies only by `;'.
R_QSPLIT = RX.compile(r'[;&]')
R_CSPLIT = RX.compile(r';')
300
def split_keyvalue(string, delim, default):
  """
  Split a STRING, and generate the resulting (KEY, VALUE) pairs.

  The STRING is split using the compiled regular expression DELIM; each
  component is parsed as KEY[=VALUE], splitting at the first `='.  The KEYs
  and VALUEs are stripped of leading and trailing whitespace, and then
  form-url-decoded.  If a component has no `=VALUE' part, then the DEFAULT
  is used as its value -- unless the DEFAULT is `None', in which case the
  component is simply ignored.  Components with empty KEYs are ignored too.
  """
  for item in delim.split(string):
    key, eq, val = item.partition('=')
    if not eq:
      if default is None: continue
      val = default
    key = key.strip()
    val = val.strip()
    if key:
      yield urldecode(key), urldecode(val)
321
def cgiparse():
  """
  Process all of the various exciting CGI environment variables.

  We read environment variables and populate some tables left in global
  variables: it's all rather old-school.  Variables set are as follows.

  `COOKIE'
        A dictionary mapping cookie names to the values provided by the user
        agent.

  `SPECIAL'
        A dictionary holding some special query parameters which are of
        interest at a global level, and should not be passed to a subcommand
        handler.  No new entries will be added to this dictionary, though
        values will be modified to reflect the query parameters discovered.
        Conventionally, such parameters have names beginning with `%'.

  `PARAM'
        The query parameters as a list of (KEY, VALUE) pairs.  Special
        parameters are omitted.

  `PARAMDICT'
        The query parameters as a dictionary.  Special parameters, and
        parameters which appear more than once, are omitted.

  `PATH'
        The trailing `PATH_INFO' path, split at `/' markers, with any
        trailing empty component removed.

  `SSLP'
        True if the client connection is carried over SSL or TLS.

  The query string is taken from `QUERY_STRING' for a `GET' request, or read
  from standard input for a `POST' request.  An `ExpectedError' with code
  500 is raised if a necessary environment variable is missing, if the
  request method is anything else, or if a `POST' request has a bad content
  length or type, or a body of the wrong length.
  """

  global SSLP

  def getenv(var):
    """Return the value of the environment variable VAR, which must exist."""
    try: return ENV[var]
    except KeyError: raise U.ExpectedError(500, "No `%s' supplied" % var)

  ## Yes, we want the request method.
  method = getenv('REQUEST_METHOD')

  ## Acquire the query string.
  if method == 'GET':
    q = getenv('QUERY_STRING')

  elif method == 'POST':

    ## We must read the query string from stdin.
    n = getenv('CONTENT_LENGTH')
    if not n.isdigit():
      raise U.ExpectedError(500, "Invalid CONTENT_LENGTH")
    n = int(n, 10)
    ct = getenv('CONTENT_TYPE')
    if ct != 'application/x-www-form-urlencoded':
      raise U.ExpectedError(500, "Unexpected content type `%s'" % ct)
    q = SYS.stdin.read(n)
    if len(q) != n:
      raise U.ExpectedError(500, "Failed to read correct length")

  else:
    raise U.ExpectedError(500, "Unexpected request method `%s'" % method)

  ## Populate the `SPECIAL', `PARAM' and `PARAMDICT' tables.  A parameter
  ## name with no value is given the value `t'.
  seen = set()
  for k, v in split_keyvalue(q, R_QSPLIT, 't'):
    if k in SPECIAL:
      SPECIAL[k] = v
    else:
      PARAM.append((k, v))
      if k in seen:
        ## A repeated parameter: drop it from the dictionary.  It will
        ## already have been dropped if this is its third (or later)
        ## appearance, so don't insist that it be there.
        PARAMDICT.pop(k, None)
      else:
        PARAMDICT[k] = v
        seen.add(k)

  ## Parse out the cookies, if any.  Components without a value are ignored.
  try: c = ENV['HTTP_COOKIE']
  except KeyError: pass
  else:
    for k, v in split_keyvalue(c, R_CSPLIT, None): COOKIE[k] = v

  ## Set up the `PATH'.
  try: p = ENV['PATH_INFO']
  except KeyError: pass
  else:
    pp = p.lstrip('/').split('/')
    if pp and not pp[-1]: pp.pop()
    PATH[:] = pp

  ## Check the crypto for the connection.
  if ENV.get('SSL_PROTOCOL'):
    SSLP = True
410 PATH[:] = pp
411
412 ## Check the crypto for the connection.
413 if ENV.get('SSL_PROTOCOL'):
414 SSLP = True
415
416###--------------------------------------------------------------------------
417### CGI subcommands.
418
class Subcommand (SC.Subcommand):
  """
  A CGI subcommand object.

  As for `subcommand.Subcommand', but with additional protocol for processing
  CGI parameters.
  """

  def cgi(me, param, path):
    """
    Invoke the subcommand given a collection of CGI parameters.

    PARAM is a list of (KEY, VALUE) pairs from the CGI query.  The CGI query
    parameters are checked against the subcommand's parameters (making sure
    that mandatory parameters are supplied, that any switches are given
    boolean values, and that only the `rest' parameter, if any, is
    duplicated).  An unrecognized parameter is an error if it has a nonempty
    value, and quietly ignored otherwise.

    PATH is a list of trailing path components.  They are used to satisfy the
    `rest' parameter if there is one and there are no query parameters which
    satisfy the `rest' parameter; otherwise, an `ExpectedError' is raised if
    the list of path elements is non-empty.

    Problems with the parameters are reported as `ExpectedError's with code
    400; a superfluous path is reported with code 404.
    """

    ## We're going to make a pass over the supplied parameters, and we'll
    ## check them off against the formal parameters as we go; so we'll need
    ## to be able to look them up.
    ##
    ## To that end: `want' is a dictionary mapping parameter names to
    ## functions which will do something useful with the value; and `kw' is
    ## going to be the keyword-argument dictionary we pass to the handler
    ## function.  The keys of `kw' also tell us which parameters we've seen,
    ## so that we can make sure that all of the mandatory parameters were
    ## actually supplied.
    kw = {}
    want = {}

    def set_value(k, v):
      """Set a simple value: we shouldn't see multiple values."""
      if k in kw:
        raise U.ExpectedError(400, "Repeated parameter `%s'" % k)
      kw[k] = v
    def set_bool(k, v):
      """Set a simple boolean value: for switches."""
      set_value(k, v.lower() in ['true', 't', 'yes', 'y'])
    def set_list(k, v):
      """Append the value to a list: for the `rest' parameter."""
      kw.setdefault(k, []).append(v)

    ## Set up the `want' map.  Options which take an argument are simple
    ## values; the others are switches.
    for o in me.opts:
      want[o.name] = set_value if o.argname else set_bool
    for p in me.params:
      want[p.name] = set_value
    for p in me.oparams:
      want[p.name] = set_value
    if me.rparam:
      want[me.rparam.name] = set_list

    ## Work through the list of supplied parameters.
    for k, v in param:
      setter = want.get(k)
      if setter is not None: setter(k, v)
      elif v: raise U.ExpectedError(400, "Unexpected parameter `%s'" % k)

    ## Deal with a path, if there is one.
    if path:
      if not me.rparam or me.rparam.name in kw:
        raise U.ExpectedError(404, "Superfluous path elements")
      kw[me.rparam.name] = path

    ## Make sure we saw all of the mandatory parameters.
    for p in me.params:
      if p.name not in kw:
        raise U.ExpectedError(400, "Missing parameter `%s'" % p.name)

    ## Invoke the subcommand.
    me.func(**kw)
500
def subcommand(name, contexts, desc, cls = Subcommand, *args, **kw):
  """
  Decorator for defining CGI subcommands.

  This just calls `SC.subcommand' with the same arguments, except that the
  subcommand class CLS defaults to the CGI-aware `Subcommand' class defined
  here.
  """
  kw['cls'] = cls
  return SC.subcommand(name, contexts, desc, *args, **kw)
504
505###----- That's all, folks --------------------------------------------------