chiark / gitweb /
Initial commit.
[chopwood] / cgi.py
CommitLineData
a2916c06
MW
1### -*-python-*-
2###
3### CGI machinery
4###
5### (c) 2013 Mark Wooding
6###
7
8###----- Licensing notice ---------------------------------------------------
9###
10### This file is part of Chopwood: a password-changing service.
11###
12### Chopwood is free software; you can redistribute it and/or modify
13### it under the terms of the GNU Affero General Public License as
14### published by the Free Software Foundation; either version 3 of the
15### License, or (at your option) any later version.
16###
17### Chopwood is distributed in the hope that it will be useful,
18### but WITHOUT ANY WARRANTY; without even the implied warranty of
19### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20### GNU Affero General Public License for more details.
21###
22### You should have received a copy of the GNU Affero General Public
23### License along with Chopwood; if not, see
24### <http://www.gnu.org/licenses/>.
25
26from __future__ import with_statement
27
28import contextlib as CTX
29import os as OS; ENV = OS.environ
30import re as RX
31import sys as SYS
32import time as T
33import traceback as TB
34
35from auto import HOME, PACKAGE, VERSION
36import config as CONF; CFG = CONF.CFG
37import format as F
38import output as O; OUT = O.OUT; PRINT = O.PRINT
39import subcommand as SC
40import util as U
41
42###--------------------------------------------------------------------------
43### Configuration tweaks.
44
## The URL path of this script, taken from the `SCRIPT_NAME' environment
## variable, with a conventional fallback if the variable is unset.
_script_name = ENV.get('SCRIPT_NAME', '/cgi-bin/chpwd')

## Register CGI-related configuration defaults.
CONF.DEFAULTS.update(

    ## The URL of this program, when it's run through CGI.
    SCRIPT_NAME = _script_name,

    ## A (maybe relative) URL for static content. By default this comes from
    ## the main script, but we hope that user agents cache it.
    STATIC = _script_name + '/static')
55
56###--------------------------------------------------------------------------
57### Escaping and encoding.
58
## Some handy regular expressions.
##
## `R_URLESC' matches a `%XX' escape and captures its two hex digits.
## `R_URLBAD' matches any one character which is neither a word character
## nor one of `-', `,', `.' or `!'.  `R_HTMLBAD' matches one of `&', `<'
## or `>'.
R_URLESC = RX.compile('%([0-9a-fA-F]{2})')
R_URLBAD = RX.compile('[^-\\w,.!]')
R_HTMLBAD = RX.compile('[&<>]')
63
def urldecode(s):
    """
    Decode a single form-url-encoded string S.

    Every `+' is turned into a space, and then each `%XX' escape is replaced
    by the character whose code is the hexadecimal number XX.  The decoded
    string is returned.
    """
    return R_URLESC.sub(lambda m: chr(int(m.group(1), 16)),
                        s.replace('+', ' '))
69
def urlencode(s):
    """
    Return S in form-url-encoded form.

    Each character matched by `R_URLBAD' is replaced by a `%xx' escape giving
    its character code as (at least) two lower-case hex digits.
    """
    def escape(match):
        return '%%%02x' % ord(match.group(0))
    return R_URLBAD.sub(escape, s)
73
def htmlescape(s):
    """
    Return S with the characters HTML treats specially made harmless.

    Each character matched by `R_HTMLBAD' is replaced by a numeric character
    reference of the form `&#xXX;'.
    """
    def charref(match):
        return '&#x%02x;' % ord(match.group(0))
    return R_HTMLBAD.sub(charref, s)
77
## Some standard character sequences, and HTML entity names for prettier
## versions.
##
## NOTE(review): several keys are prefixes of others (e.g., "--" and "---"),
## so this presumably relies on `U.StringSubst' preferring the longest match
## -- confirm against the `util' module.
_quotify = U.StringSubst({
    "`": '&lsquo;',
    "'": '&rsquo;',
    "``": '&ldquo;',
    "''": '&rdquo;',
    "--": '&ndash;',
    "---": '&mdash;'
})
def html_quotify(s):
    """
    Return a pretty HTML version of S.

    The string is first made safe with `htmlescape', and the result is then
    passed through the `_quotify' substitution table.
    """
    escaped = htmlescape(s)
    return _quotify(escaped)
91
92###--------------------------------------------------------------------------
93### Output machinery.
94
class HTTPOutput (O.FileOutput):
    """
    A file output driver which looks after the HTTP header.

    Anything written through the driver is preceded by a default `text/plain'
    header, unless `header' has been called explicitly beforehand.  The
    `headerp' attribute is true once a header has been written.
    """

    def __init__(me, *args, **kw):
        """Constructor: as for the superclass; no header written yet."""
        super(HTTPOutput, me).__init__(*args, **kw)
        me.headerp = False

    def write(me, msg):
        """Output protocol: make sure a header precedes the first output."""
        if not me.headerp:
            me.header('text/plain')
        super(HTTPOutput, me).write(msg)

    def header(me, content_type = 'text/plain', **kw):
        """
        Write the HTTP header, unless one has been written already.

        CONTENT_TYPE and the keyword arguments KW are handed to
        `O.http_headers', which produces the header lines; a blank line is
        written after them.  Does nothing on second and later calls.
        """
        if not me.headerp:
            ## Set the flag before writing anything: the header lines are
            ## written through this same object (presumably `writeln' ends
            ## up in `write'), which must not try to emit a header again.
            me.headerp = True
            for line in O.http_headers(content_type = content_type, **kw):
                me.writeln(line)
            me.writeln('')
125
def cookie(name, value, **kw):
    """
    Build the text of a HTTP `Set-Cookie' header.

    NAME and VALUE are the cookie's name and value; both are
    form-url-encoded.  The keyword arguments KW are further cookie
    attributes: each name is lower-cased and has its underscores `_' turned
    into hyphens `-'.  An attribute whose value is exactly `True' is written
    as a bare name; any other attribute is written as `NAME=VALUE'.  If a
    `max-age' attribute is given, an `expires' attribute is set to that many
    seconds after `U.NOW', formatted as a GMT timestamp.
    """

    ## Normalize the attribute names.
    attr = {}
    for key, val in kw.items():
        attr[key.lower().replace('_', '-')] = val

    ## Derive an `expires' attribute from `max-age' if there is one.
    if 'max-age' in attr:
        maxage = int(attr['max-age'])
        attr['expires'] = T.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                     T.gmtime(U.NOW + maxage))

    ## Assemble the pieces.
    pieces = ['%s=%s' % (urlencode(name), urlencode(value))]
    for key, val in attr.items():
        if val is True:
            pieces.append(key)
        else:
            pieces.append('%s=%s' % (key, val))
    return '; '.join(pieces)
149
def action(*v, **kw):
    """
    Return a URL which invokes this script.

    The positional arguments V are path elements: they are joined with `/'
    onto the configured script name `CFG.SCRIPT_NAME'.  The keyword arguments
    KW, if any, become a query string: names and values are
    form-url-encoded, written as `NAME=VALUE' in sorted order of name, and
    separated by `;'.  The finished URL is passed through `htmlescape'
    before being returned.
    """
    parts = [CFG.SCRIPT_NAME]
    parts.extend(v)
    url = '/'.join(parts)
    if kw:
        query = ['%s=%s' % (urlencode(key), urlencode(kw[key]))
                 for key in sorted(kw)]
        url = url + '?' + ';'.join(query)
    return htmlescape(url)
164
def static(name):
    """
    Return the (HTML-escaped) URL for the static file NAME.

    The URL is NAME appended, after a `/', to the configured `CFG.STATIC'.
    """
    url = '/'.join([CFG.STATIC, name])
    return htmlescape(url)
168
@CTX.contextmanager
def html(title, **kw):
    """
    Context manager for HTML output.

    Keyword arguments are output as HTTP headers (if no header has been
    written yet); the content type is always forced to `text/html'.  A
    `<head>' element containing the TITLE (prettified by `html_quotify') and
    links to the static stylesheet and script is written, and a `<body>'
    opened, before the context body is executed.  When the context body
    finishes normally, a credits block is written and the elements are
    closed off properly.  If the context body raises an exception, the
    closing text is not written.
    """

    ## Write the HTTP header.
    kw = dict(kw, content_type = 'text/html')
    OUT.header(**kw)

    ## Write the HTML header.  The system identifier must name the W3C's
    ## published copy of the strict DTD, which lives at `www.w3.org'.
    PRINT("""\
<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01//EN'
 'http://www.w3.org/TR/html4/strict.dtd'>
<html>
<head>
 <title>%(title)s</title>
 <link rel=stylesheet type='text/css' media=screen href='%(style)s'>
 <meta http-equiv='Content-Script-Type' content='text/javascript'>
 <script type='text/javascript' src='%(script)s'></script>
</head>""" % dict(title = html_quotify(title),
                  style = static('chpwd.css'),
                  script = static('chpwd.js')))

    ## Write the body.
    PRINT('<body>')
    yield None
    PRINT('''\

<div class=credits>
 <a href="%(about)s">Chopwood</a>, version %(version)s:
 copyright &copy; 2012 Mark Wooding
</div>

</body>
</html>''' % dict(about = static('about.html'),
                  version = VERSION))
209
def redirect(where, **kw):
    """
    Write a complete redirection to some other URL.

    A header with status 302 and a `Location' of WHERE is written (if no
    header has been written yet), with any further keyword arguments KW
    passed on as additional headers.  It is followed by a tiny HTML page
    linking to WHERE, for the benefit of user agents which don't follow the
    redirection by themselves.
    """
    OUT.header(content_type = 'text/html',
               status = 302, location = where,
               **kw)

    ## The URL lands inside a double-quoted attribute value, and `htmlescape'
    ## leaves `"' alone, so deal with that character here.
    target = htmlescape(where).replace('"', '&#x22;')

    PRINT("""\
<html>
<head><title>No, sorry, it's moved again.</title></head>
<body><p>I'm <a href="%s">over here</a> now.</body>
</html>""" % target)
222
223###--------------------------------------------------------------------------
224### Templates.
225
## Where we find our templates.
TMPLDIR = HOME

## Keyword arguments for templates.  The current set lives in `STATE.kw';
## `tmplkw' rebinds it temporarily in order to add more.
STATE = U.Fluid()
STATE.kw = {}
232
233## Set some basic keyword arguments.
@CONF.hook
def set_template_keywords():
    """
    Add the basic template keywords to `STATE.kw'.

    These are the package name and version, and the configured script and
    static-content URLs.  Registered with `CONF.hook' (presumably so that it
    runs once the configuration is available -- confirm against `config').
    """
    basics = dict(package = PACKAGE,
                  version = VERSION,
                  script = CFG.SCRIPT_NAME,
                  static = CFG.STATIC)
    STATE.kw.update(basics)
241
class TemplateFinder (object):
    """
    A dictionary-like object mapping template names to template text.

    Looking up a KEY reads the file named KEY in the finder's directory and
    returns its contents.  The text is remembered, so later lookups of the
    same KEY don't touch the filesystem again.
    """
    def __init__(me, dir):
        """Constructor: templates will be looked for in the directory DIR."""
        me._cache = {}
        me._dir = dir
    def __getitem__(me, key):
        """Return the text of the template KEY, reading it if necessary."""
        if key not in me._cache:
            path = OS.path.join(me._dir, key)
            with open(path) as f:
                me._cache[key] = f.read()
        return me._cache[key]
## The template finder used by this module: it looks in `TMPLDIR'.
TMPL = TemplateFinder(TMPLDIR)
256
@CTX.contextmanager
def tmplkw(**kw):
    """
    Context manager: execute the body with additional template keywords.

    While the body runs, `STATE.kw' is bound to a fresh dictionary holding
    the previous keywords overlaid with KW.
    """
    merged = dict(STATE.kw, **kw)
    with STATE.bind(kw = merged):
        yield
266
## Our own formatting operations, indexed by directive character.
FORMATOPS = {}

class FormatHTML (F.SimpleFormatOperation):
    """
    ~H: escape output suitable for inclusion in HTML.

    With `:', additionally prettify quotes and dashes, as `html_quotify'
    does; without, just apply `htmlescape'.
    """
    def _convert(me, arg):
        """Return the converted form of the argument ARG."""
        convert = html_quotify if me.colonp else htmlescape
        return convert(arg)
FORMATOPS['H'] = FormatHTML
279
def format_tmpl(control, **kw):
    """
    Format the template string CONTROL to the output `OUT'.

    The formatting operations in `FORMATOPS' are made available alongside
    the standard ones in `F.BASEOPS'.  The keyword arguments KW are added to
    the current template keywords (see `tmplkw') for the duration, and the
    whole lot is passed to `F.format'.
    """
    opmaps = [FORMATOPS, F.BASEOPS]
    with F.COMPILE.bind(opmaps = opmaps):
        with tmplkw(**kw):
            F.format(OUT, control, **STATE.kw)
284
def page(template, header = {}, title = 'Chopwood', **kw):
    """
    Write a complete HTML page based on the template named TEMPLATE.

    HEADER is a dictionary of HTTP header arguments for `OUT.header'; the
    content type is always forced to `text/html'.  The `wrapper.fhtml'
    template is then formatted, with TITLE as its `title' keyword, the text
    of TEMPLATE as its `payload', and the remaining keyword arguments KW
    passed along too.
    """

    ## Work on a copy, so the caller's dictionary (or the shared default) is
    ## never modified.
    hdr = dict(header)
    hdr['content_type'] = 'text/html'
    OUT.header(**hdr)

    wrapper = TMPL['wrapper.fhtml']
    payload = TMPL[template]
    format_tmpl(wrapper, title = title, payload = payload, **kw)
290
291###--------------------------------------------------------------------------
292### Error reporting.
293
def cgi_error_guts():
    """
    Report an exception while we're acting as a CGI, together with lots of
    information about our state.

    The exception reported is the one currently being handled (as given by
    `SYS.exc_info').  The report consists of the exception itself, a
    traceback of up to 20 entries, and tables of the query parameters,
    cookies, path elements and environment variables.

    Our caller has, probably at great expense, arranged that we can format
    lots of text.
    """

    ## Grab the exception information.
    exty, exval, extb = SYS.exc_info()

    ## Print the exception itself.
    PRINT("""\
<h2>Exception</h2>
<pre>%s</pre>""" % html_quotify(
        '\n'.join(TB.format_exception_only(exty, exval))))

    ## Format a traceback so we can find out what has gone wrong.
    PRINT("""\
<h2>Traceback</h2>
<ol>""")
    for filename, lineno, func, text in TB.extract_tb(extb, 20):
        PRINT("<li><b>%s</b>:%d (<b>%s</b>)" % (
            htmlescape(filename), lineno, htmlescape(func)))
        if text is not None:
            PRINT("<br><tt>%s</tt>" % htmlescape(text))
    PRINT("</ol>")

    ## Format various useful tables.  Each of these writes table rows only:
    ## the enclosing `<table>' element is supplied by the loop below.
    def fmt_dict(d):
        fmt_kvlist(d.items())
    def fmt_kvlist(l):
        for k, v in sorted(l):
            PRINT("<tr><th align=right valign=top>%s<td><tt>%s</tt>" % (
                htmlescape(k), htmlescape(v)))
    def fmt_list(l):
        for i in l:
            ## A row may only contain cells, so wrap the text in a `<td>':
            ## otherwise it gets thrown out of the table when rendered.
            PRINT("<tr><td><tt>%s</tt>" % htmlescape(i))

    PRINT("""\
<h2>Parameters</h2>""")
    for what, thing, how in [('Query', PARAM, fmt_kvlist),
                             ('Cookies', COOKIE, fmt_dict),
                             ('Path', PATH, fmt_list),
                             ('Environment', ENV, fmt_dict)]:
        PRINT("<h3>%s</h3>\n<table>" % what)
        how(thing)
        PRINT("</table>")
343
def cgi_error():
    """
    Report the current exception while in CGI mode, and exit.

    If no header has been written yet, a complete page is produced, with a
    status of 500.  Otherwise we're in the middle of some page already, so
    the report is written as a `<div>' and the page is then closed off.
    Either way, the process then exits with status 1.
    """
    if not OUT.headerp:
        with html("chpwd internal error", status = 500):
            PRINT("<h1>chpwd internal error</h1>")
            cgi_error_guts()
    else:
        PRINT("<div class=exception>")
        cgi_error_guts()
        PRINT("</div>\n</body></html>")
    SYS.exit(1)
361
@CTX.contextmanager
def cgi_errors(hook = None):
    """
    Context manager: report errors in the body as useful HTML.

    If the body raises an exception (derived from `Exception'), it is
    reported rather than propagated.  If HOOK is given, then call it (with
    no arguments) before reporting errors.  It may have set up useful stuff.

    An `ExpectedError' raised before any header has been written is reported
    using the `error.fhtml' template, with the error's `code' as the HTTP
    status.  Anything else is reported using the `exception.fhtml' template,
    together with the traceback, query parameters, cookies, path and
    environment: as a complete page with status 500 if no header has been
    written yet, or as a fragment (with `toplevel' false) if output has
    started already.
    """
    try:
        yield None
    except Exception:
        ## Capture the details now, before anything else can disturb them.
        exty, exval, extb = SYS.exc_info()
        if hook: hook()
        if isinstance(exval, U.ExpectedError) and not OUT.headerp:
            ## Note that the parameter of `page' is called `header': under
            ## any other name the status would end up as a template keyword
            ## and never be sent.
            page('error.fhtml',
                 header = dict(status = exval.code),
                 title = 'Chopwood: error', error = exval)
        else:
            with tmplkw(exception = TB.format_exception_only(exty, exval),
                        traceback = TB.extract_tb(extb),
                        PARAM = sorted(PARAM),
                        COOKIE = sorted(COOKIE.items()),
                        PATH = PATH,
                        ENV = sorted(ENV.items())):
                if OUT.headerp:
                    format_tmpl(TMPL['exception.fhtml'], toplevel = False)
                else:
                    page('exception.fhtml',
                         header = dict(status = 500),
                         title = 'Chopwood: internal error',
                         toplevel = True)
393
394###--------------------------------------------------------------------------
395### CGI input.
396
## Lots of global variables to be filled in by `cgiparse'.
COOKIE = {}                     # cookie names -> values
SPECIAL = {}                    # special parameters: entries are only
                                # ever updated, never added, by `cgiparse'
PARAM = []                      # (KEY, VALUE) pairs, in order
PARAMDICT = {}                  # parameters which appear exactly once
PATH = []                       # components of `PATH_INFO'

## Regular expressions for splitting apart query and cookie strings.  Query
## components are separated by `;' or `&'; cookies only by `;'.
R_QSPLIT = RX.compile('[;&]')
R_CSPLIT = RX.compile(';')
407
def split_keyvalue(string, delim, default):
    """
    Generate the (KEY, VALUE) pairs found in STRING.

    DELIM is a compiled regular expression: STRING is split wherever it
    matches, and each piece is parsed as KEY[=VALUE], splitting at the first
    `='.  Leading and trailing whitespace is stripped from KEY and VALUE,
    and both are then form-url-decoded.  A piece with no `=' is given the
    VALUE DEFAULT, unless DEFAULT is `None', in which case the piece is
    ignored.  Pieces whose KEY is empty are ignored too.
    """
    for item in delim.split(string):
        key, sep, value = item.partition('=')
        if not sep:
            ## No `=': fall back on the default, if there is one.
            if default is None: continue
            value = default
        key, value = key.strip(), value.strip()
        if key:
            yield urldecode(key), urldecode(value)
428
def cgiparse():
    """
    Process all of the various exciting CGI environment variables.

    We read environment variables and populate some tables left in global
    variables: it's all rather old-school.  Variables set are as follows.

    `COOKIE'
          A dictionary mapping cookie names to the values provided by the
          user agent.

    `SPECIAL'
          A dictionary holding some special query parameters which are of
          interest at a global level, and should not be passed to a
          subcommand handler.  No new entries will be added to this
          dictionary, though values will be modified to reflect the query
          parameters discovered.  Conventionally, such parameters have names
          beginning with `%'.

    `PARAM'
          The query parameters as a list of (KEY, VALUE) pairs.  Special
          parameters are omitted.

    `PARAMDICT'
          The query parameters as a dictionary.  Special parameters, and
          parameters which appear more than once, are omitted.

    `PATH'
          The trailing `PATH_INFO' path, split at `/' markers, with any
          trailing empty component removed.

    The query string is taken from `QUERY_STRING' for a `GET' request, or
    read from standard input for a `POST' request (whose content type must
    be `application/x-www-form-urlencoded').  An `ExpectedError' with code
    500 is raised if a required environment variable is missing, if the
    request method or content type is not one we understand, or if the
    request body can't be read properly.
    """

    def getenv(var):
        """Return the value of the environment variable VAR, or fail."""
        try: return ENV[var]
        except KeyError:
            raise U.ExpectedError(500, "No `%s' supplied" % var)

    ## Yes, we want the request method.
    method = getenv('REQUEST_METHOD')

    ## Acquire the query string.
    if method == 'GET':
        q = getenv('QUERY_STRING')

    elif method == 'POST':

        ## We must read the query string from stdin.
        n = getenv('CONTENT_LENGTH')
        if not n.isdigit():
            raise U.ExpectedError(500, "Invalid CONTENT_LENGTH")
        n = int(n, 10)
        ct = getenv('CONTENT_TYPE')
        if ct != 'application/x-www-form-urlencoded':
            raise U.ExpectedError(500, "Unexpected content type `%s'" % ct)
        q = SYS.stdin.read(n)
        if len(q) != n:
            raise U.ExpectedError(500, "Failed to read correct length")

    else:
        raise U.ExpectedError(500,
                              "Unexpected request method `%s'" % method)

    ## Populate the `SPECIAL', `PARAM' and `PARAMDICT' tables.
    seen = set()
    for k, v in split_keyvalue(q, R_QSPLIT, 't'):
        if k in SPECIAL:
            SPECIAL[k] = v
            continue
        PARAM.append((k, v))
        if k in seen:
            ## A repeated parameter: drop it from the dictionary.  It will
            ## be gone already if this is its third or later appearance, so
            ## don't insist on finding it there.
            PARAMDICT.pop(k, None)
        else:
            PARAMDICT[k] = v
            seen.add(k)

    ## Parse out the cookies, if any.
    try: c = ENV['HTTP_COOKIE']
    except KeyError: pass
    else:
        for k, v in split_keyvalue(c, R_CSPLIT, None): COOKIE[k] = v

    ## Set up the `PATH'.
    try: p = ENV['PATH_INFO']
    except KeyError: pass
    else:
        pp = p.lstrip('/').split('/')
        if pp and not pp[-1]: pp.pop()
        PATH[:] = pp
513
514###--------------------------------------------------------------------------
515### CGI subcommands.
516
class Subcommand (SC.Subcommand):
    """
    A subcommand which can be invoked through CGI.

    This is like `subcommand.Subcommand', with the addition of a `cgi'
    method which invokes the subcommand given CGI query parameters.
    """

    def cgi(me, param, path):
        """
        Invoke the subcommand given a collection of CGI parameters.

        PARAM is a list of (KEY, VALUE) pairs from the CGI query.  They are
        checked against the subcommand's formal parameters: switches
        (options without an argument name) are converted to booleans; only
        the `rest' parameter, if there is one, may appear more than once;
        and all of the mandatory parameters must be supplied.  A parameter
        which the subcommand doesn't know about is an error if it has a
        nonempty value, and is ignored otherwise.

        PATH is a list of trailing path components.  It is used as the value
        of the `rest' parameter if there is one and no query parameter has
        supplied it; otherwise a nonempty PATH is an error.

        Problems are reported by raising `ExpectedError', with code 400 for
        trouble with the parameters and 404 for unwanted path elements.  If
        all is well, the subcommand's function is called with the collected
        keyword arguments.
        """

        ## `kw' is the keyword-argument dictionary which we'll pass to the
        ## handler function.  It also tells us which parameters we've seen.
        kw = {}

        def set_value(k, v):
            """Set a simple value: we shouldn't see multiple values."""
            if k in kw:
                raise U.ExpectedError(400, "Repeated parameter `%s'" % k)
            kw[k] = v
        def set_bool(k, v):
            """Set a simple boolean value: for switches."""
            set_value(k, v.lower() in ['true', 't', 'yes', 'y'])
        def set_list(k, v):
            """Append the value to a list: for the `rest' parameter."""
            kw.setdefault(k, []).append(v)

        ## `want' maps each acceptable parameter name to the function which
        ## stores a value for it.
        want = {}
        for opt in me.opts:
            want[opt.name] = set_value if opt.argname else set_bool
        for formal in me.params:
            want[formal.name] = set_value
        for formal in me.oparams:
            want[formal.name] = set_value
        if me.rparam:
            want[me.rparam.name] = set_list

        ## Work through the list of supplied parameters.
        for key, value in param:
            handler = want.get(key)
            if handler is not None:
                handler(key, value)
            elif value:
                raise U.ExpectedError(400,
                                      "Unexpected parameter `%s'" % key)

        ## Deal with a path, if there is one.
        if path:
            if not me.rparam or me.rparam.name in kw:
                raise U.ExpectedError(404, "Superfluous path elements")
            kw[me.rparam.name] = path

        ## Make sure we saw all of the mandatory parameters.
        for formal in me.params:
            if formal.name not in kw:
                raise U.ExpectedError(
                    400, "Missing parameter `%s'" % formal.name)

        ## Invoke the subcommand.
        me.func(**kw)
598
def subcommand(name, contexts, desc, cls = Subcommand, *args, **kw):
    """
    Decorator for defining CGI subcommands.

    This is `SC.subcommand' with the class CLS defaulting to the CGI-aware
    `Subcommand' defined in this module; all of the arguments are passed on.
    """
    options = dict(kw, cls = cls)
    return SC.subcommand(name, contexts, desc, *args, **options)
602
603###----- That's all, folks --------------------------------------------------