chiark / gitweb /
format.py: Allow general format controls more widely.
[chopwood] / cgi.py
CommitLineData
a2916c06
MW
1### -*-python-*-
2###
3### CGI machinery
4###
5### (c) 2013 Mark Wooding
6###
7
8###----- Licensing notice ---------------------------------------------------
9###
10### This file is part of Chopwood: a password-changing service.
11###
12### Chopwood is free software; you can redistribute it and/or modify
13### it under the terms of the GNU Affero General Public License as
14### published by the Free Software Foundation; either version 3 of the
15### License, or (at your option) any later version.
16###
17### Chopwood is distributed in the hope that it will be useful,
18### but WITHOUT ANY WARRANTY; without even the implied warranty of
19### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20### GNU Affero General Public License for more details.
21###
22### You should have received a copy of the GNU Affero General Public
23### License along with Chopwood; if not, see
24### <http://www.gnu.org/licenses/>.
25
26from __future__ import with_statement
27
28import contextlib as CTX
29import os as OS; ENV = OS.environ
30import re as RX
31import sys as SYS
32import time as T
33import traceback as TB
34
35from auto import HOME, PACKAGE, VERSION
36import config as CONF; CFG = CONF.CFG
37import format as F
38import output as O; OUT = O.OUT; PRINT = O.PRINT
39import subcommand as SC
40import util as U
41
42###--------------------------------------------------------------------------
43### Configuration tweaks.
44
## The URL path at which the web server says this script lives; use a
## conventional default if it didn't say.
try: _script_name = ENV['SCRIPT_NAME']
except KeyError: _script_name = '/cgi-bin/chpwd'

CONF.DEFAULTS.update(

  ## URL of this program when it's invoked through CGI.
  SCRIPT_NAME = _script_name,

  ## URL (possibly relative) beneath which static content is found.  The
  ## default has the main script serve it, in the hope that user agents will
  ## cache the results.
  STATIC = _script_name + '/static')
55
56###--------------------------------------------------------------------------
57### Escaping and encoding.
58
59## Some handy regular expressions.
## A `%XX' escape in form-url-encoded text; the group is the two hex digits.
R_URLESC = RX.compile(r'%([0-9a-fA-F]{2})')

## Any single character which must be escaped in form-url-encoded text.
R_URLBAD = RX.compile(r'[^-\w,.!]')

## Any single character which must be escaped in HTML text or attributes.
R_HTMLBAD = RX.compile(r'''[&<>'"]''')
a2916c06
MW
63
def urldecode(s):
  """
  Decode a single form-url-encoded string S.

  Each `+' is turned into a space, and then each `%XX' sequence (where XX is
  a pair of hex digits) is replaced by the character with that code.  A `%'
  which isn't followed by two hex digits is left as it is.  Returns the
  decoded string.
  """
  return R_URLESC.sub(lambda m: chr(int(m.group(1), 16)),
                      s.replace('+', ' '))
69
def urlencode(s):
  """
  Return the string S in form-url-encoded form.

  Every character matched by `R_URLBAD' is replaced by a `%XX' escape giving
  the character's code as (at least) two lower-case hex digits.  Note that a
  space is written as `%20', not `+'.
  """
  def escape(match):
    return '%%%02x' % ord(match.group(0))
  return R_URLBAD.sub(escape, s)
73
def htmlescape(s):
  """
  Return the literal string S made safe for inclusion in HTML.

  Every character matched by `R_HTMLBAD' is replaced by a hexadecimal
  numeric character reference `&#xXX;'.
  """
  def charref(match):
    return '&#x%02x;' % ord(match.group(0))
  return R_HTMLBAD.sub(charref, s)
77
78## Some standard character sequences, and HTML entity names for prettier
79## versions.
b40d16b2
MW
html_quotify = U.StringSubst(dict([

  ## Characters which HTML would otherwise misinterpret.
  ('<', '&lt;'),
  ('>', '&gt;'),
  ('&', '&amp;'),

  ## Single and double quotation marks.
  ('`', '&lsquo;'),
  ("'", '&rsquo;'),
  ('"', '&quot;'),
  ('``', '&ldquo;'),
  ("''", '&rdquo;'),

  ## Dashes.
  ('--', '&ndash;'),
  ('---', '&mdash;'),
]))
a2916c06
MW
92
93###--------------------------------------------------------------------------
94### Output machinery.
95
class HTTPOutput (O.FileOutput):
  """
  A file output driver which makes sure an HTTP header precedes the body.

  The `headerp' attribute is true once a header has been written.  Call the
  `header' method to choose what the header says; if nobody does, a
  `text/plain' header is written just before the first output.
  """

  def __init__(me, *args, **kw):
    """Constructor: set up the underlying driver and clear `headerp'."""
    super(HTTPOutput, me).__init__(*args, **kw)
    me.headerp = False

  def write(me, msg):
    """Output protocol: write MSG, preceded by a default header if needed."""
    if not me.headerp:
      me.header('text/plain')
    super(HTTPOutput, me).write(msg)

  def header(me, content_type = 'text/plain', **kw):
    """
    Write the HTTP header, unless one has been written already.

    The CONTENT_TYPE and the other keyword arguments KW are handed to
    `O.http_headers', which turns them into the header lines: see there for
    the formatting rules.  The header is finished with a blank line.
    """
    if me.headerp: return

    ## Set the flag before writing anything: `write' checks it, and we don't
    ## want it to start another header while we're in the middle of this one.
    me.headerp = True
    fields = dict(kw, content_type = content_type)
    for line in O.http_headers(**fields):
      me.writeln(line)
    me.writeln('')
126
def cookie(name, value, **kw):
  """
  Return the text of a HTTP `Set-Cookie' header for a cookie.

  NAME and VALUE are the cookie's name and value; both are form-url-encoded
  so that they can't be misinterpreted.  The keyword arguments KW give
  further attributes: each name is converted to lower-case with underscores
  `_' replaced by hyphens `-'.  An attribute whose value is `True' is
  written as just its name; one whose value is false is left out entirely;
  anything else is written as `NAME=VALUE'.

  If a `max-age' attribute is given, a matching `expires' attribute is
  computed from it, relative to `U.NOW'.
  """

  ## Canonify the attribute names.
  attr = dict((k.lower().replace('_', '-'), v) for k, v in kw.items())

  ## Provide an `expires' time to go with any `max-age'.
  if 'max-age' in attr:
    maxage = int(attr['max-age'])
    attr['expires'] = T.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                 T.gmtime(U.NOW + maxage))

  ## Assemble the pieces.
  pieces = ['%s=%s' % (urlencode(name), urlencode(value))]
  for k, v in attr.items():
    if not v: continue
    if v is True: pieces.append(k)
    else: pieces.append('%s=%s' % (k, v))
  return '; '.join(pieces)
a2916c06
MW
150
def action(*v, **kw):
  """
  Return an (HTML-escaped) URL which invokes this script.

  The positional arguments V are path components: they are joined, with `/'
  separators, onto the end of the configured `SCRIPT_NAME'.  The keyword
  arguments KW, if there are any, become a query string: names and values
  are form-url-encoded, and the `NAME=VALUE' pairs are written in sorted
  order of name, separated by `;'.
  """
  components = [CFG.SCRIPT_NAME]
  components.extend(v)
  url = '/'.join(components)
  if kw:
    pairs = ['%s=%s' % (urlencode(k), urlencode(kw[k])) for k in sorted(kw)]
    url = url + '?' + ';'.join(pairs)
  return htmlescape(url)
165
def static(name):
  """
  Return an (HTML-escaped) URL for the static file NAME.

  The URL is NAME appended, after a `/', to the configured `STATIC' prefix.
  """
  url = '/'.join([CFG.STATIC, name])
  return htmlescape(url)
169
a2916c06
MW
def redirect(where, **kw):
  """
  Write a complete redirection to some other URL.

  WHERE is the URL to send the user agent to.  It is given (as is) as the
  `location' field of a header with status 302, and again (HTML-escaped) as
  a link in a small HTML body, for the benefit of anything which doesn't
  follow the redirection by itself.  Other keyword arguments KW are passed
  on to `OUT.header' along with these.
  """
  OUT.header(content_type = 'text/html',
             status = 302, location = where,
             **kw)

  ## The paragraph and body elements are closed properly here: the final tag
  ## used to be a second `<body>' opener.
  PRINT("""\
<html>
<head><title>No, sorry, it's moved again.</title></head>
<body><p>I'm <a href="%s">over here</a> now.</p></body>
</html>""" % htmlescape(where))
182
183###--------------------------------------------------------------------------
184### Templates.
185
## The directory in which template files are found.
TMPLDIR = HOME

## Rebindable state.  The `kw' slot holds the dictionary of keyword arguments
## made available to templates; it starts out empty.
STATE = U.Fluid()
STATE.kw = dict()
192
193## Set some basic keyword arguments.
@CONF.hook
def set_template_keywords():
  """
  Add the basic keyword arguments to `STATE.kw'.

  These are the package name and version, and the `SCRIPT_NAME', `STATIC'
  and `ALLOWOP' configuration settings.  This is registered with
  `CONF.hook' -- presumably so that it runs once the configuration has been
  read.
  """
  basics = dict(package = PACKAGE,
                version = VERSION,
                script = CFG.SCRIPT_NAME,
                static = CFG.STATIC,
                allowop = CFG.ALLOWOP)
  STATE.kw.update(basics)
a2916c06
MW
202
class TemplateFinder (object):
  """
  A dictionary-like object which fetches templates from files on demand.

  Indexing the object with a file name returns the contents of that file,
  found in the directory given to the constructor, as a string.  Each file
  is read at most once: the contents are remembered, and later requests for
  the same name are answered from memory.
  """

  def __init__(me, dir):
    """Constructor: templates will be looked for in the directory DIR."""
    me._dir = dir
    me._cache = {}

  def __getitem__(me, key):
    """Return the contents of the template file named KEY."""
    cache = me._cache
    if key not in cache:
      with open(OS.path.join(me._dir, key)) as f:
        cache[key] = f.read()
    return cache[key]
## The template finder used by this module: it reads from `TMPLDIR'.
TMPL = TemplateFinder(TMPLDIR)
217
@CTX.contextmanager
def tmplkw(**kw):
  """
  Context manager: execute the body with additional template keywords.

  For the duration of the body, `STATE.kw' is bound (using `STATE.bind') to
  a fresh dictionary containing the keyword arguments in force on entry,
  overlaid with KW.  The dictionary in force on entry is not modified.
  """
  merged = dict(STATE.kw, **kw)
  with STATE.bind(kw = merged):
    yield
227
## Formatting operations added by this module, keyed by directive character.
## `format_tmpl' makes these available alongside `F.BASEOPS'.
FORMATOPS = {}
229
class FormatHTML (F.SimpleFormatOperation):
  """
  ~H: write the argument escaped for inclusion in HTML.

  Without modifiers, the argument is escaped using `htmlescape'.  With `:',
  it is instead passed through `html_quotify', which applies quotification
  as well.
  """
  def _convert(me, arg):
    """Return the string ARG, escaped as directed by the `:' flag."""
    if not me.colonp: return htmlescape(arg)
    return html_quotify(arg)

## Make `~H' available to templates.
FORMATOPS['H'] = FormatHTML
240
def format_tmpl(control, **kw):
  """
  Format the template string CONTROL, writing the result to `OUT'.

  The operations in `FORMATOPS' are made available in addition to the
  standard ones in `F.BASEOPS'.  The template is given the keyword arguments
  currently in `STATE.kw', overlaid with KW.
  """
  opmaps = [FORMATOPS, F.BASEOPS]
  with F.COMPILE.bind(opmaps = opmaps):
    with tmplkw(**kw):
      F.format(OUT, control, **STATE.kw)
245
def page(template, header = {}, title = 'Chopwood', **kw):
  """
  Write a complete HTML page: a header, and TEMPLATE inside the wrapper.

  HEADER is a dictionary of keyword arguments for `OUT.header'; it is
  copied, not modified, and the content type is always forced to
  `text/html'.  The page itself is made by formatting the `wrapper.fhtml'
  template, which is given TITLE as `title', the text of the template named
  TEMPLATE as `payload', and any further keyword arguments KW.
  """
  fields = dict(header)
  fields['content_type'] = 'text/html'
  OUT.header(**fields)

  wrapper = TMPL['wrapper.fhtml']
  payload = TMPL[template]
  format_tmpl(wrapper, title = title, payload = payload, **kw)
251
252###--------------------------------------------------------------------------
253### Error reporting.
254
a2916c06
MW
@CTX.contextmanager
def cgi_errors(hook = None):
  """
  Context manager: report errors raised by the body as HTML.

  If the body raises an exception (anything derived from `Exception'), then
  HOOK, if given, is called with no arguments -- it might set up things
  which the report needs -- and then a report is written.

    * An `U.ExpectedError', if no HTTP header has been written yet, becomes
      an `error.fhtml' page whose status is the error's `code'.

    * Anything else is reported using `exception.fhtml', with details of
      the exception, a traceback, and the request parameters, cookies, path
      and environment.  This is a complete page with status 500 if no
      header has been written yet, or else a fragment added to the output
      so far.

  The exception is not propagated further.
  """
  try:
    yield None
  except Exception:
    exty, exval, extb = SYS.exc_info()
    if hook: hook()

    if isinstance(exval, U.ExpectedError) and not OUT.headerp:
      ## An anticipated failure, and we can still choose the status.
      page('error.fhtml',
           header = dict(status = exval.code),
           title = 'Chopwood: error', error = exval)

    else:
      ## Something unexpected, or too late for a tidy error page: dump
      ## everything which might be useful for debugging.
      details = dict(exception = TB.format_exception_only(exty, exval),
                     traceback = TB.extract_tb(extb),
                     PARAM = sorted(PARAM),
                     COOKIE = sorted(COOKIE.items()),
                     PATH = PATH,
                     ENV = sorted(ENV.items()))
      with tmplkw(**details):
        if OUT.headerp:
          format_tmpl(TMPL['exception.fhtml'], toplevel = False)
        else:
          page('exception.fhtml',
               header = dict(status = 500),
               title = 'Chopwood: internal error',
               toplevel = True)
286
287###--------------------------------------------------------------------------
288### CGI input.
289
290## Lots of global variables to be filled in by `cgiparse'.
COOKIE = {}                             # cookie name -> value
SPECIAL = {}                            # special parameter name -> value
PARAM = []                              # ordinary parameters, as pairs
PARAMDICT = {}                          # unrepeated ordinary parameters
PATH = []                               # components of `PATH_INFO'
SSLP = False                            # is the connection encrypted?

## Patterns matching the separators between the items of a query string and
## of a cookie string respectively.
R_QSPLIT = RX.compile(r'[;&]')
R_CSPLIT = RX.compile(r';')
301
def split_keyvalue(string, delim, default):
  """
  Generate the (KEY, VALUE) pairs described by STRING.

  DELIM is a compiled regular expression: the STRING is split wherever it
  matches, and each piece is expected to have the form KEY[=VALUE], divided
  at the first `='.  If the `=VALUE' part is missing, then DEFAULT stands in
  for the VALUE -- unless DEFAULT is `None', in which case the piece is
  skipped.  Leading and trailing whitespace is stripped from the KEY and
  VALUE; pieces whose KEY is then empty are skipped; and finally both are
  form-url-decoded.
  """
  for item in delim.split(string):
    if '=' in item:
      key, value = item.split('=', 1)
    elif default is None:
      continue
    else:
      key, value = item, default
    key = key.strip()
    value = value.strip()
    if not key: continue
    yield urldecode(key), urldecode(value)
322
def cgiparse():
  """
  Process all of the various exciting CGI environment variables.

  We read environment variables and populate some tables left in global
  variables: it's all rather old-school.  Variables set are as follows.

  `COOKIE'
        A dictionary mapping cookie names to the values provided by the user
        agent.

  `SPECIAL'
        A dictionary holding some special query parameters which are of
        interest at a global level, and should not be passed to a subcommand
        handler.  No new entries will be added to this dictionary, though
        values will be modified to reflect the query parameters discovered.
        Conventionally, such parameters have names beginning with `%'.

  `PARAM'
        The query parameters as a list of (KEY, VALUE) pairs.  Special
        parameters are omitted.

  `PARAMDICT'
        The query parameters as a dictionary.  Special parameters, and
        parameters which appear more than once, are omitted.

  `PATH'
        The trailing `PATH_INFO' path, split at `/' markers, with any
        trailing empty component removed.

  `SSLP'
        True if the client connection is carried over SSL or TLS.

  The query string is taken from `QUERY_STRING' for a `GET' request, and
  read from standard input for a `POST' request.  An `ExpectedError' with
  code 500 is raised if a necessary environment variable is missing, if the
  request method is anything else, or if the body of a `POST' request has a
  bad length, an unexpected content type, or turns out to be short.
  """

  global SSLP

  def getenv(var):
    """Return the value of the environment variable VAR, which must exist."""
    try: return ENV[var]
    except KeyError: raise U.ExpectedError(500, "No `%s' supplied" % var)

  ## Yes, we want the request method.
  method = getenv('REQUEST_METHOD')

  ## Acquire the query string.
  if method == 'GET':
    q = getenv('QUERY_STRING')

  elif method == 'POST':

    ## We must read the query string from stdin.
    n = getenv('CONTENT_LENGTH')
    if not n.isdigit():
      raise U.ExpectedError(500, "Invalid CONTENT_LENGTH")
    n = int(n, 10)

    ## Hang onto the content type so that we can quote it in the error
    ## message.
    ct = getenv('CONTENT_TYPE')
    if ct != 'application/x-www-form-urlencoded':
      raise U.ExpectedError(500, "Unexpected content type `%s'" % ct)
    q = SYS.stdin.read(n)
    if len(q) != n:
      raise U.ExpectedError(500, "Failed to read correct length")

  else:
    raise U.ExpectedError(500, "Unexpected request method `%s'" % method)

  ## Populate the `SPECIAL', `PARAM' and `PARAMDICT' tables.
  seen = set()
  for k, v in split_keyvalue(q, R_QSPLIT, 't'):
    if k in SPECIAL:
      SPECIAL[k] = v
    else:
      PARAM.append((k, v))
      if k in seen:
        ## A repeated parameter: it doesn't belong in `PARAMDICT'.  If this
        ## is its third or later appearance then it's gone already, so don't
        ## insist on finding it there.
        PARAMDICT.pop(k, None)
      else:
        PARAMDICT[k] = v
        seen.add(k)

  ## Parse out the cookies, if any.
  try: c = ENV['HTTP_COOKIE']
  except KeyError: pass
  else:
    for k, v in split_keyvalue(c, R_CSPLIT, None): COOKIE[k] = v

  ## Set up the `PATH'.
  try: p = ENV['PATH_INFO']
  except KeyError: pass
  else:
    pp = p.lstrip('/').split('/')
    if pp and not pp[-1]: pp.pop()
    PATH[:] = pp

  ## Check the crypto for the connection.
  if ENV.get('SSL_PROTOCOL'):
    SSLP = True
416
a2916c06
MW
417###--------------------------------------------------------------------------
418### CGI subcommands.
419
class Subcommand (SC.Subcommand):
  """
  A subcommand which can be invoked from a CGI request.

  This is like `subcommand.Subcommand', with the addition of the `cgi'
  method, which turns CGI query parameters into arguments for the handler
  function.
  """

  def cgi(me, param, path):
    """
    Invoke the subcommand's handler on behalf of a CGI request.

    PARAM is a list of (KEY, VALUE) pairs from the CGI query.  Each KEY is
    matched against the names of the subcommand's options and parameters:
    options which take no argument are converted to booleans (true if the
    VALUE is `true', `t', `yes' or `y', in any case); only the `rest'
    parameter, if there is one, may appear more than once, and its values
    are collected into a list.  An unrecognized KEY is an error unless its
    VALUE is empty, in which case it is quietly ignored.

    PATH is a list of trailing path components.  If it is nonempty, then it
    becomes the value of the `rest' parameter; but this is an error if there
    is no `rest' parameter, or the query has set it already.

    The handler function `me.func' is then called with the values collected,
    as keyword arguments.  Problems are reported by raising `ExpectedError':
    with code 400 for a repeated, unexpected or missing parameter, and 404
    for superfluous path elements.
    """

    ## The keyword arguments which we're collecting for the handler.
    kw = {}

    ## The various ways of storing a value.  These all take the parameter
    ## name K and the supplied value V.
    def set_value(k, v):
      """Store an ordinary value, which must not be given twice."""
      if k in kw:
        raise U.ExpectedError(400, "Repeated parameter `%s'" % k)
      kw[k] = v
    def set_bool(k, v):
      """Store a boolean value, for a switch."""
      set_value(k, v.lower() in ('true', 't', 'yes', 'y'))
    def set_list(k, v):
      """Add the value to a list, for the `rest' parameter."""
      kw.setdefault(k, []).append(v)

    ## Build a map from the names of the formal parameters to the functions
    ## which should store their values.
    want = {}
    for o in me.opts:
      if o.argname:
        want[o.name] = set_value
      else:
        want[o.name] = set_bool
    for p in me.params:
      want[p.name] = set_value
    for p in me.oparams:
      want[p.name] = set_value
    if me.rparam:
      want[me.rparam.name] = set_list

    ## Store each of the supplied parameters.  We don't complain about
    ## unknown parameters with empty values.
    for k, v in param:
      if k in want:
        want[k](k, v)
      elif v:
        raise U.ExpectedError(400, "Unexpected parameter `%s'" % k)

    ## The path, if there is one, can only be used for the `rest' parameter.
    if path:
      if not me.rparam or me.rparam.name in kw:
        raise U.ExpectedError(404, "Superfluous path elements")
      kw[me.rparam.name] = path

    ## All of the mandatory parameters must have been given values.
    for p in me.params:
      if p.name not in kw:
        raise U.ExpectedError(400, "Missing parameter `%s'" % p.name)

    ## Everything checks out: call the handler.
    me.func(**kw)
501
def subcommand(name, contexts, desc, cls = Subcommand, *args, **kw):
  """
  Decorator for defining CGI subcommands.

  This simply calls `SC.subcommand' with the same arguments, except that the
  class CLS used for the subcommand object defaults to the CGI-capable
  `Subcommand' class.  Note that CLS is the fourth positional parameter: any
  further positional arguments ARGS are passed on after DESC.
  """
  options = dict(kw, cls = cls)
  return SC.subcommand(name, contexts, desc, *args, **options)
505
506###----- That's all, folks --------------------------------------------------