chiark / gitweb /
format.py: Document the formatting directive syntax.
[chopwood] / cgi.py
CommitLineData
a2916c06
MW
1### -*-python-*-
2###
3### CGI machinery
4###
5### (c) 2013 Mark Wooding
6###
7
8###----- Licensing notice ---------------------------------------------------
9###
10### This file is part of Chopwood: a password-changing service.
11###
12### Chopwood is free software; you can redistribute it and/or modify
13### it under the terms of the GNU Affero General Public License as
14### published by the Free Software Foundation; either version 3 of the
15### License, or (at your option) any later version.
16###
17### Chopwood is distributed in the hope that it will be useful,
18### but WITHOUT ANY WARRANTY; without even the implied warranty of
19### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20### GNU Affero General Public License for more details.
21###
22### You should have received a copy of the GNU Affero General Public
23### License along with Chopwood; if not, see
24### <http://www.gnu.org/licenses/>.
25
26from __future__ import with_statement
27
28import contextlib as CTX
29import os as OS; ENV = OS.environ
30import re as RX
31import sys as SYS
32import time as T
33import traceback as TB
34
35from auto import HOME, PACKAGE, VERSION
36import config as CONF; CFG = CONF.CFG
37import format as F
38import output as O; OUT = O.OUT; PRINT = O.PRINT
39import subcommand as SC
40import util as U
41
42###--------------------------------------------------------------------------
43### Configuration tweaks.
44
## The URL path of this script, taken from the `SCRIPT_NAME' environment
## variable; if that is not set, fall back to `/cgi-bin/chpwd'.
_script_name = ENV.get('SCRIPT_NAME', '/cgi-bin/chpwd')

## Add defaults for the configuration settings used by this module.
CONF.DEFAULTS.update(

  ## The URL of this program, when it's run through CGI.
  SCRIPT_NAME = _script_name,

  ## A (maybe relative) URL for static content.  By default this comes from
  ## the main script, but we hope that user agents cache it.
  STATIC = _script_name + '/static')
55
56###--------------------------------------------------------------------------
57### Escaping and encoding.
58
59## Some handy regular expressions.
## A `%XX' escape sequence in a form-url-encoded string; group 1 is the pair
## of hex digits.
R_URLESC = RX.compile(r'%([0-9a-fA-F]{2})')

## Any character which must be written as a `%XX' escape by `urlencode'.
R_URLBAD = RX.compile(r'[^-\w,.!]')

## Characters which `htmlescape' must turn into character references.  The
## double quote is included so that escaped text is safe inside a
## double-quoted attribute value.  The single quote is deliberately left
## alone: `html_quotify' runs its quote prettification on the escaped text
## and needs to see it.
R_HTMLBAD = RX.compile(r'[&<>"]')

def urldecode(s):
  """
  Decode a single form-url-encoded string S.

  Each `+' becomes a space, and then each `%XX' escape is replaced by the
  character with that (hexadecimal) code.  Returns the decoded string.
  """
  return R_URLESC.sub(lambda m: chr(int(m.group(1), 16)),
                      s.replace('+', ' '))

def urlencode(s):
  """
  Encode a single string S using form-url-encoding.

  Every character matched by `R_URLBAD' is replaced by a `%xx' escape
  (lower-case hex); everything else is passed through unchanged.
  """
  return R_URLBAD.sub(lambda m: '%%%02x' % ord(m.group(0)), s)

def htmlescape(s):
  """
  Escape a literal string S so that HTML doesn't misinterpret it.

  Every character matched by `R_HTMLBAD' is replaced by a hexadecimal
  numeric character reference, e.g., `<' becomes `&#x3c;'.
  """
  return R_HTMLBAD.sub(lambda m: '&#x%02x;' % ord(m.group(0)), s)
77
78## Some standard character sequences, and HTML entity names for prettier
79## versions.
## The table maps plain-text character sequences to HTML entity references.
## `_quotify' is applied by calling it on a string (see `html_quotify').
## NOTE(review): how `U.StringSubst' chooses between overlapping keys such
## as "--" and "---" is not visible here -- presumably longest match wins;
## confirm against `util'.
_quotify = U.StringSubst({
  "`": '&lsquo;',
  "'": '&rsquo;',
  "``": '&ldquo;',
  "''": '&rdquo;',
  "--": '&ndash;',
  "---": '&mdash;'
})
def html_quotify(s):
  """
  Return a pretty HTML version of S.

  S is first made safe with `htmlescape'; the escaped text is then passed
  through `_quotify' to replace quote and dash sequences with entities.
  """
  escaped = htmlescape(s)
  return _quotify(escaped)
91
92###--------------------------------------------------------------------------
93### Output machinery.
94
class HTTPOutput (O.FileOutput):
  """
  File output driver which makes sure that an HTTP header goes out first.

  The `headerp' attribute is false until a header has been written.  Call
  the `header' method to choose the header yourself; otherwise the first
  `write' emits a default `text/plain' header.
  """

  def __init__(me, *args, **kw):
    """Constructor: set up the base driver and clear the `headerp' flag."""
    super(HTTPOutput, me).__init__(*args, **kw)
    me.headerp = False

  def write(me, msg):
    """Output protocol: write MSG, preceded by a default header if needed."""
    if not me.headerp:
      me.header('text/plain')
    super(HTTPOutput, me).write(msg)

  def header(me, content_type = 'text/plain', **kw):
    """
    Write the HTTP header block, unless one has been written already.

    CONTENT_TYPE and the keyword arguments KW are passed on to
    `O.http_headers', which produces the header lines; each is written on
    its own line, followed by the blank line which ends the header.
    """
    if me.headerp:
      return

    ## Set the flag before writing anything, so that output produced while
    ## emitting the header lines doesn't try to emit a header of its own.
    me.headerp = True
    lines = O.http_headers(content_type = content_type, **kw)
    for line in lines:
      me.writeln(line)
    me.writeln('')
125
def cookie(name, value, **kw):
  """
  Build the text of an HTTP `Set-Cookie' header and return it.

  NAME and VALUE are the cookie's name and value; both are form-url-encoded
  so that they can't be misread.  The keyword arguments KW are further
  cookie attributes: each name is lower-cased and has its underscores `_'
  turned into hyphens `-'.  An attribute whose value is `True' is written
  as a bare name; an attribute with any other false value is left out.

  If a `max-age' attribute is given, a matching `expires' attribute is
  computed from it (relative to `U.NOW') and included too.
  """

  ## Normalize the attribute names.
  attr = {}
  for key, val in kw.items():
    attr[key.lower().replace('_', '-')] = val

  ## Derive an absolute expiry time from the maximum age, if there is one.
  if 'max-age' in attr:
    maxage = int(attr['max-age'])
    attr['expires'] = T.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                 T.gmtime(U.NOW + maxage))

  ## Assemble the pieces: the cookie itself first, then the attributes.
  pieces = ['%s=%s' % (urlencode(name), urlencode(value))]
  for key, val in attr.items():
    if not val:
      continue
    if val is True:
      pieces.append(key)
    else:
      pieces.append('%s=%s' % (key, val))
  return '; '.join(pieces)
a2916c06
MW
149
def action(*v, **kw):
  """
  Return an (HTML-escaped) URL which invokes this script.

  The positional arguments V are path components: they are joined with `/'
  after the configured script name `CFG.SCRIPT_NAME'.  If there are keyword
  arguments, they are appended as a query string: the names are taken in
  sorted order, names and values are form-url-encoded, and the pairs are
  separated by `;'.
  """
  parts = [CFG.SCRIPT_NAME]
  parts.extend(v)
  url = '/'.join(parts)
  if kw:
    query = ['%s=%s' % (urlencode(k), urlencode(kw[k])) for k in sorted(kw)]
    url = url + '?' + ';'.join(query)
  return htmlescape(url)
164
def static(name):
  """Return an (HTML-escaped) URL for the static file NAME."""
  url = '/'.join([CFG.STATIC, name])
  return htmlescape(url)
168
a2916c06
MW
def redirect(where, **kw):
  """
  Write a complete redirection to some other URL.

  WHERE is the target URL.  A header is written with status 302 and WHERE
  as the `location'; any additional keyword arguments KW are passed on to
  `OUT.header' as further headers.  A small HTML page linking to WHERE is
  then printed as the body.
  """
  OUT.header(content_type = 'text/html',
             status = 302, location = where,
             **kw)

  ## The URL lands inside a double-quoted attribute value, so a double quote
  ## must not survive in it.  Replace any that `htmlescape' leaves behind.
  target = htmlescape(where).replace('"', '&#x22;')
  PRINT("""\
<html>
<head><title>No, sorry, it's moved again.</title></head>
<body><p>I'm <a href="%s">over here</a> now.</p></body>
</html>""" % target)
181
182###--------------------------------------------------------------------------
183### Templates.
184
## Where we find our templates.
TMPLDIR = HOME

## Keyword arguments for templates.  `STATE.kw' is the dictionary of
## arguments currently in force: it is passed to the formatter by
## `format_tmpl', and temporarily replaced by an extended copy within the
## body of `tmplkw'.
STATE = U.Fluid()
STATE.kw = {}
191
192## Set some basic keyword arguments.
@CONF.hook
def set_template_keywords():
  """
  Configuration hook: install the basic template keyword arguments.

  Adds `package', `version', `script' and `static' to `STATE.kw', the last
  two coming from the configuration.
  """
  basics = dict(package = PACKAGE,
                version = VERSION,
                script = CFG.SCRIPT_NAME,
                static = CFG.STATIC)
  STATE.kw.update(basics)
200
class TemplateFinder (object):
  """
  A dictionary-like object whose keys are template file names.

  Looking up a key reads the file of that name from the finder's directory
  and returns its contents; the contents are remembered, so each file is
  read at most once.
  """

  def __init__(me, dir):
    """Constructor: templates will be read from the directory DIR."""
    me._cache = {}
    me._dir = dir

  def __getitem__(me, key):
    """Return the text of the template file named KEY."""
    cache = me._cache
    if key not in cache:
      path = OS.path.join(me._dir, key)
      with open(path) as f:
        cache[key] = f.read()
    return cache[key]
## The template finder used by this module: it reads from `TMPLDIR'.
TMPL = TemplateFinder(TMPLDIR)
215
@CTX.contextmanager
def tmplkw(**kw):
  """
  Context manager: execute the body with additional template keywords.

  For the duration of the body, `STATE.kw' is bound to a fresh dictionary
  holding the existing keyword arguments overlaid with KW.
  """
  merged = dict(STATE.kw)
  merged.update(kw)
  with STATE.bind(kw = merged):
    yield
225
class FormatHTML (F.SimpleFormatOperation):
  """
  ~H: escape output suitable for inclusion in HTML.

  With `:', additionally prettify quotes and dashes, as `html_quotify'
  does.
  """
  def _convert(me, arg):
    """Return ARG escaped (and, with the colon flag, prettified) for HTML."""
    escape = html_quotify if me.colonp else htmlescape
    return escape(arg)

## The formatting operations defined by this module, by directive character.
FORMATOPS = {'H': FormatHTML}
238
def format_tmpl(control, **kw):
  """
  Format the template text CONTROL and write the result to `OUT'.

  The formatting directives available are those in `FORMATOPS' followed by
  the basic ones in `F.BASEOPS'.  The arguments passed to the formatter are
  the current template keywords (`STATE.kw') extended with KW.
  """
  opmaps = [FORMATOPS, F.BASEOPS]
  with F.COMPILE.bind(opmaps = opmaps):
    with tmplkw(**kw):
      F.format(OUT, control, **STATE.kw)
243
def page(template, header = {}, title = 'Chopwood', **kw):
  """
  Write a complete HTML page built from TEMPLATE.

  HEADER is a dictionary of keyword arguments for `OUT.header'; the content
  type is always forced to `text/html'.  (HEADER is copied, never modified,
  so the shared default dictionary is harmless.)  The page is produced by
  formatting `wrapper.fhtml' with TITLE as `title' and the text of TEMPLATE
  as `payload'; other keyword arguments KW are passed to the formatter too.
  """
  hdr = dict(header)
  hdr['content_type'] = 'text/html'
  OUT.header(**hdr)
  wrapper = TMPL['wrapper.fhtml']
  payload = TMPL[template]
  format_tmpl(wrapper, title = title, payload = payload, **kw)
249
250###--------------------------------------------------------------------------
251### Error reporting.
252
a2916c06
MW
@CTX.contextmanager
def cgi_errors(hook = None):
  """
  Context manager: turn exceptions raised in the body into HTML reports.

  If HOOK is given, it is called (with no arguments) before an error is
  reported, so that it can set up anything the report needs.

  An `ExpectedError' caught before any header has been written produces the
  `error.fhtml' page, with the error's `code' as the HTTP status.  Anything
  else produces the `exception.fhtml' report, with the exception, traceback
  and request details made available as template keywords.  This is a full
  page with status 500 if no header has been written yet; otherwise it is
  formatted on its own into the output already under way.
  """
  try:
    yield None
  except Exception:
    ## Pick the exception object up straight away, before the hook runs.
    err = SYS.exc_info()[1]
    if hook: hook()

    ## An anticipated failure, and we can still choose the status.
    if isinstance(err, U.ExpectedError) and not OUT.headerp:
      page('error.fhtml',
           header = dict(status = err.code),
           title = 'Chopwood: error', error = err)
      return

    ## Otherwise, report everything we know.
    exty, exval, extb = SYS.exc_info()
    details = dict(exception = TB.format_exception_only(exty, exval),
                   traceback = TB.extract_tb(extb),
                   PARAM = sorted(PARAM),
                   COOKIE = sorted(COOKIE.items()),
                   PATH = PATH,
                   ENV = sorted(ENV.items()))
    with tmplkw(**details):
      if not OUT.headerp:
        page('exception.fhtml',
             header = dict(status = 500),
             title = 'Chopwood: internal error',
             toplevel = True)
      else:
        format_tmpl(TMPL['exception.fhtml'], toplevel = False)
284
285###--------------------------------------------------------------------------
286### CGI input.
287
## Lots of global variables to be filled in by `cgiparse'.
COOKIE = {}                             # cookie name -> value
SPECIAL = {}                            # special query parameters; only
                                        # keys already present are set
PARAM = []                              # ordinary (KEY, VALUE) pairs
PARAMDICT = {}                          # ordinary parameters by name
PATH = []                               # components of `PATH_INFO'
SSLP = False                            # true if `SSL_PROTOCOL' is set

## Regular expressions for splitting apart query and cookie strings.  Query
## components may be separated by `;' or `&'; cookies only by `;'.
R_QSPLIT = RX.compile('[;&]')
R_CSPLIT = RX.compile(';')
299
def split_keyvalue(string, delim, default):
  """
  Generate the (KEY, VALUE) pairs found in STRING.

  DELIM is a compiled regular expression; STRING is split wherever it
  matches, and each piece is read as KEY[=VALUE], splitting at the first
  `='.  Leading and trailing whitespace is stripped from both parts, which
  are then form-url-decoded.  A piece with no `=' takes DEFAULT as its
  value, unless DEFAULT is `None', in which case the piece is skipped.
  Pieces whose key is empty are skipped too.
  """
  for item in delim.split(string):
    key, eq, val = item.partition('=')
    if not eq:
      if default is None:
        continue
      val = default
    key = key.strip()
    val = val.strip()
    if not key:
      continue
    yield urldecode(key), urldecode(val)
320
def cgiparse():
  """
  Process all of the various exciting CGI environment variables.

  We read environment variables and populate some tables left in global
  variables: it's all rather old-school.  Variables set are as follows.

  `COOKIE'
        A dictionary mapping cookie names to the values provided by the user
        agent.

  `SPECIAL'
        A dictionary holding some special query parameters which are of
        interest at a global level, and should not be passed to a subcommand
        handler.  No new entries will be added to this dictionary, though
        values will be modified to reflect the query parameters discovered.
        Conventionally, such parameters have names beginning with `%'.

  `PARAM'
        The query parameters as a list of (KEY, VALUE) pairs.  Special
        parameters are omitted.

  `PARAMDICT'
        The query parameters as a dictionary.  Special parameters, and
        parameters which appear more than once, are omitted.

  `PATH'
        The trailing `PATH_INFO' path, split at `/' markers, with any
        trailing empty component removed.

  `SSLP'
        True if the client connection is carried over SSL or TLS.

  Raises `ExpectedError' (with code 500) if a required environment variable
  is missing, if the request method is neither `GET' nor `POST', or if a
  `POST' body has a bad length or an unexpected content type.
  """

  global SSLP

  def getenv(var):
    """Return the environment variable VAR; it's an error if it's unset."""
    try: return ENV[var]
    except KeyError: raise U.ExpectedError(500, "No `%s' supplied" % var)

  ## Yes, we want the request method.
  method = getenv('REQUEST_METHOD')

  ## Acquire the query string.
  if method == 'GET':
    q = getenv('QUERY_STRING')

  elif method == 'POST':

    ## We must read the query string from stdin.
    n = getenv('CONTENT_LENGTH')
    if not n.isdigit():
      raise U.ExpectedError(500, "Invalid CONTENT_LENGTH")
    n = int(n, 10)

    ## Media types are case-insensitive and may carry parameters (e.g.,
    ## `; charset=UTF-8'), so compare only the bare type.
    ct = getenv('CONTENT_TYPE')
    if ct.split(';', 1)[0].strip().lower() != \
          'application/x-www-form-urlencoded':
      raise U.ExpectedError(500, "Unexpected content type `%s'" % ct)
    q = SYS.stdin.read(n)
    if len(q) != n:
      raise U.ExpectedError(500, "Failed to read correct length")

  else:
    raise U.ExpectedError(500, "Unexpected request method `%s'" % method)

  ## Populate the `SPECIAL', `PARAM' and `PARAMDICT' tables.
  seen = set()
  for k, v in split_keyvalue(q, R_QSPLIT, 't'):
    if k in SPECIAL:
      SPECIAL[k] = v
    else:
      PARAM.append((k, v))
      if k in seen:
        ## A repeated parameter: drop it from the dictionary.  It will
        ## already be gone if this is the third or later occurrence.
        PARAMDICT.pop(k, None)
      else:
        PARAMDICT[k] = v
        seen.add(k)

  ## Parse out the cookies, if any.
  try: c = ENV['HTTP_COOKIE']
  except KeyError: pass
  else:
    for k, v in split_keyvalue(c, R_CSPLIT, None): COOKIE[k] = v

  ## Set up the `PATH'.
  try: p = ENV['PATH_INFO']
  except KeyError: pass
  else:
    pp = p.lstrip('/').split('/')
    if pp and not pp[-1]: pp.pop()
    PATH[:] = pp

  ## Check the crypto for the connection.
  if ENV.get('SSL_PROTOCOL'):
    SSLP = True
414
a2916c06
MW
415###--------------------------------------------------------------------------
416### CGI subcommands.
417
class Subcommand (SC.Subcommand):
  """
  A subcommand which can be invoked from a CGI request.

  This is `subcommand.Subcommand' extended with the `cgi' method, which
  maps CGI query parameters onto the subcommand's formal parameters.
  """

  def cgi(me, param, path):
    """
    Invoke the subcommand given a collection of CGI parameters.

    PARAM is a list of (KEY, VALUE) pairs from the CGI query.  Each is
    matched against the subcommand's options and parameters: an option
    without an argument name is a switch, and its value is read as a
    boolean; only the `rest' parameter, if there is one, may be given more
    than once.  A parameter with an unknown name is an error unless its
    value is empty, in which case it's ignored.

    PATH is a list of trailing path components.  If it's non-empty, it
    supplies the `rest' parameter, provided there is one and the query
    didn't already set it; otherwise it's an error.

    Raises `ExpectedError' with code 400 for a repeated, unexpected or
    missing parameter, and with code 404 for superfluous path components.
    """

    ## The keyword arguments we're collecting for the handler function.  A
    ## name's presence here also records that the parameter was supplied.
    kw = {}

    def set_value(k, v):
      """Set a simple value: we shouldn't see multiple values."""
      if k in kw:
        raise U.ExpectedError(400, "Repeated parameter `%s'" % k)
      kw[k] = v

    def set_bool(k, v):
      """Set a simple boolean value: for switches."""
      set_value(k, v.lower() in ('true', 't', 'yes', 'y'))

    def set_list(k, v):
      """Append the value to a list: for the `rest' parameter."""
      if k not in kw:
        kw[k] = []
      kw[k].append(v)

    ## Map each acceptable parameter name to the function which stores its
    ## value.
    want = {}
    for o in me.opts:
      want[o.name] = set_value if o.argname else set_bool
    want.update((p.name, set_value) for p in me.params)
    want.update((p.name, set_value) for p in me.oparams)
    if me.rparam:
      want[me.rparam.name] = set_list

    ## Work through the list of supplied parameters.
    for k, v in param:
      store = want.get(k)
      if store is not None:
        store(k, v)
      elif v:
        raise U.ExpectedError(400, "Unexpected parameter `%s'" % k)

    ## Deal with a path, if there is one.
    if path:
      if not me.rparam or me.rparam.name in kw:
        raise U.ExpectedError(404, "Superfluous path elements")
      kw[me.rparam.name] = path

    ## Make sure we saw all of the mandatory parameters.
    for p in me.params:
      if p.name not in kw:
        raise U.ExpectedError(400, "Missing parameter `%s'" % p.name)

    ## Invoke the subcommand.
    me.func(**kw)
499
def subcommand(name, contexts, desc, cls = Subcommand, *args, **kw):
  """
  Decorator for defining CGI subcommands.

  All of the arguments are passed on to `SC.subcommand'; the only
  difference is that the subcommand class CLS defaults to the CGI-aware
  `Subcommand' defined in this module.
  """
  kw['cls'] = cls
  return SC.subcommand(name, contexts, desc, *args, **kw)
503
504###----- That's all, folks --------------------------------------------------