Commit | Line | Data |
---|---|---|
a2916c06 MW |
1 | ### -*-python-*- |
2 | ### | |
3 | ### CGI machinery | |
4 | ### | |
5 | ### (c) 2013 Mark Wooding | |
6 | ### | |
7 | ||
8 | ###----- Licensing notice --------------------------------------------------- | |
9 | ### | |
10 | ### This file is part of Chopwood: a password-changing service. | |
11 | ### | |
12 | ### Chopwood is free software; you can redistribute it and/or modify | |
13 | ### it under the terms of the GNU Affero General Public License as | |
14 | ### published by the Free Software Foundation; either version 3 of the | |
15 | ### License, or (at your option) any later version. | |
16 | ### | |
17 | ### Chopwood is distributed in the hope that it will be useful, | |
18 | ### but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ### GNU Affero General Public License for more details. | |
21 | ### | |
22 | ### You should have received a copy of the GNU Affero General Public | |
23 | ### License along with Chopwood; if not, see | |
24 | ### <http://www.gnu.org/licenses/>. | |
25 | ||
26 | from __future__ import with_statement | |
27 | ||
28 | import contextlib as CTX | |
29 | import os as OS; ENV = OS.environ | |
30 | import re as RX | |
31 | import sys as SYS | |
32 | import time as T | |
33 | import traceback as TB | |
34 | ||
35 | from auto import HOME, PACKAGE, VERSION | |
36 | import config as CONF; CFG = CONF.CFG | |
37 | import format as F | |
38 | import output as O; OUT = O.OUT; PRINT = O.PRINT | |
39 | import subcommand as SC | |
40 | import util as U | |
41 | ||
42 | ###-------------------------------------------------------------------------- | |
43 | ### Configuration tweaks. | |
44 | ||
## The URL under which the web server says it invoked us; fall back to a
## conventional CGI location if `SCRIPT_NAME' isn't in the environment.
_script_name = ENV.get('SCRIPT_NAME', '/cgi-bin/chpwd')

## Register our configuration defaults.
##
## `SCRIPT_NAME'  The URL of this program, when it's run through CGI.
##
## `STATIC'       A (maybe relative) URL for static content.  By default
##                this comes from the main script, but we hope that user
##                agents cache it.
CONF.DEFAULTS.update(
    SCRIPT_NAME = _script_name,
    STATIC = _script_name + '/static')
55 | ||
56 | ###-------------------------------------------------------------------------- | |
57 | ### Escaping and encoding. | |
58 | ||
## Some handy regular expressions.
##
## `R_URLESC' matches a `%XX' escape, capturing the two hex digits;
## `R_URLBAD' matches any character which `urlencode' must escape; and
## `R_HTMLBAD' matches the characters which `htmlescape' must escape.
R_URLESC = RX.compile(r'%([0-9a-fA-F]{2})')
R_URLBAD = RX.compile(r'[^-\w,.!]')
R_HTMLBAD = RX.compile('[&<>\'"]')

def urldecode(s):
    """
    Decode a single form-url-encoded string S.

    Each `+' becomes a space, and each `%XX' escape becomes the character
    with hexadecimal code XX.  The decoded string is returned.
    """
    ## Translate `+' first: a literal plus sign arrives as `%2b', and must
    ## not be turned into a space after it has been unescaped.
    return R_URLESC.sub(lambda m: chr(int(m.group(1), 16)),
                        s.replace('+', ' '))

def urlencode(s):
    """
    Encode a single string S using form-url-encoding.

    Every character other than a word character or one of `-', `,', `.' or
    `!' is replaced by a `%XX' escape (lower-case hex).  In particular,
    spaces are written as `%20' rather than `+'.
    """
    return R_URLBAD.sub(lambda m: '%%%02x' % ord(m.group(0)), s)

def htmlescape(s):
    """
    Escape a literal string S so that HTML doesn't misinterpret it.

    The characters `&', `<', `>', `'' and `"' are replaced by numeric
    character references, so the result is safe both as element content and
    inside a quoted attribute value.
    """
    return R_HTMLBAD.sub(lambda m: '&#x%02x;' % ord(m.group(0)), s)
77 | ||
## Some standard character sequences, and HTML entity names for prettier
## versions.
##
## The replacements must be spelled as entity references.  Mapping `<', `>',
## `&' and `"' to themselves would leave markup unescaped, and the
## typographic quotes and dashes are written as named entities so that this
## source file stays pure ASCII.
##
## NOTE(review): several keys are prefixes of others (e.g. `--' and `---');
## this presumably relies on `U.StringSubst' preferring the longest match --
## confirm against its implementation.
html_quotify = U.StringSubst({
    "<": '&lt;',
    ">": '&gt;',
    "&": '&amp;',
    "`": '&lsquo;',
    "'": '&rsquo;',
    '"': '&quot;',
    "``": '&ldquo;',
    "''": '&rdquo;',
    "--": '&ndash;',
    "---": '&mdash;'
})
a2916c06 MW |
92 | |
93 | ###-------------------------------------------------------------------------- | |
94 | ### Output machinery. | |
95 | ||
class HTTPOutput (O.FileOutput):
    """
    Output driver which makes sure an HTTP header precedes any output.

    The `headerp' attribute is true once a header has been written.  Call the
    `header' method before writing anything to send a custom header;
    otherwise a plain-text header is sent automatically by the first `write'.
    """

    def __init__(me, *args, **kw):
        """Constructor: pass the arguments along, and clear `headerp'."""
        super(HTTPOutput, me).__init__(*args, **kw)
        me.headerp = False

    def write(me, msg):
        """Output protocol: write MSG, preceded by a header if necessary."""
        if not me.headerp:
            me.header('text/plain')
        super(HTTPOutput, me).write(msg)

    def header(me, content_type = 'text/plain', **kw):
        """
        Write the HTTP header, unless one has been written already.

        The CONTENT_TYPE and any other keyword arguments are handed to
        `O.http_headers', which produces the header lines; see that function
        for the formatting rules.  A blank line is written after them.
        """
        if not me.headerp:
            ## Set the flag before writing anything: `write' checks it, and
            ## would otherwise try to send a header of its own.
            me.headerp = True
            for line in O.http_headers(content_type = content_type, **kw):
                me.writeln(line)
            me.writeln('')
126 | ||
def cookie(name, value, **kw):
    """
    Return the text of an HTTP `Set-Cookie' header.

    NAME and VALUE are the cookie's name and value; both are form-url-encoded
    to prevent misinterpretation (`cgiparse' undoes this when the cookie
    comes back).  The keyword arguments KW are further attributes: each name
    is forced to lower-case, with underscores `_' replaced by hyphens `-'.
    An attribute whose value is `True' is written as a bare name; an
    attribute with any other false value is left out entirely.

    If a `max-age' attribute is given, a matching `expires' attribute is
    computed from it, relative to `U.NOW'.
    """

    ## Canonify the attribute names.
    attr = dict(('-'.join(word.lower() for word in key.split('_')), val)
                for key, val in kw.iteritems())

    ## Derive an `expires' timestamp from `max-age' if there is one.
    if 'max-age' in attr:
        maxage = int(attr['max-age'])
        attr['expires'] = T.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                     T.gmtime(U.NOW + maxage))

    ## Assemble the pieces: the cookie itself, and then the attributes.
    fields = ['%s=%s' % (urlencode(name), urlencode(value))]
    for key, val in attr.iteritems():
        if not val: continue
        if val is True: fields.append(key)
        else: fields.append('%s=%s' % (key, val))
    return '; '.join(fields)
a2916c06 MW |
150 | |
def action(*v, **kw):
    """
    Build a URL which invokes this script.

    The positional arguments V are path components: they are joined with `/'
    and appended to the configured script name `CFG.SCRIPT_NAME' (and
    presumably come back to us as `PATH_INFO').  The keyword arguments, if
    any, are form-url-encoded and appended as a query string, in sorted order
    and separated by `;'.

    The result has been passed through `htmlescape', so it is ready to drop
    into an HTML attribute.
    """
    components = [CFG.SCRIPT_NAME]
    components.extend(v)
    url = '/'.join(components)
    if kw:
        query = ['%s=%s' % (urlencode(key), urlencode(kw[key]))
                 for key in sorted(kw)]
        url = url + '?' + ';'.join(query)
    return htmlescape(url)
165 | ||
def static(name):
    """
    Build a URL for the static file NAME.

    NAME is appended to the configured `CFG.STATIC' prefix; the result is
    HTML-escaped.
    """
    url = '/'.join([CFG.STATIC, name])
    return htmlescape(url)
169 | ||
a2916c06 MW |
def redirect(where, **kw):
    """
    Write a complete redirection to some other URL.

    WHERE is the (unescaped) target URL.  A `302' response is issued with
    WHERE as its `Location'; additional keyword arguments KW are passed on
    to `OUT.header' as further HTTP headers.  A tiny HTML page containing a
    link to the target is written as the body.
    """
    OUT.header(content_type = 'text/html',
               status = 302, location = where,
               **kw)

    ## The URL is escaped here because it's going into an attribute value.
    PRINT("""\
<html>
<head><title>No, sorry, it's moved again.</title></head>
<body><p>I'm <a href="%s">over here</a> now.</body>
</html>""" % htmlescape(where))
182 | ||
183 | ###-------------------------------------------------------------------------- | |
184 | ### Templates. | |
185 | ||
## The directory in which we look for template files.
TMPLDIR = HOME

## Dynamically-bound state for templates: `STATE.kw' holds the keyword
## arguments which are made available when a template is formatted.
STATE = U.Fluid()
STATE.kw = dict()
192 | ||
## Set some basic keyword arguments.
@CONF.hook
def set_template_keywords():
    """
    Configuration hook: install the standard template keyword arguments.

    These are the package name and version, and the `SCRIPT_NAME', `STATIC'
    and `ALLOWOP' configuration settings.
    """
    basics = dict(package = PACKAGE,
                  version = VERSION,
                  script = CFG.SCRIPT_NAME,
                  static = CFG.STATIC,
                  allowop = CFG.ALLOWOP)
    STATE.kw.update(basics)
a2916c06 MW |
202 | |
class TemplateFinder (object):
    """
    A magical fake dictionary whose keys are template file names.

    Looking up a key reads the file of that name from the finder's directory
    and returns its contents as a string.  The contents are cached, so each
    file is read at most once per finder.
    """

    def __init__(me, dir):
        """Initialize a finder which looks for templates in directory DIR."""
        me._dir = dir
        me._cache = {}

    def __getitem__(me, key):
        """Return the text of the template file named KEY."""
        if key in me._cache:
            return me._cache[key]
        path = OS.path.join(me._dir, key)
        with open(path) as f:
            text = f.read()
        me._cache[key] = text
        return text
## The global template finder; it's also made available to the templates
## themselves, as the keyword argument `TMPL'.
TMPL = TemplateFinder(TMPLDIR)
STATE.kw['TMPL'] = TMPL
a2916c06 MW |
217 | |
@CTX.contextmanager
def tmplkw(**kw):
    """
    Context manager: execute the body with additional template keywords.

    For the duration of the body, `STATE.kw' is bound to a fresh dictionary
    holding the current keywords overlaid with KW; the dictionary which was
    current on entry is not modified.
    """
    merged = dict(STATE.kw)
    merged.update(kw)
    with STATE.bind(kw = merged):
        yield
227 | ||
## Our additional formatting operations, indexed by directive character.
FORMATOPS = {}

class FormatHTML (F.SimpleFormatOperation):
    """
    ~H: escape output suitable for inclusion in HTML.

    Plain `~H' passes the argument through `htmlescape'.  With `:', the
    argument is passed through `html_quotify' instead, which also applies
    quotification.
    """
    def _convert(me, arg):
        """Return the converted form of the argument ARG."""
        if not me.colonp:
            return htmlescape(arg)
        return html_quotify(arg)
FORMATOPS['H'] = FormatHTML
240 | ||
dc190ae1 MW |
class FormatWrap (F.BaseFormatOperation):
    """
    ~<...~@>: wrap enclosed material in another formatting control string.

    The argument is a formatting control.  The enclosed material is split
    into pieces separated by `~;' markers.  The formatting control is
    performed, and passed the list of pieces (as compiled formatting
    operations) in the keyword argument `wrapped'.
    """

    def __init__(me, *args):
        """Constructor: collect the enclosed pieces, up to the closing `~>'."""
        super(FormatWrap, me).__init__(*args)
        collected = []
        finished = False
        while not finished:
            piece, delim = F.collect_subformat('>;')
            collected.append(piece)
            ## A `;' delimiter means that there's another piece to come.
            finished = delim.char == '>'
        me.pieces = collected

    def _format(me, atp, colonp):
        """Perform the argument control string, with `wrapped' bound."""
        wrapper = F.compile(me.getarg.get())
        argmap = dict(F.FORMAT.argmap, wrapped = me.pieces)
        with F.FORMAT.bind(argmap = argmap):
            wrapper.format()
FORMATOPS['<'] = FormatWrap
263 | ||
a2916c06 MW |
def format_tmpl(control, **kw):
    """
    Format the template text CONTROL, writing the result to `OUT'.

    Our own formatting operations in `FORMATOPS' are made available
    alongside the base set `F.BASEOPS'.  The keyword arguments KW are added
    to the current template keywords for the duration.
    """
    opmaps = [FORMATOPS, F.BASEOPS]
    with F.COMPILE.bind(opmaps = opmaps):
        with tmplkw(**kw):
            F.format(OUT, control, **STATE.kw)
268 | ||
def page(template, header = {}, title = 'Chopwood', **kw):
    """
    Write a complete HTML page.

    An HTTP header is sent first: HEADER is a dictionary of keyword arguments
    for `OUT.header', though the content type is always forced to
    `text/html'.  Then the `wrapper.fhtml' template is formatted, with the
    text of TEMPLATE as its `payload', and TITLE and the remaining keyword
    arguments KW as further template keywords.
    """
    ## Work on a copy: the caller's dictionary (and the shared default) must
    ## be left alone.
    headers = dict(header)
    headers['content_type'] = 'text/html'
    OUT.header(**headers)
    format_tmpl(TMPL['wrapper.fhtml'],
                title = title, payload = TMPL[template], **kw)
274 | ||
275 | ###-------------------------------------------------------------------------- | |
276 | ### Error reporting. | |
277 | ||
a2916c06 MW |
278 | @CTX.contextmanager |
279 | def cgi_errors(hook = None): | |
280 | """ | |
281 | Context manager: report errors in the body as useful HTML. | |
282 | ||
283 | If HOOK is given, then call it before reporting errors. It may have set up | |
284 | useful stuff. | |
285 | """ | |
286 | try: | |
287 | yield None | |
288 | except Exception, e: | |
289 | if hook: hook() | |
290 | if isinstance(e, U.ExpectedError) and not OUT.headerp: | |
291 | page('error.fhtml', | |
b569edae | 292 | header = dict(status = e.code), |
a2916c06 MW |
293 | title = 'Chopwood: error', error = e) |
294 | else: | |
295 | exty, exval, extb = SYS.exc_info() | |
296 | with tmplkw(exception = TB.format_exception_only(exty, exval), | |
297 | traceback = TB.extract_tb(extb), | |
298 | PARAM = sorted(PARAM), | |
299 | COOKIE = sorted(COOKIE.items()), | |
300 | PATH = PATH, | |
301 | ENV = sorted(ENV.items())): | |
302 | if OUT.headerp: | |
303 | format_tmpl(TMPL['exception.fhtml'], toplevel = False) | |
304 | else: | |
305 | page('exception.fhtml', | |
b569edae | 306 | header = dict(status = 500), |
a2916c06 MW |
307 | title = 'Chopwood: internal error', |
308 | toplevel = True) | |
309 | ||
310 | ###-------------------------------------------------------------------------- | |
311 | ### CGI input. | |
312 | ||
## Lots of global variables to be filled in by `cgiparse'.  The containers
## are updated in place; `SSLP' is rebound.
COOKIE = dict()
SPECIAL = dict()
PARAM = list()
PARAMDICT = dict()
PATH = list()
SSLP = False

## Regular expressions for splitting apart query and cookie strings: query
## parameters may be separated by `;' or `&', but cookies only by `;'.
R_QSPLIT = RX.compile('[;&]')
R_CSPLIT = RX.compile(';')
324 | ||
def split_keyvalue(string, delim, default):
    """
    Split a STRING, and generate the resulting (KEY, VALUE) pairs.

    DELIM is a compiled regular expression: the STRING is split wherever it
    matches, and each component is parsed as `KEY[=VALUE]'.  The KEYs and
    VALUEs are stripped of leading and trailing whitespace, and then
    form-url-decoded.  If a component has no `=VALUE' part, then DEFAULT is
    used as its value -- unless DEFAULT is `None', in which case the
    component is simply ignored.  Components with empty KEYs are ignored
    too.
    """
    for item in delim.split(string):
        if '=' in item:
            key, value = item.split('=', 1)
        elif default is None:
            continue
        else:
            key, value = item, default
        key = key.strip()
        value = value.strip()
        if not key: continue
        yield urldecode(key), urldecode(value)
345 | ||
def cgiparse():
    """
    Process all of the various exciting CGI environment variables.

    We read environment variables and populate some tables left in global
    variables: it's all rather old-school.  Variables set are as follows.

    `COOKIE'
          A dictionary mapping cookie names to the values provided by the
          user agent.

    `SPECIAL'
          A dictionary holding some special query parameters which are of
          interest at a global level, and should not be passed to a
          subcommand handler.  No new entries will be added to this
          dictionary, though values will be modified to reflect the query
          parameters discovered.  Conventionally, such parameters have names
          beginning with `%'.

    `PARAM'
          The query parameters as a list of (KEY, VALUE) pairs.  Special
          parameters are omitted.

    `PARAMDICT'
          The query parameters as a dictionary.  Special parameters, and
          parameters which appear more than once, are omitted.

    `PATH'
          The trailing `PATH_INFO' path, split at `/' markers, with any
          trailing empty component removed.

    `SSLP'
          True if the client connection is carried over SSL or TLS.

    Raises `U.ExpectedError' (with code 500) if a required environment
    variable is missing, if the request method is neither `GET' nor `POST',
    or if a `POST' body has an invalid length or an unexpected content type.
    """

    global SSLP

    def getenv(var):
        """Return the value of environment variable VAR, which must exist."""
        try: return ENV[var]
        except KeyError: raise U.ExpectedError(500, "No `%s' supplied" % var)

    ## Yes, we want the request method.
    method = getenv('REQUEST_METHOD')

    ## Acquire the query string.
    if method == 'GET':
        q = getenv('QUERY_STRING')

    elif method == 'POST':

        ## We must read the query string from stdin.
        n = getenv('CONTENT_LENGTH')
        if not n.isdigit():
            raise U.ExpectedError(500, "Invalid CONTENT_LENGTH")
        n = int(n, 10)

        ## Hold on to the content type so that we can report it properly.
        ct = getenv('CONTENT_TYPE')
        if ct != 'application/x-www-form-urlencoded':
            raise U.ExpectedError(500, "Unexpected content type `%s'" % ct)
        q = SYS.stdin.read(n)
        if len(q) != n:
            raise U.ExpectedError(500, "Failed to read correct length")

    else:
        raise U.ExpectedError(500, "Unexpected request method `%s'" % method)

    ## Populate the `SPECIAL', `PARAM' and `PARAMDICT' tables.
    seen = set()
    for k, v in split_keyvalue(q, R_QSPLIT, 't'):
        if k in SPECIAL:
            SPECIAL[k] = v
        else:
            PARAM.append((k, v))
            if k in seen:
                ## A repeated parameter doesn't belong in `PARAMDICT'.  It
                ## may already have been removed, if this is the third or a
                ## later occurrence, so don't insist that it's there.
                PARAMDICT.pop(k, None)
            else:
                PARAMDICT[k] = v
                seen.add(k)

    ## Parse out the cookies, if any.  Cookie components without a value are
    ## ignored.
    try: c = ENV['HTTP_COOKIE']
    except KeyError: pass
    else:
        for k, v in split_keyvalue(c, R_CSPLIT, None): COOKIE[k] = v

    ## Set up the `PATH'.
    try: p = ENV['PATH_INFO']
    except KeyError: pass
    else:
        pp = p.lstrip('/').split('/')
        if pp and not pp[-1]: pp.pop()
        PATH[:] = pp

    ## Check the crypto for the connection.
    if ENV.get('SSL_PROTOCOL'):
        SSLP = True
439 | ||
a2916c06 MW |
440 | ###-------------------------------------------------------------------------- |
441 | ### CGI subcommands. | |
442 | ||
class Subcommand (SC.Subcommand):
    """
    A CGI subcommand object.

    As for `subcommand.Subcommand', but with additional protocol for
    processing CGI parameters.
    """

    def cgi(me, param, path):
        """
        Invoke the subcommand given a collection of CGI parameters.

        PARAM is a list of (KEY, VALUE) pairs from the CGI query.  The CGI
        query parameters are checked against the subcommand's parameters
        (making sure that mandatory parameters are supplied, that any
        switches are given boolean values, and that only the `rest'
        parameter, if any, is duplicated).  Unknown parameters are tolerated
        only if their values are empty.

        PATH is a list of trailing path components.  They are used to satisfy
        the `rest' parameter if there is one and there are no query
        parameters which satisfy the `rest' parameter; otherwise, an
        `ExpectedError' is raised if the list of path elements is non-empty.

        Raises `ExpectedError' with code 400 for bad query parameters, or 404
        for superfluous path elements.
        """

        ## `kw' is the keyword-argument dictionary which we'll pass to the
        ## handler function.  It also serves as our record of which
        ## parameters have been supplied so far.
        kw = {}

        def set_value(k, v):
            """Set a simple value: we shouldn't see multiple values."""
            if k in kw:
                raise U.ExpectedError(400, "Repeated parameter `%s'" % k)
            kw[k] = v
        def set_bool(k, v):
            """Set a simple boolean value: for switches."""
            set_value(k, v.lower() in ['true', 't', 'yes', 'y'])
        def set_list(k, v):
            """Append the value to a list: for the `rest' parameter."""
            kw.setdefault(k, []).append(v)

        ## Build `want': a dictionary mapping each formal parameter name to
        ## the function which will do something useful with its value.
        want = {}
        for opt in me.opts:
            want[opt.name] = set_value if opt.argname else set_bool
        for formal in me.params:
            want[formal.name] = set_value
        for formal in me.oparams:
            want[formal.name] = set_value
        if me.rparam:
            want[me.rparam.name] = set_list

        ## Work through the list of supplied parameters.
        for k, v in param:
            setter = want.get(k)
            if setter is not None:
                setter(k, v)
            elif v:
                raise U.ExpectedError(400, "Unexpected parameter `%s'" % k)

        ## Deal with a path, if there is one.
        if path:
            if not me.rparam or me.rparam.name in kw:
                raise U.ExpectedError(404, "Superfluous path elements")
            kw[me.rparam.name] = path

        ## Make sure we saw all of the mandatory parameters.
        for formal in me.params:
            if formal.name not in kw:
                raise U.ExpectedError(400,
                                      "Missing parameter `%s'" % formal.name)

        ## Invoke the subcommand.
        me.func(**kw)
524 | ||
def subcommand(name, contexts, desc, cls = Subcommand, *args, **kw):
    """
    Decorator for defining CGI subcommands.

    This is `SC.subcommand', except that the subcommand class CLS defaults
    to our CGI-aware `Subcommand'.  All of the arguments are passed along.
    """
    return SC.subcommand(name, contexts, desc,
                         cls = cls, *args, **kw)
528 | ||
529 | ###----- That's all, folks -------------------------------------------------- |