chiark / gitweb /
format.py: Fix some commentary typos.
[chopwood] / cgi.py
CommitLineData
a2916c06
MW
1### -*-python-*-
2###
3### CGI machinery
4###
5### (c) 2013 Mark Wooding
6###
7
8###----- Licensing notice ---------------------------------------------------
9###
10### This file is part of Chopwood: a password-changing service.
11###
12### Chopwood is free software; you can redistribute it and/or modify
13### it under the terms of the GNU Affero General Public License as
14### published by the Free Software Foundation; either version 3 of the
15### License, or (at your option) any later version.
16###
17### Chopwood is distributed in the hope that it will be useful,
18### but WITHOUT ANY WARRANTY; without even the implied warranty of
19### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20### GNU Affero General Public License for more details.
21###
22### You should have received a copy of the GNU Affero General Public
23### License along with Chopwood; if not, see
24### <http://www.gnu.org/licenses/>.
25
26from __future__ import with_statement
27
28import contextlib as CTX
29import os as OS; ENV = OS.environ
30import re as RX
31import sys as SYS
32import time as T
33import traceback as TB
34
35from auto import HOME, PACKAGE, VERSION
36import config as CONF; CFG = CONF.CFG
37import format as F
38import output as O; OUT = O.OUT; PRINT = O.PRINT
39import subcommand as SC
40import util as U
41
42###--------------------------------------------------------------------------
43### Configuration tweaks.
44
## The script's own URL path as reported by the web server; if the
## environment doesn't say, fall back to a conventional CGI location.
_script_name = ENV.get('SCRIPT_NAME', '/cgi-bin/chpwd')

CONF.DEFAULTS.update(

  ## URL at which this program is reached when run through CGI.
  SCRIPT_NAME = _script_name,

  ## URL (possibly relative) under which static content lives.  `None' here
  ## means `work it out from SCRIPT_NAME': see `set_static' below.  Serving
  ## static files from the main script is slow-ish, so we rely on user
  ## agents caching them.
  STATIC = None)

@CONF.hook
def set_static():
  """
  Configuration hook: fill in `CFG.STATIC' if it was left unset.

  An explicitly configured value is left alone; otherwise static content is
  assumed to live at `static' beneath `CFG.SCRIPT_NAME'.
  """
  if CFG.STATIC is not None: return
  CFG.STATIC = CFG.SCRIPT_NAME + '/static'
a2916c06
MW
59
60###--------------------------------------------------------------------------
61### Escaping and encoding.
62
63## Some handy regular expressions.
## A `%XX' escape in form-url-encoded text; group 1 is the two hex digits.
R_URLESC = RX.compile('%([0-9a-fA-F]{2})')

## Any character which must be escaped when form-url-encoding: that is,
## anything other than a word character or one of `-', `,', `.', `!'.
R_URLBAD = RX.compile('[^-\\w,.!]')

## Characters which HTML might misinterpret in text or attribute values.
R_HTMLBAD = RX.compile('[&<>\'"]')

def urldecode(s):
  """
  Decode a single form-url-encoded string S, and return the result.

  Each `+' becomes a space, and then each `%XX' escape is replaced by the
  character whose code is the hex number XX.  The `+' conversion happens
  first so that an encoded plus sign (`%2b') survives as a literal `+'.
  A `%' which isn't followed by two hex digits is left as it is.
  """
  return R_URLESC.sub(lambda m: chr(int(m.group(1), 16)),
                      s.replace('+', ' '))

def urlencode(s):
  """
  Encode a single string S using form-url-encoding, and return the result.

  Every character matched by `R_URLBAD' is replaced by a `%xx' escape
  (two lower-case hex digits, at least); spaces are therefore written as
  `%20' rather than `+'.
  """
  return R_URLBAD.sub(lambda m: '%%%02x' % ord(m.group(0)), s)

def htmlescape(s):
  """
  Escape a literal string S so that HTML doesn't misinterpret it.

  The characters `&', `<', `>', `'' and `"' are replaced by numeric
  character references (e.g., `&#x3c;'), so the result is safe both as
  element content and inside a quoted attribute value.
  """
  return R_HTMLBAD.sub(lambda m: '&#x%02x;' % ord(m.group(0)), s)
81
82## Some standard character sequences, and HTML entity names for prettier
83## versions.
b40d16b2
MW
## The table maps literal source sequences to the HTML entity to emit in
## their place.  It covers both the characters which must be escaped and
## some ASCII conventions for typographic punctuation.  NOTE(review): how
## overlapping keys (e.g., "'" against "''", or "--" against "---") are
## resolved is up to `U.StringSubst' -- presumably longest match wins;
## confirm there.
html_quotify = U.StringSubst({

  ## Markup-significant characters.
  '<': '&lt;',
  '>': '&gt;',
  '&': '&amp;',

  ## Single and double quotation marks.
  '`': '&lsquo;',
  '\'': '&rsquo;',
  '"': '&quot;',
  '``': '&ldquo;',
  '\'\'': '&rdquo;',

  ## Dashes.
  '--': '&ndash;',
  '---': '&mdash;'
})
a2916c06
MW
96
97###--------------------------------------------------------------------------
98### Output machinery.
99
class HTTPOutput (O.FileOutput):
  """
  File output driver which makes sure an HTTP header precedes the body.

  Attributes:

  `headerp'     True once a header has been written.

  `warnings'    List of warning messages collected by `warn'.

  Call `header' to send a custom header; if the first thing that happens is
  a `write' then a plain-text header is sent automatically.
  """

  def __init__(me, *args, **kw):
    """Constructor: pass arguments to the superclass; reset our state."""
    super(HTTPOutput, me).__init__(*args, **kw)
    me.warnings = []
    me.headerp = False

  def write(me, msg):
    """
    Output protocol: write MSG, preceded by a default header if necessary.
    """
    if not me.headerp:
      me.header('text/plain')
    super(HTTPOutput, me).write(msg)

  def header(me, content_type = 'text/plain', **kw):
    """
    Write an HTTP header block, unless one has been written already.

    CONTENT_TYPE and the keyword arguments KW are handed to
    `O.http_headers', which produces the individual header lines: see there
    for how keywords are turned into headers.  A blank line terminates the
    header block.

    If the request method is `HEAD' then `HEADER_DONE' is called once the
    header is complete.
    """
    if me.headerp: return

    ## Raise the flag before writing anything: `write' checks it, and
    ## (presumably) `writeln' is implemented in terms of `write', so doing
    ## this later would recurse.
    me.headerp = True

    for line in O.http_headers(content_type = content_type, **kw):
      me.writeln(line)
    me.writeln('')

    if METHOD == 'HEAD': HEADER_DONE()

  def warn(me, msg):
    """
    Report a warning message MSG.

    Nothing is printed: the message is simply added to the `warnings' list,
    from where it can be picked up later.
    """
    me.warnings.append(msg)
142
a2916c06
MW
def cookie(name, value, **kw):
  """
  Build and return the value for an HTTP `Set-Cookie' header.

  NAME and VALUE are the cookie's name and value; both are
  form-url-encoded so that awkward characters can't be misinterpreted.

  The keyword arguments KW are further cookie attributes.  Attribute names
  are lower-cased, and underscores `_' are replaced by hyphens `-' (so
  `max_age' means `max-age').  Attribute values are handled as follows.

    * A false value (`None', `False', zero, empty string): the attribute is
      left out entirely.

    * Exactly `True': the attribute is boolean, and only its name is
      written.

    * Anything else: written as `NAME=VALUE'.

  If a `max-age' attribute is given, a matching `expires' attribute is
  computed from it, relative to `U.NOW'.
  """

  ## Canonify the attribute names.
  attr = dict((k.lower().replace('_', '-'), v) for k, v in kw.items())

  ## Work out an explicit expiry time to go with the maximum age.
  if 'max-age' in attr:
    maxage = int(attr['max-age'])
    attr['expires'] = T.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                 T.gmtime(U.NOW + maxage))

  ## Assemble the pieces: the cookie proper comes first.
  parts = ['%s=%s' % (urlencode(name), urlencode(value))]
  for k, v in attr.items():
    if not v: continue
    if v is True: parts.append(k)
    else: parts.append('%s=%s' % (k, v))
  return '; '.join(parts)
a2916c06
MW
166
def action(*v, **kw):
  """
  Build and return an HTML-escaped URL which invokes this script.

  The positional arguments V are path components: they're joined with `/'
  and appended to the configured `CFG.SCRIPT_NAME' (so, presumably, the
  script will see them again as `PATH_INFO').  The keyword arguments KW, if
  there are any, become a query string of form-url-encoded `KEY=VALUE'
  pairs, sorted by key and separated by `;'.
  """
  path = '/'.join((CFG.SCRIPT_NAME,) + v)
  if not kw: return htmlescape(path)
  query = ';'.join('%s=%s' % (urlencode(k), urlencode(val))
                   for k, val in sorted(kw.items()))
  return htmlescape(path + '?' + query)
181
def static(name):
  """
  Return the HTML-escaped URL of the static file NAME.

  The file is taken to live directly beneath `CFG.STATIC'.
  """
  url = '/'.join([CFG.STATIC, name])
  return htmlescape(url)
185
a2916c06
MW
def redirect(where, **kw):
  """
  Write a complete redirection to the URL WHERE.

  A header is written with status 302 and a `location' of WHERE; any
  further keyword arguments KW are passed on to `OUT.header' as additional
  header keywords.  A tiny HTML page follows, containing a link to WHERE
  (HTML-escaped) for the benefit of user agents which don't follow the
  redirection by themselves.
  """
  OUT.header(content_type = 'text/html',
             status = 302, location = where,
             **kw)

  ## The body must end with `</body>': the page used to have a second
  ## `<body>' start tag here, which left the element unclosed.
  PRINT("""\
<html>
<head><title>No, sorry, it's moved again.</title></head>
<body><p>I'm <a href="%s">over here</a> now.</body>
</html>""" % htmlescape(where))
198
199###--------------------------------------------------------------------------
200### Templates.
201
202## Where we find our templates.
## The directory in which template files are looked up.
TMPLDIR = HOME

## Dynamically bound state.  Its `kw' slot holds the keyword arguments which
## are made available to templates; `tmplkw' rebinds it temporarily.
STATE = U.Fluid()
STATE.kw = {}

@CONF.hook
def set_template_keywords():
  """
  Configuration hook: install the basic template keyword arguments.

  These are the package name and version, and the `SCRIPT_NAME', `STATIC'
  and `ALLOWOP' configuration settings.
  """
  basics = dict(package = PACKAGE,
                version = VERSION,
                script = CFG.SCRIPT_NAME,
                static = CFG.STATIC,
                allowop = CFG.ALLOWOP)
  STATE.kw.update(basics)
a2916c06
MW
218
class TemplateFinder (object):
  """
  A fake read-only dictionary mapping template names to their contents.

  Looking up a KEY reads the file named KEY in the finder's directory and
  returns its contents as a string.  Each file is read at most once: the
  contents are cached, so later changes to the file go unnoticed.
  """

  def __init__(me, dir):
    """Initialize a finder which looks for templates in directory DIR."""
    me._dir = dir
    me._cache = {}

  def __getitem__(me, key):
    """
    Return the contents of the template file KEY.

    Whatever exception `open' raises for a missing or unreadable file is
    propagated; nothing is cached in that case.
    """
    if key not in me._cache:
      with open(OS.path.join(me._dir, key)) as f:
        me._cache[key] = f.read()
    return me._cache[key]
## The template finder, as a global and also as the template keyword
## argument `TMPL'.
TMPL = TemplateFinder(TMPLDIR)
STATE.kw['TMPL'] = TMPL
a2916c06
MW
233
@CTX.contextmanager
def tmplkw(**kw):
  """
  Context manager: run the body with extra template keyword arguments.

  For the duration of the body, `STATE.kw' is bound to a fresh dictionary
  holding the current template keywords overlaid with KW.  The dictionary
  which was current on entry is not modified.
  """
  merged = dict(STATE.kw, **kw)
  with STATE.bind(kw = merged):
    yield
243
## Our own formatting operations, keyed by directive character.  This map
## is consulted ahead of the standard operations: see `format_tmpl'.
FORMATOPS = {}

class FormatHTML (F.SimpleFormatOperation):
  """
  ~H: output the argument, made safe for inclusion in HTML.

  Plain `~H' escapes the argument using `htmlescape'.  With a `:' flag, the
  argument is passed through `html_quotify' instead, which also prettifies
  quotes and dashes.
  """
  def _convert(me, arg):
    escape = html_quotify if me.colonp else htmlescape
    return escape(arg)
FORMATOPS['H'] = FormatHTML
256
dc190ae1
MW
class FormatWrap (F.BaseFormatOperation):
  """
  ~<...~@>: wrap the enclosed material in another formatting control string.

  The directive's argument is itself a formatting control string.  The
  material enclosed by the directive is split into pieces at `~;' markers.
  When the directive is performed, the argument is compiled and run, with
  the list of pieces (as compiled formatting operations) available to it as
  the keyword argument `wrapped'.
  """

  def __init__(me, *args):
    """Constructor: collect the enclosed pieces, up to the closing `~>'."""
    super(FormatWrap, me).__init__(*args)
    collected = []
    done = False
    while not done:
      piece, delim = F.collect_subformat('>;')
      collected.append(piece)
      ## A `;' delimiter means that there's another piece to come.
      done = delim.char == '>'
    me.pieces = collected

  def _format(me, atp, colonp):
    """Compile the argument and run it with `wrapped' bound to our pieces."""
    wrapper = F.compile(me.getarg.get())
    argmap = dict(F.FORMAT.argmap, wrapped = me.pieces)
    with F.FORMAT.bind(argmap = argmap):
      wrapper.format()
FORMATOPS['<'] = FormatWrap
279
a2916c06
MW
def format_tmpl(control, **kw):
  """
  Format the template CONTROL string to the current output `OUT'.

  The control string is processed with our own operations `FORMATOPS'
  taking precedence over the standard `F.BASEOPS'.  The template keyword
  arguments in force (see `tmplkw'), overlaid with KW, are supplied as the
  keyword arguments for formatting.
  """
  opmaps = [FORMATOPS, F.BASEOPS]
  with F.COMPILE.bind(opmaps = opmaps):
    with tmplkw(**kw):
      F.format(OUT, control, **STATE.kw)
284
def page(template, header = {}, title = 'Chopwood', **kw):
  """
  Write a complete HTML page, built from the named TEMPLATE.

  HEADER is a dictionary of keyword arguments for `OUT.header'; the content
  type is always forced to `text/html', whatever HEADER says.  (HEADER
  itself is not modified.)  The page is produced by formatting the
  `wrapper.fhtml' template, which is given the TITLE, the warnings
  collected so far in `OUT.warnings', and the contents of TEMPLATE as
  `payload'; any other keyword arguments KW are passed on to the templates
  too.
  """
  hdrkw = dict(header, content_type = 'text/html')
  OUT.header(**hdrkw)
  wrapper = TMPL['wrapper.fhtml']
  format_tmpl(wrapper,
              title = title, warnings = OUT.warnings,
              payload = TMPL[template], **kw)
a2916c06
MW
291
292###--------------------------------------------------------------------------
293### Error reporting.
294
a2916c06
MW
@CTX.contextmanager
def cgi_errors(hook = None):
  """
  Context manager: report exceptions raised by the body as useful HTML.

  If HOOK is given, it is called (with no arguments) before the error is
  reported: it may set up useful stuff for the report.

  An `ExpectedError' which arrives before any header has been written is
  shown using the `error.fhtml' page, with the error's own `code' as the
  HTTP status.  Anything else produces the `exception.fhtml' report,
  complete with traceback, request parameters, cookies, path and
  environment: as a whole page with status 500 if no header has been
  written yet, or otherwise tacked onto the end of whatever has already
  been output.

  Either way, the exception is not propagated further.
  """
  try:
    yield None
  except Exception:

    ## Pick up the exception object.  (This spelling is used instead of
    ## naming the exception in the `except' clause because it means the
    ## same thing to every version of Python.)
    e = SYS.exc_info()[1]
    if hook: hook()

    ## An anticipated problem, and the header is still to come: we can
    ## show a polite error page.
    if isinstance(e, U.ExpectedError) and not OUT.headerp:
      page('error.fhtml',
           header = dict(status = e.code),
           title = 'Chopwood: error', error = e)
      return

    ## Something unexpected, or it's too late to be polite: dump
    ## everything we know.
    exty, exval, extb = SYS.exc_info()
    with tmplkw(exception = TB.format_exception_only(exty, exval),
                traceback = TB.extract_tb(extb),
                PARAM = sorted(PARAM),
                COOKIE = sorted(COOKIE.items()),
                PATH = PATH,
                ENV = sorted(ENV.items())):
      if not OUT.headerp:
        page('exception.fhtml',
             header = dict(status = 500),
             title = 'Chopwood: internal error',
             toplevel = True)
      else:
        format_tmpl(TMPL['exception.fhtml'], toplevel = False)
326
327###--------------------------------------------------------------------------
328### CGI input.
329
330## Lots of global variables to be filled in by `cgiparse'.
## The request method, as a string; `None' until `cgiparse' has run.
METHOD = None

## Cookies from the user agent: a dictionary mapping names to values.
COOKIE = dict()

## Special query parameters: keys present here are diverted into this
## dictionary by `cgiparse' rather than being treated as ordinary
## parameters.
SPECIAL = dict()

## Ordinary query parameters: `PARAM' is a list of (KEY, VALUE) pairs in
## order of appearance; `PARAMDICT' maps keys which appear exactly once to
## their values.
PARAM = list()
PARAMDICT = dict()

## Components of the trailing `PATH_INFO' path.
PATH = list()

## Whether the client connection is carried over SSL or TLS.
SSLP = False

## Function called by `HTTPOutput.header' once the header has been written
## in response to a `HEAD' request.  By default, it does nothing.
HEADER_DONE = lambda: None

## Regular expressions for splitting apart query strings (at `;' or `&')
## and cookie strings (at `;' only).
R_QSPLIT = RX.compile('[;&]')
R_CSPLIT = RX.compile(';')
343
def split_keyvalue(string, delim, default):
  """
  Split a STRING into components, and generate (KEY, VALUE) pairs.

  DELIM is a compiled regular expression: the STRING is split wherever it
  matches.  Each component should have the form KEY[=VALUE], divided at the
  first `='.  Both KEY and VALUE are stripped of leading and trailing
  whitespace and then form-url-decoded.

  If a component has no `=VALUE' part, then DEFAULT is used as the value;
  except that if DEFAULT is `None' then such components are ignored.
  Components whose KEY turns out to be empty are ignored too.
  """
  for item in delim.split(string):
    k, eq, v = item.partition('=')
    if not eq:
      ## No `=' at all, so there's no value here.
      if default is None: continue
      v = default
    k, v = k.strip(), v.strip()
    if not k: continue
    yield urldecode(k), urldecode(v)
364
def cgiparse():
  """
  Process all of the various exciting CGI environment variables.

  We read environment variables (and, for `POST' requests, standard input)
  and populate some tables left in global variables: it's all rather
  old-school.  Variables set are as follows.

  `METHOD'
        The request method, from `REQUEST_METHOD': one of `GET', `HEAD' or
        `POST'.

  `COOKIE'
        A dictionary mapping cookie names to the values provided by the user
        agent.

  `SPECIAL'
        A dictionary holding some special query parameters which are of
        interest at a global level, and should not be passed to a subcommand
        handler.  No new entries will be added to this dictionary, though
        values will be modified to reflect the query parameters discovered.
        Conventionally, such parameters have names beginning with `%'.

  `PARAM'
        The query parameters as a list of (KEY, VALUE) pairs.  Special
        parameters are omitted.  A parameter given without a value gets the
        value `t'.

  `PARAMDICT'
        The query parameters as a dictionary.  Special parameters, and
        parameters which appear more than once, are omitted.

  `PATH'
        The trailing `PATH_INFO' path, split at `/' markers, with any
        trailing empty component removed.

  `SSLP'
        True if the client connection is carried over SSL or TLS (i.e., if
        `SSL_PROTOCOL' is set to something nonempty).

  The collections are updated in place, so other references to them remain
  valid.

  An `ExpectedError' with code 500 is raised if a necessary environment
  variable is missing, if the request method or content type is not one we
  understand, or if the `POST' body can't be read in full.
  """

  global METHOD, SSLP

  def getenv(var):
    """Return the value of environment variable VAR, which must exist."""
    try: return ENV[var]
    except KeyError: raise U.ExpectedError(500, "No `%s' supplied" % var)

  ## Yes, we want the request method.
  METHOD = getenv('REQUEST_METHOD')

  ## Acquire the query string.
  if METHOD in ['GET', 'HEAD']:
    q = ENV.get('QUERY_STRING', '')

  elif METHOD == 'POST':

    ## We must read the query string from stdin.
    n = getenv('CONTENT_LENGTH')
    if not n.isdigit():
      raise U.ExpectedError(500, "Invalid CONTENT_LENGTH")
    n = int(n, 10)

    ## Check the content type.  Media type names are case-insensitive, and
    ## may be followed by `;'-separated parameters (commonly `charset=...'),
    ## so compare only the type proper rather than the whole string.
    ct = getenv('CONTENT_TYPE')
    mediatype = ct.split(';', 1)[0].strip().lower()
    if mediatype != 'application/x-www-form-urlencoded':
      raise U.ExpectedError(500, "Unexpected content type `%s'" % ct)

    q = SYS.stdin.read(n)
    if len(q) != n:
      raise U.ExpectedError(500, "Failed to read correct length")

  else:
    raise U.ExpectedError(500, "Unexpected request method `%s'" % METHOD)

  ## Populate the `SPECIAL', `PARAM' and `PARAMDICT' tables.  The `seen' set
  ## remembers every ordinary key we've met, so that a key which turns up a
  ## third time doesn't sneak back into `PARAMDICT'.
  seen = set()
  for k, v in split_keyvalue(q, R_QSPLIT, 't'):
    if k in SPECIAL:
      SPECIAL[k] = v
    else:
      PARAM.append((k, v))
      if k in seen:
        try: del PARAMDICT[k]
        except KeyError: pass
      else:
        PARAMDICT[k] = v
        seen.add(k)

  ## Parse out the cookies, if any.  Cookie components with no value are
  ## ignored.
  try: c = ENV['HTTP_COOKIE']
  except KeyError: pass
  else:
    for k, v in split_keyvalue(c, R_CSPLIT, None): COOKIE[k] = v

  ## Set up the `PATH'.
  try: p = ENV['PATH_INFO']
  except KeyError: pass
  else:
    pp = p.lstrip('/').split('/')
    if pp and not pp[-1]: pp.pop()
    PATH[:] = pp

  ## Check the crypto for the connection.
  if ENV.get('SSL_PROTOCOL'):
    SSLP = True
460
a2916c06
MW
461###--------------------------------------------------------------------------
462### CGI subcommands.
463
class Subcommand (SC.Subcommand):
  """
  A CGI subcommand object.

  As for `subcommand.Subcommand', but with additional protocol for processing
  CGI parameters.

  The `methods' attribute is the set of request methods for which the
  subcommand may be invoked.
  """

  def __init__(me, name, contexts, desc, func,
               methods = ['GET', 'POST'], *args, **kw):
    """
    Constructor: as for the superclass, except for METHODS.

    METHODS is a collection of the request-method names which this
    subcommand will accept; it is copied into a set, so the caller's
    collection (and the default) are never modified.  The other arguments
    are passed to the superclass unchanged.
    """
    super(Subcommand, me).__init__(name, contexts, desc, func, *args, **kw)
    me.methods = set(methods)

  def cgi(me, param, path):
    """
    Invoke the subcommand given a collection of CGI parameters.

    PARAM is a list of (KEY, VALUE) pairs from the CGI query.  The CGI query
    parameters are checked against the subcommand's parameters (making sure
    that mandatory parameters are supplied, that any switches are given
    boolean values, and that only the `rest' parameter, if any, is
    duplicated).  An unrecognized parameter is an error unless its value is
    empty, in which case it is quietly ignored.

    PATH is a list of trailing path components.  They are used to satisfy the
    `rest' parameter if there is one and there are no query parameters which
    satisfy the `rest' parameter; otherwise, an `ExpectedError' is raised if
    the list of path elements is non-empty.

    The request method `METHOD' must be one of the subcommand's permitted
    `methods'; a `HEAD' request is permitted wherever `GET' is.

    Problems are reported by raising `ExpectedError': code 500 for a
    forbidden request method, 400 for repeated, unexpected or missing
    parameters, and 404 for superfluous path elements.  The handler
    function's return value is discarded.
    """

    ## We're going to make a pass over the supplied parameters, and we'll
    ## check them off against the formal parameters as we go; so we'll need
    ## to be able to look them up.  We must also make sure, afterwards, that
    ## all of the mandatory parameters were actually supplied.
    ##
    ## To that end: `want' is a dictionary mapping parameter names to
    ## functions which will do something useful with the value; and `kw' is
    ## going to be the keyword-argument dictionary we pass to the handler
    ## function -- it also serves as the record of which parameters have
    ## been assigned so far.
    want = {}
    kw = {}

    ## Check the request method against the permitted list.
    meth = METHOD
    if meth == 'HEAD': meth = 'GET'
    if meth not in me.methods:
      raise U.ExpectedError(500, "Unexpected request method `%s'" % METHOD)

    def set_value(k, v):
      """Set a simple value: we shouldn't see multiple values."""
      if k in kw:
        raise U.ExpectedError(400, "Repeated parameter `%s'" % k)
      kw[k] = v
    def set_bool(k, v):
      """Set a simple boolean value: for switches."""
      set_value(k, v.lower() in ['true', 't', 'yes', 'y'])
    def set_list(k, v):
      """Append the value to a list: for the `rest' parameter."""
      kw.setdefault(k, []).append(v)

    ## Set up the `want' map.
    for o in me.opts:
      if o.argname: want[o.name] = set_value
      else: want[o.name] = set_bool
    for p in me.params: want[p.name] = set_value
    for p in me.oparams: want[p.name] = set_value
    if me.rparam: want[me.rparam.name] = set_list

    ## Work through the list of supplied parameters.
    for k, v in param:
      try:
        f = want[k]
      except KeyError:
        if v:
          raise U.ExpectedError(400, "Unexpected parameter `%s'" % k)
      else:
        f(k, v)

    ## Deal with a path, if there is one.  Give the handler its own copy of
    ## the list, just as it would have got from query parameters, so that it
    ## can't disturb the caller's (probably the global `PATH') by accident.
    if path:
      if me.rparam and me.rparam.name not in kw:
        kw[me.rparam.name] = list(path)
      else:
        raise U.ExpectedError(404, "Superfluous path elements")

    ## Make sure we saw all of the mandatory parameters.
    for p in me.params:
      if p.name not in kw:
        raise U.ExpectedError(400, "Missing parameter `%s'" % p.name)

    ## Invoke the subcommand.
    me.func(**kw)
558
def subcommand(name, contexts, desc, cls = Subcommand, *args, **kw):
  """
  Decorator for defining CGI subcommands.

  This is just `SC.subcommand', except that the subcommand class CLS
  defaults to our CGI-aware `Subcommand'.  All of the arguments are passed
  along unchanged.
  """
  kw = dict(kw, cls = cls)
  return SC.subcommand(name, contexts, desc, *args, **kw)
562
563###----- That's all, folks --------------------------------------------------