Commit | Line | Data |
---|---|---|
7db733d4 MW |
1 | #! @PYTHON@ |
2 | ### | |
3 | ### Generate files by filling in simple templates | |
4 | ### | |
5 | ### (c) 2013 Straylight/Edgeware | |
6 | ### | |
7 | ||
8 | ###----- Licensing notice --------------------------------------------------- | |
9 | ### | |
10 | ### This file is part of Catacomb. | |
11 | ### | |
12 | ### Catacomb is free software; you can redistribute it and/or modify | |
13 | ### it under the terms of the GNU Library General Public License as | |
14 | ### published by the Free Software Foundation; either version 2 of the | |
15 | ### License, or (at your option) any later version. | |
16 | ### | |
17 | ### Catacomb is distributed in the hope that it will be useful, | |
18 | ### but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ### GNU Library General Public License for more details. | |
21 | ### | |
22 | ### You should have received a copy of the GNU Library General Public | |
23 | ### License along with Catacomb; if not, write to the Free | |
24 | ### Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, | |
25 | ### MA 02111-1307, USA. | |
26 | ||
27 | from __future__ import with_statement | |
28 | ||
29 | import itertools as IT | |
30 | import optparse as OP | |
31 | import os as OS | |
32 | import re as RX | |
b8dc834f MW |
33 | import sys as SYS |
34 | if SYS.version_info >= (3,): from io import StringIO | |
35 | else: from cStringIO import StringIO | |
7db733d4 MW |
36 | from sys import argv, exit, stderr |
37 | ||
38 | ###-------------------------------------------------------------------------- | |
39 | ### Utilities. | |
40 | ||
## The name we were invoked under; used to tag diagnostics.
QUIS = OS.path.basename(argv[0])

def die(msg):
    """
    Write MSG to standard error, prefixed by the program name, and exit.

    The process exits with status 1; this function does not return.
    """
    report = '%s: %s\n' % (QUIS, msg)
    stderr.write(report)
    exit(1)
47 | ||
def indexed(seq):
    """
    Generate pairs (I, X), where I counts from zero and X are the items of
    SEQ.

    The result is a lazy iterator, so SEQ may be any iterable.  This is
    exactly the built-in `enumerate', which is available on both Python 2
    and Python 3; using it avoids depending on `itertools.izip', which
    Python 3 does not provide.
    """
    return enumerate(seq)
53 | ||
b8dc834f MW |
## Paper over the differences between Python 2 and Python 3.
if SYS.version_info[0] < 3:
    def func_name(func):
        """Return the name of the function FUNC (Python 2 spelling)."""
        return func.func_name
else:
    def func_name(func):
        """Return the name of the function FUNC (Python 3 spelling)."""
        return func.__name__
    ## Python 3's built-in `zip' is already lazy; offer it under the old
    ## `itertools' name too.
    IT.izip = zip

## Very old Pythons have no `next' built-in: supply one if it's missing.
try:
    next
except NameError:
    def next(obj):
        """Return the next item from the iterator OBJ."""
        return obj.next()
63 | ||
7db733d4 MW |
64 | ###-------------------------------------------------------------------------- |
65 | ### Reading the input values. | |
66 | ||
## Registry of every known column: maps a column name to the pair
## (Relation, column index) identifying where its values live.
COLMAP = dict()
69 | ||
class Cursor (object):
    """
    A Cursor object keeps track of an iteration through a Relation.

    At any time, the Cursor has a `current' row; the individual cells of
    this row may be retrieved using Python's standard indexing operator.
    The `step' method advances to the next row (if there is one).  The
    `reset' method returns to row zero.

    Once `step' has run off the end, the Cursor is `exhausted': it has no
    current row (indexing it fails) until `reset' is called.
    """

    def __init__(me, rel):
        """
        Initialize a new Cursor object, tracking its way through a Relation
        REL.

        REL need only support `len' and integer indexing.  The new Cursor
        has row zero as its current row, so REL must not be empty (an
        empty REL raises whatever REL[0] raises).
        """
        me._rel = rel
        me.reset()

    def step(me):
        """
        Advance the Cursor to the next row.

        Returns False if there is no next row, leaving the Cursor
        exhausted; otherwise True.  Stepping an already exhausted Cursor
        is harmless and returns False again.
        """
        ## An exhausted cursor has no index to increment.
        if me._i is None: return False
        me._i += 1
        if me._i >= len(me._rel):
            me._i = me._row = None
            return False
        me._row = me._rel[me._i]
        return True

    def reset(me):
        """
        Reset the Cursor, so that row zero is current again.
        """
        me._i = 0
        me._row = me._rel[0]

    def __getitem__(me, i):
        """
        Return the item in column I of the Cursor's current row.

        The index must be acceptable to the underlying row object, but
        otherwise the Cursor imposes no restrictions.  Indices need not be
        numeric, for example.
        """
        return me._row[i]

    def __repr__(me):
        """
        Return a text description of the Cursor, for diagnostic use.
        """
        ## Format the index with `%s': it is None once the cursor is
        ## exhausted, and `%d' would raise TypeError then.
        return '#<Cursor %r[%s] = %r>' % (me._rel, me._i, me._row)
125 | ||
class CursorSet (object):
    """
    A CursorSet iterates over the cartesian product of a number of
    Relations.

    More precisely: it maintains a stack, each level of which tracks a
    number of Relations.  More Relations can be pushed onto this stack with
    the `push' method, and removed with `pop'.  The `step' method advances
    through the cartesian product of the Relations in the top level of the
    stack -- the `active' Relations.  Columns from the current rows of all
    of the currently known Relations -- whether active or not -- can be
    extracted using `get'.
    """

    def __init__(me):
        """
        Initialize a new CursorSet object.

        A new CursorSet has an empty stack.
        """
        me._map = {}                    # Relation -> its Cursor
        me._stack = []                  # saved (active cursors, new rels)
        me._act = None                  # Cursors of the active level

    def push(me, rels):
        """
        Push the new Relations RELS onto the stack and start iterating.

        The currently active Relations are pushed down.  Those Relations
        which are not already known to the CursorSet become the newly
        active collection.  (Relations which are already known are simply
        ignored.)  Each newly active Relation must be nonempty, because a
        Cursor is created for it.

        Iteration traverses the Relations which come first in RELS most
        rapidly: `step' advances the earliest cursor first, and only moves
        a later one on when all the earlier ones wrap around.
        """
        cc = []
        rr = []
        for r in rels:
            if r in me._map: continue
            c = me._map[r] = Cursor(r)
            rr.append(r)
            cc.append(c)
        ## Remember which relations this level introduced, so that `pop'
        ## can forget exactly those.
        me._stack.append((me._act, rr))
        me._act = cc

    def step(me):
        """
        Advance the CursorSet through the currently active Relations.

        Return False if the active Relations have now been exhausted;
        otherwise return True.  When False is returned, every active
        cursor has been reset to its row zero.
        """
        ## Odometer-style: bump the first cursor which can still advance,
        ## rewinding each one that has run out along the way.
        for c in me._act:
            if c.step(): return True
            c.reset()
        return False

    def pop(me):
        """
        Pop the active Relations.

        Return to iterating over the previously active collection, and
        forget the Relations which the matching `push' introduced.
        """
        me._act, rels = me._stack.pop()
        for r in rels: del me._map[r]

    def get(me, rel, i):
        """
        Return the item with index I in the current row of Relation REL.

        REL must currently be known to the CursorSet; otherwise KeyError
        is raised.
        """
        return me._map[rel][i]
197 | ||
class Relation (object):
    """
    A Relation is a table of data.

    It has a `header' -- a sequence of column names -- and a list of rows,
    each of which has exactly as many items as the header.  Rows are
    reached by integer index, and `len' reports how many there are, which
    is all that Cursors and CursorSets need in order to iterate over it.
    """

    def __init__(me, head):
        """
        Make an empty Relation whose header is HEAD.

        As a side effect, each name in HEAD is entered in `COLMAP', mapped
        to this Relation and the name's column index.
        """
        me._head, me._rows = head, []
        for index, name in indexed(head):
            COLMAP[name] = (me, index)

    def addrow(me, row):
        """
        Append ROW to the Relation.

        ROW must have the same number of entries as the header; if not,
        a fatal error is reported.
        """
        if len(me._head) != len(row):
            shown_row = ', '.join(row)
            shown_head = ', '.join(me._head)
            die("mismatch: row `%s' doesn't match heading `%s'" %
                (shown_row, shown_head))
        me._rows.append(row)

    def __len__(me):
        """Report how many rows the Relation holds."""
        rows = me._rows
        return len(rows)

    def __getitem__(me, i):
        """Fetch row number I of the Relation."""
        rows = me._rows
        return rows[i]

    def __repr__(me):
        """Describe the Relation, by its header, for diagnostic use."""
        return '#<Relation %r>' % (me._head,)
242 | ||
def read_immediate(word):
    """
    Return a Relation constructed by parsing WORD.

    The WORD has the form `HEAD=ROW ROW ...', where the HEAD and ROWs are
    comma-separated lists of strings which will form the relation's header
    and rows respectively.  There is no way to include an item which
    contains a comma or whitespace.

    Creating the Relation registers its columns in `COLMAP'.  A WORD with
    no `=' sign is reported as a fatal error.
    """
    if '=' not in word:
        die("bad relation `%s': expected `HEAD=ROW ...'" % word)
    head, rows = word.split('=', 1)
    rel = Relation([c.strip() for c in head.split(',')])
    for row in rows.split():
        rel.addrow([c.strip() for c in row.split(',')])
    return rel

def read_file(spec):
    """
    Return a Relation constructed from a file, according to SPEC.

    The SPEC has the form `FILE:HEAD', where FILE names a file, and HEAD is
    a comma-separated list of strings to form the relation's header.  Each
    line from the file which is neither empty nor begins with `#' is split
    into whitespace-separated words to form a row in the relation.  There
    is no way to include an item which contains whitespace.

    Creating the Relation registers its columns in `COLMAP'.  A SPEC with
    no `:' is reported as a fatal error.
    """
    if ':' not in spec:
        die("bad relation file `%s': expected `FILE:HEAD'" % spec)
    fname, head = spec.split(':', 1)
    rel = Relation([c.strip() for c in head.split(',')])
    with open(fname) as f:
        for line in f:
            line = line.strip()
            ## Skip blank lines and comments.
            if line == '' or line.startswith('#'): continue
            rel.addrow(line.split())
    return rel

def read_thing(spec):
    """
    Return a relation constructed from SPEC.

    If SPEC begins with `@' then read the relation from a file (see
    `read_file'); otherwise interpret it as immediate data (see
    `read_immediate').
    """
    if spec.startswith('@'): return read_file(spec[1:])
    else: return read_immediate(spec)
284 | ||
285 | ###-------------------------------------------------------------------------- | |
286 | ### Template structure. | |
287 | ||
class BasicTemplate (object):
    """
    The common ancestor of all template objects.

    This class defines nothing itself.  Concrete templates are expected to
    provide two methods:

    relations()         Return the set of Relations which substitutions at
                        the top level of the template refer to.

    subst(OUT, CS)      Expand the template, writing the result to the
                        stream OUT.  CS is the CursorSet holding the
                        current iteration state.
    """
302 | ||
class LiteralTemplate (BasicTemplate):
    """
    A LiteralTemplate expands to a fixed piece of text.
    """

    def __init__(me, text, **kw):
        """
        Set up a LiteralTemplate which will write TEXT when expanded.

        Other keyword arguments are handed on to the superclass.
        """
        super(LiteralTemplate, me).__init__(**kw)
        me._text = text

    def relations(me):
        """There are no substitutions in literal text: return an empty set."""
        none = set()
        return none

    def subst(me, out, cs):
        """Write the fixed text to OUT; CS is not consulted."""
        text = me._text
        out.write(text)

    def __repr__(me):
        """Describe the template, showing its text, for diagnostic use."""
        return '#<LiteralTemplate %r>' % (me._text,)
325 | ||
class TagTemplate (BasicTemplate):
    """
    A TagTemplate is the expansion of one substitution tag.

    It picks an item out of the current row of a relation, optionally
    passes it through a transformation, and writes out the result.
    """

    def __init__(me, rel, i, op, **kw):
        """
        Set up a TagTemplate.

        REL is the relation to draw from and I the index of the wanted
        column.  OP is a function to apply to the item before output, or
        None if the item is to be written as it stands.  Other keyword
        arguments are handed on to the superclass.
        """
        super(TagTemplate, me).__init__(**kw)
        me._rel, me._i, me._op = rel, i, op

    def relations(me):
        """Return the one-element set holding the relation we draw from."""
        return set((me._rel,))

    def subst(me, out, cs):
        """
        Fetch our item from the current row of our relation, as tracked by
        CS, transform it if we have an operation, and write it to OUT.
        """
        item = cs.get(me._rel, me._i)
        transform = me._op
        if transform is None: out.write(item)
        else: out.write(transform(item))

    def __repr__(me):
        """Describe the template, by its column name, for diagnostic use."""
        colname = me._rel._head[me._i]
        return '#<TagTemplate %s>' % colname
362 | ||
class SequenceTemplate (BasicTemplate):
    """
    A SequenceTemplate strings several other templates together.
    """

    def __new__(cls, seq, **kw):
        """
        Make a template standing for the sequence SEQ of templates.

        A one-element SEQ (a common case) needs no wrapper, so its sole
        member is returned as it is; only otherwise is a SequenceTemplate
        actually created.
        """
        if len(seq) != 1:
            return super(SequenceTemplate, cls).__new__(cls, **kw)
        return seq[0]

    def __init__(me, seq, **kw):
        """
        Fill in a new SequenceTemplate from SEQ.

        Any member of SEQ which is itself a SequenceTemplate is replaced by
        its own children, so that the result is a flat list rather than a
        pointless tree.
        """
        super(SequenceTemplate, me).__init__(**kw)
        mine = type(me)
        flat = []
        for t in seq:
            if not isinstance(t, mine): flat.append(t)
            else: flat.extend(t._seq)
        me._seq = flat

    def relations(me):
        """
        Return the union of the relations reported by each of the children.
        """
        found = set()
        for child in me._seq:
            found.update(child.relations())
        return found

    def subst(me, out, cs):
        """
        Expand each child in turn to OUT, so that the output is the
        concatenation of their expansions.
        """
        for child in me._seq:
            child.subst(out, cs)

    def __repr__(me):
        """Describe the template, listing its children, for diagnostics."""
        return '#<SequenceTemplate %r>' % (me._seq,)
413 | ||
class RepeatTemplate (BasicTemplate):
    """
    A RepeatTemplate iterates its body over a number of relations.
    """

    def __init__(me, sub, **kw):
        """
        Initialize a new RepeatTemplate, given a template SUB to act as its
        body.

        Other keyword arguments are handed on to the superclass, as the
        other template classes do.
        """
        super(RepeatTemplate, me).__init__(**kw)
        me._sub = sub

    def relations(me):
        """
        A RepeatTemplate hides the relations of its body: it iterates over
        them itself, so it reports none to its container.
        """
        return set()

    def subst(me, out, cs):
        """
        Substitute a RepeatTemplate, by iterating over the relations
        mentioned in its body template.

        The body is expanded to OUT once for each combination of rows of
        those relations which CS doesn't already know about.  If any
        relation mentioned in the body is empty, nothing at all is written.
        """
        rr = me._sub.relations()
        ## An empty relation makes the whole product empty (and a Cursor
        ## can't be made for it anyway).
        if any(len(r) == 0 for r in rr): return
        cs.push(rr)
        try:
            while True:
                me._sub.subst(out, cs)
                if not cs.step(): break
        finally:
            ## Keep the cursor stack balanced even if the body fails.
            cs.pop()

    def __repr__(me):
        """Return a description of the template, for diagnostic use."""
        return '#<RepeatTemplate %r>' % me._sub
447 | ||
448 | ###-------------------------------------------------------------------------- | |
449 | ### Some slightly cheesy parsing machinery. | |
450 | ||
class ParseState (object):
    """
    A ParseState records how far a parser has got through its input.

    The `curr' slot holds the line currently under consideration, complete
    with its line terminator, or None once the input has run out.
    """

    def __init__(me, file, text):
        """
        Set up a ParseState.

        FILE is a string naming the source, for use in error messages.
        TEXT is the whole input as a single string; it is split into lines
        here, and the first line (if any) becomes current.
        """
        me._file = file
        me._i = 0
        me._it = iter(text.splitlines(True))
        me.step()

    def step(me):
        """
        Move on to the next line.

        `curr' becomes the next line, or None if there are no more.  The
        line counter only moves when a line was actually fetched.
        """
        try:
            line = next(me._it)
        except StopIteration:
            me.curr = None
            return
        me.curr = line
        me._i += 1

    def error(me, msg):
        """
        Report MSG as a fatal parsing error, citing the file name and the
        number of the most recently fetched line.
        """
        where = (me._file, me._i, msg)
        die('%s:%d: %s' % where)
485 | ||
class token (object):
    """
    A token is a distinctive marker object: all that matters about one is
    which object it is.
    """

    def __init__(me, name):
        """Set up a token called NAME; the name is only used for display."""
        me._name = name

    def __repr__(me):
        """Describe the token, by name, for diagnostic use."""
        return '#<%s>' % (me._name,)

## Marker tokens which the parsing functions hand back in place of
## templates: `EOF' for running out of input, `END' for an `%end' directive.
EOF = token('eof')
END = token('end')
501 | ||
## Patterns for the two spellings of a substitution tag: `@COLUMN', and
## `@{COLUMN:OPERATOR:...}'.  Group 1 is the column name; in the complex
## form, group 2 is the (possibly empty) run of `:OPERATOR' suffixes.  The
## patterns are written in verbose mode, so the spaces in them are ignored.
R_SIMPLETAG = RX.compile(r'@ (\w+)', flags = RX.VERBOSE)
R_COMPLEXTAG = RX.compile(r'@ { (\w+) ((?: : \w+)*) }', flags = RX.VERBOSE)

## Registry of substitution operators: maps an operator's name to the
## function which implements it.
OPMAP = dict()
508 | ||
def defop(func):
    """
    Decorator which registers FUNC as a substitution operator.

    FUNC is entered in `OPMAP' under its own name, less a leading `op_' if
    it has one, and is returned unchanged.

    An operator function is called with the raw item as its only argument,
    and should return the transformed item.
    """
    prefix = 'op_'
    key = func_name(func)
    if key[:len(prefix)] == prefix:
        key = key[len(prefix):]
    OPMAP[key] = func
    return func
523 | ||
@defop
def op_u(val):
    """@{COLUMN:u} -- the item, converted to upper case."""
    shouted = val.upper()
    return shouted
7db733d4 MW |
528 | |
@defop
def op_l(val):
    """@{COLUMN:l} -- the item in lower case."""
    return val.lower()
7db733d4 | 533 | |
aaae9cab MW |
@defop
def op_f(val):
    """@{COLUMN:f} -- the item, with each `/' character changed to `-'."""
    flattened = val.replace('/', '-')
    return flattened
538 | ||
7db733d4 MW |
## Matches a maximal run of characters other than ASCII letters, digits
## and underscores.
R_NOTIDENT = RX.compile(r'[^a-zA-Z0-9_]+')

@defop
def op_c(val):
    """
    @{COLUMN:c} -- the item, with each run of characters other than
    letters, digits and underscores replaced by a single `_'.
    """
    cleaned = R_NOTIDENT.sub('_', val)
    return cleaned
7db733d4 MW |
546 | |
547 | def _pairify(val): | |
e7abc7ea MW |
548 | """ |
549 | Split VAL into two, at an `=' sign. | |
550 | ||
551 | If VAL has the form `THIS=THAT' then return the pair (THIS, THAT); | |
552 | otherwise return (VAL, VAL). | |
553 | """ | |
7db733d4 MW |
554 | c = val.find('=') |
555 | if c >= 0: return val[:c], val[c + 1:] | |
556 | else: return val, val | |
557 | ||
@defop
def op_left(val):
    """@{COLUMN:left} -- the part of the item before its first `='."""
    left, _ = _pairify(val)
    return left
@defop
def op_right(val):
    """@{COLUMN:right} -- the right-hand side of the item."""
    return _pairify(val)[1]
7db733d4 MW |
566 | |
def parse_text(ps):
    """
    Read a run of ordinary text from the ParseState PS, and return the
    template it describes.

    Reading stops at the end of the input, or at a line which looks like a
    directive.  Tags within the text are picked out as we go.

    A tag is written `@COLUMN' or `@{COLUMN:OPERATOR:...}', and `@@' stands
    for a literal `@'.  Lines beginning `%#' are comments and are dropped.
    Lines beginning `%%' lose their first `%' and are then ordinary text
    (so they may contain tags).  Any other line beginning `%' is a
    directive: it is left as the current line for the caller to handle.
    """

    ## The templates gathered so far, and a buffer of literal text which
    ## hasn't yet been made into a template.
    pieces = []
    pending = StringIO()

    def flush():
        ## Turn any buffered literal text into a LiteralTemplate, and empty
        ## the buffer.
        text = pending.getvalue()
        if text: pieces.append(LiteralTemplate(text))
        pending.seek(0)
        pending.truncate()

    def chain(later, earlier):
        ## Return a function which applies EARLIER and then LATER.
        return lambda x: later(earlier(x))

    ## Take the input a line at a time.
    while ps.curr is not None:
        line = ps.curr

        ## Deal with the special lines: skip comments, unescape `%%', and
        ## stop at a directive.
        if line.startswith('%#'):
            ps.step()
            continue
        if line.startswith('%%'):
            line = line[1:]
        elif line.startswith('%'):
            break

        ## Scan along the line; `pos' is where unconsumed text begins.
        pos = 0
        while True:

            ## Without another `@' there are no more tags on this line.
            at = line.find('@', pos)
            if at < 0: break

            ## The text before the `@' is literal.
            pending.write(line[pos:at])

            ## A doubled `@' is an escaped literal `@'.
            if line.startswith('@@', at):
                pending.write('@')
                pos = at + 2
                continue

            ## Otherwise this must be a tag: find out which column it
            ## names.
            m = R_SIMPLETAG.match(line, at) or R_COMPLEXTAG.match(line, at)
            if not m: ps.error('invalid tag')
            col = m.group(1)
            try: rel, colidx = COLMAP[col]
            except KeyError: ps.error("unknown column `%s'" % col)

            ## Look up any operators and compose them, so that they're
            ## applied in the order in which they were written.
            transform = None
            opnames = m.lastindex >= 2 and m.group(2)
            if opnames:
                for opname in opnames[1:].split(':'):
                    try: fn = OPMAP[opname]
                    except KeyError:
                        ps.error("unknown operation `%s'" % opname)
                    if transform is None: transform = fn
                    else: transform = chain(fn, transform)

            ## The literal text so far goes first; then comes the tag.
            flush()
            pieces.append(TagTemplate(rel, colidx, transform))

            ## Carry on scanning just past the tag.
            pos = m.end()

        ## The rest of the line is literal; then fetch another line.
        pending.write(line[pos:])
        ps.step()

    ## Nothing more to read: collect the trailing literal text and bundle
    ## everything up.
    flush()
    return SequenceTemplate(pieces)
659 | ||
## The table of directives: a list of pairs (compiled regular expression,
## handler function), in the order in which they were registered.
DIRECT = []

def direct(rx):
    """
    Return a decorator which registers a function as a directive handler.

    The decorator compiles the regular expression RX (in verbose mode),
    appends it to `DIRECT' paired with the decorated function, and returns
    the function unchanged.  A handler is called as FUNC(PS, M), where PS
    is the ParseState, and M is the match object from matching RX against
    the text of the directive.
    """
    def register(func):
        entry = (RX.compile(rx, RX.VERBOSE), func)
        DIRECT.append(entry)
        return func
    return register
676 | ||
def parse_template(ps):
    """
    Read one template from the ParseState PS.

    One template is either a run of text (see `parse_text'), or a single
    directive, which is handed to the first handler in `DIRECT' whose
    pattern matches it.

    The result is a template object or a special token: `EOF' means that
    the input has run out, and directive handlers may return other tokens.
    An unrecognized directive is a fatal error.
    """

    ## Discard leading comments, so that we don't produce an empty
    ## SequenceTemplate for them.
    while ps.curr is not None and ps.curr.startswith('%#'):
        ps.step()

    ## No input left: say so.
    line = ps.curr
    if line is None: return EOF

    ## A line which doesn't begin with `%', or which begins `%%', starts
    ## a chunk of text.
    if not line.startswith('%') or line.startswith('%%'):
        return parse_text(ps)

    ## Anything else is a directive: find a handler for it.  The handler
    ## starts with the line after the directive current.
    body = line[1:].strip()
    for rx, handler in DIRECT:
        m = rx.match(body)
        if m:
            ps.step()
            return handler(ps, m)
    ps.error("unrecognized directive")
707 | ||
def parse_templseq(ps, nestp):
    """
    Read a sequence of templates from the ParseState PS, and return them
    combined into a single template object.

    `parse_template' is called over and over.  If NESTP is true, we're
    inside a directive's body: an `END' token finishes the sequence, and
    running out of input is a fatal error.  If NESTP is false, it's the
    other way around: the end of the input finishes the sequence, and an
    `END' token is a fatal error.
    """

    parts = []
    while True:
        t = parse_template(ps)
        if t is END:
            if not nestp: ps.error("unexpected `end' directive")
            else: break
        elif t is EOF:
            if not nestp: break
            ps.error("unexpected end of file")
        parts.append(t)
    return SequenceTemplate(parts)
730 | ||
@direct(r'repeat')
def dir_repeat(ps, m):
    """
    %repeat
    BODY
    %end

    Expand BODY once for every combination of rows of the relations which
    it mentions.
    """
    body = parse_templseq(ps, True)
    return RepeatTemplate(body)
742 | ||
@direct(r'end')
def dir_end(ps, m):
    """%end -- marks the end of a chunk of template; returns `END'."""
    marker = END
    return marker
747 | ||
def compile_template(file, text):
    """
    Parse the string TEXT as a complete template and return the result.

    FILE is the name which error messages will blame.
    """
    return parse_templseq(ParseState(file, text), False)
755 | ||
756 | ###-------------------------------------------------------------------------- | |
757 | ### Main code. | |
758 | ||
def cb_gen(opt, optstr, arg, op):
    """
    Option callback for `-g': note the template file name ARG as the
    input, and select `gen' mode, in the parser OP's values.
    """
    op.values.input = arg
    op.values.mode = 'gen'

## Describe the command line.
op = OP.OptionParser(
    description = 'Generates files by filling in simple templates',
    usage = 'usage: %prog {-l | -g TMPL} FILE [COL,...=VAL,... ... | @FILE:COL,...] ...',
    version = 'Catacomb version @VERSION@')
op.add_option('-l', '--list',
              action = 'store_const', const = 'list', dest = 'mode',
              help = 'list filenames generated')
op.add_option('-g', '--generate',
              action = 'callback', metavar = 'TEMPLATE',
              callback = cb_gen, type = 'string',
              help = 'generate file(s) from TEMPLATE file')

## The default mode is a placeholder; the main dispatch rejects it.
op.set_defaults(mode = 'what?')
opts, args = op.parse_args()

## The first positional argument is the output filename pattern; the rest
## describe relations.  The relations must be read first, so that the
## columns which the pattern mentions are known when it's compiled.
if not args: op.error('missing FILE')
filepat = args[0]
for spec in args[1:]:
    read_thing(spec)
filetempl = compile_template('<output>', filepat)
782 | ||
def filenames(filetempl):
    """
    Generate the output filenames described by the compiled filename
    template FILETEMPL.

    Yields a pair (NAME, CS) for each combination of rows of the relations
    which FILETEMPL mentions: NAME is the expanded filename, and CS is the
    CursorSet, positioned at that combination.  The same CursorSet object
    is yielded every time.  Nothing is yielded if any of the relations is
    empty.
    """
    cs = CursorSet()
    rels = filetempl.relations()
    if any(len(r) == 0 for r in rels):
        return
    cs.push(rels)
    more = True
    while more:
        buf = StringIO()
        filetempl.subst(buf, cs)
        yield buf.getvalue(), cs
        more = cs.step()
    cs.pop()
798 | ||
## Do whatever the command line asked for.
mode = opts.mode
if mode == 'gen':

    ## Compile the template file, wrapped up in an implicit repeat.
    with open(opts.input) as src:
        templ = RepeatTemplate(compile_template(opts.input, src.read()))

    ## Write each output to a temporary name alongside and rename it into
    ## place once it's complete.
    for target, cs in filenames(filetempl):
        scratch = target + '.new'
        with open(scratch, 'w') as out:
            templ.subst(out, cs)
        OS.rename(scratch, target)

elif mode == 'list':

    ## Just print the filenames which would be generated.
    for target, cs in filenames(filetempl):
        print(target)

else:
    die('What am I doing here?')
7db733d4 MW |
812 | |
813 | ###----- That's all, folks -------------------------------------------------- |