Commit | Line | Data |
---|---|---|
7db733d4 MW |
1 | #! @PYTHON@ |
2 | ### | |
3 | ### Generate files by filling in simple templates | |
4 | ### | |
5 | ### (c) 2013 Straylight/Edgeware | |
6 | ### | |
7 | ||
8 | ###----- Licensing notice --------------------------------------------------- | |
9 | ### | |
10 | ### This file is part of Catacomb. | |
11 | ### | |
12 | ### Catacomb is free software; you can redistribute it and/or modify | |
13 | ### it under the terms of the GNU Library General Public License as | |
14 | ### published by the Free Software Foundation; either version 2 of the | |
15 | ### License, or (at your option) any later version. | |
16 | ### | |
17 | ### Catacomb is distributed in the hope that it will be useful, | |
18 | ### but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ### GNU Library General Public License for more details. | |
21 | ### | |
22 | ### You should have received a copy of the GNU Library General Public | |
23 | ### License along with Catacomb; if not, write to the Free | |
24 | ### Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, | |
25 | ### MA 02111-1307, USA. | |
26 | ||
27 | from __future__ import with_statement | |
28 | ||
29 | import itertools as IT | |
30 | import optparse as OP | |
31 | import os as OS | |
32 | import re as RX | |
33 | from cStringIO import StringIO | |
34 | from sys import argv, exit, stderr | |
35 | ||
36 | ###-------------------------------------------------------------------------- | |
37 | ### Utilities. | |
38 | ||
QUIS = OS.path.basename(argv[0])        # Program name, for use in errors.

def die(msg):
  """
  Report MSG as a fatal error and exit.

  The message is written to standard error, prefixed by the program name
  `QUIS', and the process then exits with status 1.
  """
  report = '%s: %s\n' % (QUIS, msg)
  stderr.write(report)
  raise SystemExit(1)
45 | ||
def indexed(seq):
  """
  Generate pairs (I, X), where I counts from zero and X are the items of SEQ.

  This is exactly what the built-in `enumerate' does; the function is kept
  so that existing callers continue to work.
  """
  return enumerate(seq)
51 | ||
52 | ###-------------------------------------------------------------------------- | |
53 | ### Reading the input values. | |
54 | ||
## Map column names to (RELATION, INDEX) pairs, where INDEX is the position of
## the column in RELATION's header.  Entries are added by `Relation.__init__'.
COLMAP = {}
57 | ||
class Cursor (object):
  """
  A Cursor object keeps track of an iteration through a Relation.

  At any time, the Cursor has a `current' row; the individual cells of this
  row may be retrieved using Python's standard indexing operator.  The `step'
  method advances to the next row (if there is one).  The `reset' method
  returns to row zero.

  Once `step' has run off the end, the Cursor has no current row until
  `reset' is called.
  """

  def __init__(me, rel):
    """
    Initialize a new Cursor object, tracking its way through a Relation REL.

    The new Cursor has row zero as its current row.  The REL must not be
    empty.
    """
    me._rel = rel
    me.reset()

  def step(me):
    """
    Advance the Cursor to the next row.

    Returns False if there is no next row; otherwise True.  On failure, the
    current row and index are both cleared to None.
    """
    me._i += 1
    if me._i >= len(me._rel):
      me._i = me._row = None
      return False
    me._row = me._rel[me._i]
    return True

  def reset(me):
    """
    Reset the Cursor, so that row zero is current again.
    """
    me._i = 0
    me._row = me._rel[0]

  def __getitem__(me, i):
    """
    Return the item in column I of the Cursor's current row.

    The index must be acceptable to the underlying row object, but otherwise
    the Cursor imposes no restrictions.  Indices need not be numeric, for
    example.
    """
    return me._row[i]

  def __repr__(me):
    """
    Return a text description of the Cursor, for diagnostic use.
    """
    ## The index is None once the Cursor has been exhausted, so format it
    ## with `%r' rather than `%d': the latter would raise TypeError.
    return '#<Cursor %r[%r] = %r>' % (me._rel, me._i, me._row)
113 | ||
class CursorSet (object):
  """
  A CursorSet iterates over the cartesian product of a number of Relations.

  More precisely: it maintains a stack, each level of which tracks a number
  of Relations.  More Relations can be pushed onto this stack with the `push'
  method, and removed with `pop'.  The `step' method advances through the
  cartesian product of the Relations in the top level of the stack -- the
  `active' Relations.  Columns from the current rows of all of the currently
  known Relations -- whether active or not -- can be extracted using `get'.
  """

  def __init__(me):
    """
    Initialize a new CursorSet object.

    A new CursorSet has an empty stack.
    """
    me._map = {}                # Relation -> Cursor, for all known Relations
    me._stack = []              # saved (active-cursors, new-relations) pairs
    me._act = None              # list of currently active Cursors

  def push(me, rels):
    """
    Push the new Relations RELS onto the stack and start iterating.

    The currently active Relations are pushed down.  Those Relations which are
    not already known to the CursorSet become the newly active collection.
    (Relations which are already known are simply ignored.)

    Iteration traverses the Relations which come earlier in RELS more
    rapidly.  If RELS is an unordered collection such as a set then the
    traversal order is whatever order iterating over RELS produces.
    """
    cc = []
    rr = []
    for r in rels:
      if r in me._map: continue
      c = me._map[r] = Cursor(r)
      rr.append(r)
      cc.append(c)
    ## Remember which Relations were added at this level, so that `pop' can
    ## forget exactly those.
    me._stack.append((me._act, rr))
    me._act = cc

  def step(me):
    """
    Advance the CursorSet through the currently active Relations.

    Return False if the active Relations have now been exhausted; otherwise
    return True.  On exhaustion, every active Cursor has been reset to its
    first row.
    """
    ## This works like incrementing a multi-digit counter: try to advance each
    ## Cursor in turn; if one runs off the end, wrap it round to the start and
    ## carry into the next.
    for c in me._act:
      if c.step(): return True
      c.reset()
    return False

  def pop(me):
    """
    Pop the active Relations.

    Return to iterating over the previously active collection.
    """
    me._act, rels = me._stack.pop()
    for r in rels: del me._map[r]

  def get(me, rel, i):
    """
    Return the item with index I in the current row of Relation REL.

    Raises KeyError if REL is not currently known to the CursorSet.
    """
    return me._map[rel][i]
185 | ||
class Relation (object):
  """
  A Relation is a table of data.

  It has a `header' -- a sequence of column names -- and a list of rows, each
  of which holds exactly as many items as the header has names.

  Cursors and CursorSets are used to iterate over Relations.
  """

  def __init__(me, head):
    """
    Make a new Relation, initially without rows, whose header is HEAD.

    As a side effect, each column name in HEAD is entered in the global
    `COLMAP' dictionary, mapped to this Relation and the column's index.
    """
    me._head = head
    me._rows = []
    for index, name in enumerate(head):
      COLMAP[name] = (me, index)

  def addrow(me, row):
    """
    Append ROW to the Relation.

    It is a fatal error if ROW's length differs from the header's.
    """
    width = len(me._head)
    if len(row) != width:
      die("mismatch: row `%s' doesn't match heading `%s'" %
          (', '.join(row), ', '.join(me._head)))
    me._rows.append(row)

  def __len__(me):
    """Answer how many rows the Relation holds."""
    return len(me._rows)

  def __getitem__(me, i):
    """Fetch row number I of the Relation."""
    return me._rows[i]

  def __repr__(me):
    """Describe the Relation by its header, for diagnostic use."""
    return '#<Relation %r>' % me._head
230 | ||
def read_immediate(word):
  """
  Return a Relation constructed by parsing WORD.

  The WORD has the form `HEAD=ROW ROW ...', where the HEAD and ROWs are
  comma-separated lists of strings which will form the relation's header and
  rows respectively.  There is no way to include an item which contains a
  comma or whitespace.

  It is a fatal error if WORD contains no `=', or if a ROW has the wrong
  number of items.
  """
  if '=' not in word:
    die("bad relation `%s': expected `HEAD=ROW ROW ...'" % word)
  head, rows = word.split('=', 1)
  rel = Relation([c.strip() for c in head.split(',')])
  for row in rows.split(): rel.addrow([c.strip() for c in row.split(',')])
  return rel

def read_file(spec):
  """
  Return a Relation constructed from a file, according to SPEC.

  The SPEC has the form `FILE:HEAD', where FILE names a file, and HEAD is a
  comma-separated list of strings to form the relation's header.  Each line
  from the file which is neither empty nor begins with `#' is split into
  whitespace-separated words to form a row in the relation.  There is no way
  to include an item which contains whitespace.

  It is a fatal error if SPEC contains no `:', or if a line has the wrong
  number of words.
  """
  if ':' not in spec:
    die("bad relation file `%s': expected `FILE:HEAD'" % spec)
  path, head = spec.split(':', 1)
  rel = Relation([c.strip() for c in head.split(',')])
  with open(path) as f:
    for line in f:
      line = line.strip()
      if line.startswith('#') or line == '': continue
      rel.addrow(line.split())
  return rel

def read_thing(spec):
  """
  Return a relation constructed from SPEC.

  If SPEC begins with `@' then read the relation from a file (see
  `read_file'); otherwise interpret it as immediate data (see
  `read_immediate').
  """
  if spec.startswith('@'): return read_file(spec[1:])
  else: return read_immediate(spec)
272 | ||
273 | ###-------------------------------------------------------------------------- | |
274 | ### Template structure. | |
275 | ||
class BasicTemplate (object):
  """
  Common ancestor of all template objects.

  Subclasses are expected to implement the template protocol, which has two
  methods:

  relations()           Answer the set of Relations referred to at top-level
                        by substitutions in the template.

  subst(OUT, CS)        Expand the template, writing the result to the
                        stream OUT.  CS is the CursorSet which holds the
                        current iteration state.

  The base class itself provides neither method.
  """
290 | ||
class LiteralTemplate (BasicTemplate):
  """
  A template which always expands to the same fixed string.
  """

  def __init__(me, text, **kw):
    """
    Set up a LiteralTemplate which will write TEXT when expanded.

    Other keyword arguments are handed on to the superclass.
    """
    super(LiteralTemplate, me).__init__(**kw)
    me._text = text

  def relations(me):
    """Literal text has no substitutions, so the answer is the empty set."""
    none = set()
    return none

  def subst(me, out, cs):
    """Write the fixed text to OUT; the CursorSet CS is not consulted."""
    emit = out.write
    emit(me._text)

  def __repr__(me):
    """Describe the template, for diagnostic use."""
    return '#<LiteralTemplate %r>' % me._text
313 | ||
class TagTemplate (BasicTemplate):
  """
  A template which expands a single substitution tag.

  Expansion picks one item out of the current row of a relation, optionally
  passes it through a transformation, and writes out the result.
  """

  def __init__(me, rel, i, op, **kw):
    """
    Set up a TagTemplate.

    The item is taken from column I of relation REL.  OP, if not None, is a
    function applied to the item before it is written.  Other keyword
    arguments are handed on to the superclass.
    """
    super(TagTemplate, me).__init__(**kw)
    me._rel, me._i, me._op = rel, i, op

  def relations(me):
    """Answer the singleton set holding the relation this tag reads."""
    rr = set()
    rr.add(me._rel)
    return rr

  def subst(me, out, cs):
    """
    Fetch the item from the current row, as tracked by CS, transform it if
    there is an operation, and write it to OUT.
    """
    item = cs.get(me._rel, me._i)
    if me._op is None: out.write(item)
    else: out.write(me._op(item))

  def __repr__(me):
    """Describe the template by its column name, for diagnostic use."""
    return '#<TagTemplate %s>' % me._rel._head[me._i]
350 | ||
class SequenceTemplate (BasicTemplate):
  """
  A SequenceTemplate concatenates a number of other templates.
  """

  def __new__(cls, seq, **kw):
    """
    Construct a template from a sequence SEQ of other templates.

    If SEQ is a singleton (which it often is) then return it directly;
    otherwise construct a SequenceTemplate.
    """
    if len(seq) == 1:
      return seq[0]
    else:
      ## Don't pass the constructor arguments up: `object.__new__' accepts
      ## none (passing them is deprecated in Python 2.6 and later, and an
      ## error in Python 3).  Python hands the arguments to `__init__'
      ## itself.
      return super(SequenceTemplate, cls).__new__(cls)

  def __init__(me, seq, **kw):
    """
    Initialize a new SequenceTemplate object from SEQ.

    The sequence is flattened out: if SEQ contains SequenceTemplates then we
    use their children directly, so that we don't have a useless tree.
    """
    super(SequenceTemplate, me).__init__(**kw)
    tt = []
    cls = type(me)
    for t in seq:
      if isinstance(t, cls): tt += t._seq
      else: tt.append(t)
    me._seq = tt

  def relations(me):
    """
    The relations of a SequenceTemplate are the union of the relations of its
    children.
    """
    rr = set()
    for t in me._seq: rr.update(t.relations())
    return rr

  def subst(me, out, cs):
    """
    The output of a SequenceTemplate is the concatenation of the expansions
    of its children.
    """
    for t in me._seq: t.subst(out, cs)

  def __repr__(me):
    """Return a description of the template, for diagnostic use."""
    return '#<SequenceTemplate %r>' % me._seq
401 | ||
class RepeatTemplate (BasicTemplate):
  """
  A RepeatTemplate iterates its body over a number of relations.
  """

  def __init__(me, sub, **kw):
    """
    Initialize a new RepeatTemplate, given a template SUB to act as its body.

    Other keyword arguments are passed on to the superclass, in the same way
    as the other template classes do.
    """
    super(RepeatTemplate, me).__init__(**kw)
    me._sub = sub

  def relations(me):
    """
    A RepeatTemplate hides the relations of its body.
    """
    return set()

  def subst(me, out, cs):
    """
    Substitute a RepeatTemplate, by iterating over the relations mentioned in
    its body template.

    The body is expanded once for each element of the cartesian product of
    those relations which the CursorSet CS isn't already iterating over.  If
    any of the body's relations is empty then nothing is written at all.
    """
    rr = me._sub.relations()
    for r in rr:
      if len(r) == 0: return
    cs.push(rr)
    while True:
      me._sub.subst(out, cs)
      if not cs.step(): break
    cs.pop()

  def __repr__(me):
    """Return a description of the template, for diagnostic use."""
    return '#<RepeatTemplate %r>' % me._sub
435 | ||
436 | ###-------------------------------------------------------------------------- | |
437 | ### Some slightly cheesy parsing machinery. | |
438 | ||
class ParseState (object):
  """
  A ParseState object keeps track of a parser's position in a file.

  The `curr' slot contains the current line under consideration, including
  its line terminator, or None if the input has been exhausted.
  """

  def __init__(me, file, text):
    """
    Initialize a ParseState object.

    The FILE is a string naming the source file, for use in error messages;
    the TEXT is a string holding the entire contents to be parsed.  The first
    line of TEXT (if there is one) becomes the current line.
    """
    me._file = file
    me._lines = text.splitlines(True)
    me._i = 0                   # number of lines consumed so far
    me.step()

  def step(me):
    """
    Advance the ParseState to the next line.

    Sets `curr' to the next line, or to None if the input is exhausted.  The
    line counter is only advanced if there was a next line, so it always
    holds the (one-based) number of the most recent line read.
    """
    if me._i < len(me._lines):
      me.curr = me._lines[me._i]
      me._i += 1
    else:
      me.curr = None

  def error(me, msg):
    """
    Report a fatal error during parsing, attributing it to the current line.
    """
    die('%s:%d: %s' % (me._file, me._i, msg))
473 | ||
class token (object):
  """
  A token is a marker object: all that matters about it is which object it
  is.  It carries a name purely so that it can be printed helpfully.
  """

  def __init__(me, name):
    """Make a new token called NAME."""
    me._name = name

  def __repr__(me):
    """Describe the token by its name, for diagnostic use."""
    return '#<%s>' % me._name

## Distinguished tokens returned by the parser in place of templates: `EOF'
## for the end of the input, and `END' for an `%end' directive.
EOF = token('eof')
END = token('end')
489 | ||
## Regular expressions matching substitution tags.  Both are compiled with
## `RX.VERBOSE', so the spaces in the patterns are not significant.
## `R_SIMPLETAG' matches `@COLUMN', capturing the column name.
## `R_COMPLEXTAG' matches `@{COLUMN:OP:...}', capturing the column name and
## then the (possibly empty) run of `:OP' items as a single group.
R_SIMPLETAG = RX.compile(r'@ (\w+)', RX.VERBOSE)
R_COMPLEXTAG = RX.compile(r'@ { (\w+) ((?: : \w+)*) }', RX.VERBOSE)
493 | ||
## A dictionary mapping operation names to functions which implement them.
OPMAP = {}

def defop(func):
  """
  Decorator for substitution operator functions.

  Remember the operator in `OPMAP'; the operator's name is taken from FUNC's
  name, removing a prefix `op_' if there is one.  FUNC itself is returned
  unchanged.

  An operator function is given the raw value as an argument and should
  return the transformed value.
  """
  ## Use `__name__' rather than `func_name': the latter is only an alias
  ## which exists in Python 2.
  name = func.__name__
  if name.startswith('op_'): name = name[3:]
  OPMAP[name] = func
  return func
511 | ||
@defop
def op_u(val):
  """
  @{COLUMN:u} -- the item in upper case.

  Returns the result of calling VAL's `upper' method.
  """
  return val.upper()
7db733d4 MW |
516 | |
@defop
def op_l(val):
  """
  @{COLUMN:l} -- the item in lower case.

  Returns the result of calling VAL's `lower' method.
  """
  return val.lower()
7db733d4 MW |
521 | |
## Matches a maximal run of characters which are not ASCII letters, digits, or
## underscores.
R_NOTIDENT = RX.compile(r'[^a-zA-Z0-9_]+')
@defop
def op_c(val):
  """
  @{COLUMN:c} -- the item, with each run of characters other than ASCII
  letters, digits and `_' replaced by a single `_'.
  """
  return R_NOTIDENT.sub('_', val)
7db733d4 MW |
529 | |
530 | def _pairify(val): | |
e7abc7ea MW |
531 | """ |
532 | Split VAL into two, at an `=' sign. | |
533 | ||
534 | If VAL has the form `THIS=THAT' then return the pair (THIS, THAT); | |
535 | otherwise return (VAL, VAL). | |
536 | """ | |
7db733d4 MW |
537 | c = val.find('=') |
538 | if c >= 0: return val[:c], val[c + 1:] | |
539 | else: return val, val | |
540 | ||
@defop
def op_left(val):
  """
  @{COLUMN:left} -- the left-hand side of the item.

  This is the part before the first `=' sign, or the whole item if it
  contains no `=' (see `_pairify').
  """
  return _pairify(val)[0]
@defop
def op_right(val):
  """
  @{COLUMN:right} -- the right-hand side of the item.

  This is the part after the first `=' sign, or the whole item if it
  contains no `=' (see `_pairify').
  """
  return _pairify(val)[1]
7db733d4 MW |
549 | |
def parse_text(ps):
  """
  Parse a chunk of text from a ParseState.

  Stop when we get to something which looks like a template keyword, but
  extract tags.  Return the resulting template.

  Tags have the form `@COLUMN', or `@{COLUMN:OPERATOR:...}'; a literal `@' is
  written `@@'.  The text may contain comments beginning `%#', which are
  ignored, and lines beginning `%%' which have the initial `%' removed and
  are otherwise treated as normal text (and, in particular, may contain
  tags).  Other lines beginning with `%' are directives and must be processed
  by our caller.

  Operators are applied to the item in the order in which they are written.
  It is a fatal error if a tag is malformed, or names an unknown column or
  operator.
  """

  ## Starting out: no templates collected, and no pending literal text.  The
  ## literal text is accumulated as a list of chunks and joined when needed.
  tt = []
  lit = []

  def spill():
    ## Spill accumulated literal text from `lit' into a LiteralTemplate
    ## object, and start afresh.
    text = ''.join(lit)
    if text: tt.append(LiteralTemplate(text))
    del lit[:]

  def compose(f, g):
    ## Return the function which applies G and then F.
    return lambda x: f(g(x))

  ## Iterate over the lines of input.
  while True:
    line = ps.curr

    ## Stop if there's no more text; handle lines beginning with `%'.
    if line is None: break
    elif line.startswith('%'):
      if line.startswith('%#'): ps.step(); continue
      elif line.startswith('%%'): line = line[1:]
      else: break

    ## Work through the line, finding tags.  Here, `pos' is the position from
    ## which we've yet to copy text.
    pos = 0
    while True:

      ## If there are no more `@' signs, there can be no more tags, and we're
      ## done.
      at = line.find('@', pos)
      if at < 0: break

      ## Write the chunk we've found.
      lit.append(line[pos:at])

      ## If the next character is also `@' then this is an escape and we
      ## should carry on.
      if line.startswith('@@', at):
        lit.append('@')
        pos = at + 2
        continue

      ## Parse the tag into a column name, and maybe some operators.
      m = R_SIMPLETAG.match(line, at)
      if not m: m = R_COMPLEXTAG.match(line, at)
      if not m: ps.error('invalid tag')
      col = m.group(1)
      try: rel, colidx = COLMAP[col]
      except KeyError: ps.error("unknown column `%s'" % col)
      ops = m.lastindex >= 2 and m.group(2)

      ## If we have operators then look them up and compose them.  The
      ## operator text begins with a `:', which we must skip.
      wholeop = None
      if ops:
        for opname in ops[1:].split(':'):
          try: fn = OPMAP[opname]
          except KeyError: ps.error("unknown operation `%s'" % opname)
          if wholeop is None: wholeop = fn
          else: wholeop = compose(fn, wholeop)

      ## Emit a LiteralTemplate for the accumulated text, and a TagTemplate
      ## for the tag.
      spill()
      tt.append(TagTemplate(rel, colidx, wholeop))

      ## Continue from after the tag.
      pos = m.end()

    ## Finished a line.  Write out the remainder of the line and move onto
    ## the next.
    lit.append(line[pos:])
    ps.step()

  ## Run out of things to do.  Flush out the rest of the literal text and
  ## combine the templates.
  spill()
  return SequenceTemplate(tt)
642 | ||
## A list of (REGEXP, FUNC) pairs, associating compiled regular expressions
## with the directive-processing functions which handle them.
DIRECT = []

def direct(rx):
  """
  Make a decorator which registers a template file directive handler.

  The decorated function is entered in `DIRECT', paired with the regular
  expression RX compiled in verbose mode; the function itself is returned
  unchanged.  Handlers are called as FUNC(PS, M): PS is the ParseState, and
  M is the match object obtained by matching RX against the directive text.
  """
  def register(func):
    entry = (RX.compile(rx, RX.VERBOSE), func)
    DIRECT.append(entry)
    return func
  return register
659 | ||
def parse_template(ps):
  """
  Read one template from the ParseState PS.

  One template is either a run of text (which `parse_text' deals with) or a
  directive (which is passed to the first matching handler in `DIRECT').

  The result is either a template object or a special token: `EOF' means
  that the input ran out, and directive handlers may return other tokens.
  An unrecognized directive is a fatal error.
  """

  ## Discard leading comment lines, so that we don't produce an empty
  ## SequenceTemplate for them.
  while ps.curr is not None and ps.curr.startswith('%#'): ps.step()

  ## No input left: report `EOF'.
  cur = ps.curr
  if cur is None: return EOF

  ## Anything not starting with `%', and anything starting with `%%', is
  ## ordinary text.
  if not cur.startswith('%') or cur.startswith('%%'): return parse_text(ps)

  ## Otherwise this is a directive.  Strip the `%' and surrounding
  ## whitespace, and offer it to each handler in turn.
  text = cur[1:].strip()
  for rx, func in DIRECT:
    m = rx.match(text)
    if m:
      ps.step()
      return func(ps, m)
  ps.error("unrecognized directive")
690 | ||
def parse_templseq(ps, nestp):
  """
  Read a run of templates from the ParseState PS, and combine them.

  `parse_template' is called over and over.  When NESTP is true, we are
  inside a directive's body: an `END' token (presumably from a directive
  handler) finishes the run, and running out of input is a fatal error.
  When NESTP is false, it is the other way round: end of input finishes the
  run, and `END' is a fatal error.

  The result is a template object.
  """

  found = []
  while True:
    t = parse_template(ps)
    if t is END:
      if nestp: break
      ps.error("unexpected `end' directive")
    elif t is EOF:
      if not nestp: break
      ps.error("unexpected end of file")
    found.append(t)
  return SequenceTemplate(found)
713 | ||
## NOTE(review): the pattern is not anchored at its end, and `parse_template'
## uses `match', so any directive text beginning with `repeat' is accepted --
## confirm whether this is intended.
@direct(r'repeat')
def dir_repeat(ps, m):
  """
  %repeat
  BODY
  %end

  Iterate the body over the cartesian product of the relations mentioned
  within.

  The body is parsed up to the matching `%end' directive, and the result is
  wrapped in a RepeatTemplate.  The match object M is not used.
  """
  return RepeatTemplate(parse_templseq(ps, True))
725 | ||
@direct(r'end')
def dir_end(ps, m):
  """
  %end -- an end marker used to delimit chunks of template.

  Returns the `END' token; neither the ParseState PS nor the match object M
  is used.
  """
  return END
730 | ||
def compile_template(file, text):
  """
  Turn the string TEXT into a template object and return it.

  FILE is the name to which parse errors are attributed.  The text is parsed
  at top level, so a stray `%end' directive is an error.
  """
  return parse_templseq(ParseState(file, text), False)
738 | ||
739 | ###-------------------------------------------------------------------------- | |
740 | ### Main code. | |
741 | ||
## Set up the command-line option parser.
op = OP.OptionParser(
  description = 'Generates files by filling in simple templates',
  usage = 'usage: %prog {-l | -g TMPL} FILE [COL,...=VAL,... ... | @FILE:COL,...] ...',
  version = 'Catacomb version @VERSION@')
def cb_gen(opt, optstr, arg, op):
  ## Option callback for `-g': remember the template file name ARG as the
  ## `input' option value, and select `gen' mode.
  op.values.input = arg
  op.values.mode = 'gen'
for short, long, kw in [
  ('-l', '--list', dict(
    action = 'store_const', const = 'list', dest = 'mode',
    help = 'list filenames generated')),
  ('-g', '--generate', dict(
    action = 'callback', metavar = 'TEMPLATE',
    callback = cb_gen, type = 'string',
    help = 'generate file(s) from TEMPLATE file'))]:
  op.add_option(short, long, **kw)
## If neither `-l' nor `-g' is given, the mode keeps this placeholder value,
## which the main dispatch below rejects.
op.set_defaults(mode = 'what?')
opts, args = op.parse_args()

## The first positional argument is the output filename pattern; the rest
## describe relations, which must be read before the pattern is compiled so
## that their columns are known.
if len(args) < 1: op.error('missing FILE')
filepat = args[0]
for rel in args[1:]: read_thing(rel)
filetempl = compile_template('<output>', filepat)
765 | ||
def filenames(filetempl):
  """
  Generate the output filenames described by the compiled template FILETEMPL.

  Each item generated is a pair (NAME, CS): NAME is the filename, and CS is
  the CursorSet, positioned at the rows which produced NAME.  The same
  CursorSet object is yielded every time.  Nothing is generated if any of
  the relations mentioned in FILETEMPL is empty.
  """
  rels = filetempl.relations()
  for r in rels:
    if len(r) == 0: return
  cursors = CursorSet()
  cursors.push(rels)
  more = True
  while more:
    buf = StringIO()
    filetempl.subst(buf, cursors)
    yield buf.getvalue(), cursors
    more = cursors.step()
  cursors.pop()
781 | ||
## Main dispatch.
if opts.mode == 'list':
  ## Just print the output filenames, one per line.
  for file, cs in filenames(filetempl): print(file)
elif opts.mode == 'gen':
  ## Compile the template file, and expand it once for each output file.  The
  ## RepeatTemplate wrapper iterates over any relations mentioned in the
  ## template but not in the filename pattern.
  with open(opts.input) as f:
    templ = RepeatTemplate(compile_template(opts.input, f.read()))
  for file, cs in filenames(filetempl):
    ## Write to a temporary file and rename it into place, so that the output
    ## file is never left half-written.  If anything goes wrong, then remove
    ## the temporary file rather than leaving it lying about.
    new = file + '.new'
    done = False
    try:
      with open(new, 'w') as out:
        templ.subst(out, cs)
      OS.rename(new, file)
      done = True
    finally:
      if not done:
        ## Best effort only: the file might never have been created, and we
        ## mustn't mask the original failure.
        try: OS.unlink(new)
        except OSError: pass
else:
  die('What am I doing here?')
7db733d4 MW |
795 | |
796 | ###----- That's all, folks -------------------------------------------------- |