3 ### Generate files by filling in simple templates
5 ### (c) 2013 Straylight/Edgeware
8 ###----- Licensing notice ---------------------------------------------------
10 ### This file is part of Catacomb.
12 ### Catacomb is free software; you can redistribute it and/or modify
13 ### it under the terms of the GNU Library General Public License as
14 ### published by the Free Software Foundation; either version 2 of the
15 ### License, or (at your option) any later version.
17 ### Catacomb is distributed in the hope that it will be useful,
18 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ### GNU Library General Public License for more details.
22 ### You should have received a copy of the GNU Library General Public
23 ### License along with Catacomb; if not, write to the Free
24 ### Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 ### MA 02111-1307, USA.
27 from __future__ import with_statement
29 import itertools as IT
33 from cStringIO import StringIO
34 from sys import argv, exit, stderr
36 ###--------------------------------------------------------------------------
39 QUIS = OS.path.basename(argv[0]) # Program name, for use in errors.
42 """Report MSG as a fatal error, and exit."""
43 stderr.write('%s: %s\n' % (QUIS, msg))
48 Generate pairs (I, X), where I counts from zero and X are the items of SEQ.
50 return IT.izip(IT.count(), seq)
52 ###--------------------------------------------------------------------------
53 ### Reading the input values.
55 ## Map column names to (Relation, # index) pairs.
58 class Cursor (object):
60 A Cursor object keeps track of an iteration through a Relation.
62 At any time, the Cursor has a `current' row; the individual cells of this
63 row may be retrieved using Python's standard indexing operator. The `step'
64 method advances to the next row (if there is one). The `reset' method
68 def __init__(me, rel):
70 Initialize a new Cursor object, tracking its way through a Relation REL.
72 The new Cursor has row zero as its current row. The REL must not be
80 Advance the Cursor to the next row.
82 Returns False if there is no next row; otherwise True.
85 if me._i >= len(me._rel):
86 me._i = me._row = None
88 me._row = me._rel[me._i]
93 Reset the Cursor, so that row zero is current again.
98 def __getitem__(me, i):
100 Return the item in column I of the Cursor's current row.
102 The index must be acceptable to the underlying row object, but otherwise
103 the Cursor imposes no restrictions. Indices need not be numeric, for
110 Return a text description of the Cursor, for diagnostic use.
112 return '#<Cursor %r[%d] = %r>' % (me._rel, me._i, me._row)
114 class CursorSet (object):
116 A CursorSet iterates over the cartesian product of a number of Relations.
118 More precisely: it maintains a stack, each level of which tracks a number
119 of Relations. More Relations can be pushed onto this stack with the `push'
120 method, and removed with `pop'. The `step' method advances through the
121 cartesian product of the Relations in the top level of the stack -- the
122 `active' Relations. Columns from the current rows of all of the currently
123 known Relations -- whether active or not -- can be extracted using `get'.
128 Initialize a new CursorSet object.
130 A new CursorSet has an empty stack.
138 Push the new Relations RELS onto the stack and start iterating.
140 The currently active Relations are pushed down. Those Relations which are
141 not already known to the CursorSet become the newly active collection.
142 (Relations which are already known are simply ignored.)
144 Iteration traverses Relations on the right more rapidly.
149 if r in me._map: continue
150 c = me._map[r] = Cursor(r)
153 me._stack.append((me._act, rr))
158 Advance the CursorSet through the currently active Relations.
160 Return False if the active Relations have now been exhausted; otherwise
164 while i < len(me._act):
165 if me._act[i].step(): return True
166 if i >= len(me._act): return False
173 Pop the active Relations.
175 Return to iterating over the previously active collection.
177 me._act, rels = me._stack.pop()
178 for r in rels: del me._map[r]
182 Return the item with index I in the current row of Relation REL.
184 return me._map[rel][i]
186 class Relation (object):
188 A Relation keeps track of a table of data.
190 A Relation consists of a `header', which is a sequence of string names,
191 and a rectangular array of data, each row of which has the same number of
194 Relations can be iterated over using Cursors and CursorSets.
197 def __init__(me, head):
199 Initialize a new, empty Relation with header HEAD.
201 The `COLMAP' dictionary is updated to map the names in the header to this
202 Relation and its column indices.
206 for i, c in indexed(head): COLMAP[c] = me, i
210 Add a ROW to the Relation.
212 The new row must have the correct number of entries.
214 if len(row) != len(me._head):
215 die("mismatch: row `%s' doesn't match heading `%s'" %
216 (', '.join(row), ', '.join(me._head)))
220 """Return the number of rows in the Relation."""
223 def __getitem__(me, i):
224 """Return the Ith row of the Relation."""
228 """Return a textual description of the Relation, for diagnostic use."""
229 return '#<Relation %r>' % me._head
def read_immediate(word):
    """
    Return a Relation constructed by parsing WORD.

    The WORD has the form `HEAD=ROW ROW ...': everything before the first `='
    is a comma-separated list of column names forming the header, and
    everything after it is a whitespace-separated list of rows, each of which
    is a comma-separated list of items.  Whitespace around each name and item
    is stripped.  There is no way to include an item which contains a comma
    or whitespace.

    A WORD with no `=' in it is reported as a fatal error through `die',
    rather than being left to fail when the split result is unpacked.
    """
    ## Split at the first `=' only: later `=' signs belong to the items.
    head, sep, rows = word.partition('=')
    if not sep:
        die("malformed relation `%s': expected `HEAD=ROW ...'" % word)

    rel = Relation([c.strip() for c in head.split(',')])
    for row in rows.split():
        rel.addrow([c.strip() for c in row.split(',')])
    return rel
246 Return a Relation constructed from a file, according to SPEC.
248 The SPEC has the form `FILE:HEAD', where FILE names a file, and HEAD is a
249 comma-separated list of strings to form the relation's header. Each line
250 from the file which is neither empty nor begins with `#' is split into
251 whitespace-separated words to form a row in the relation. There is no way
252 to include an item which contains whitespace.
254 file, head = spec.split(':', 1)
255 rel = Relation([c.strip() for c in head.split(',')])
256 with open(file) as f:
259 if line.startswith('#') or line == '': continue
260 rel.addrow(line.split())
def read_thing(spec):
    """
    Construct a relation from SPEC, and return the reader's result.

    If SPEC begins with `@' then the remainder of SPEC is handed to
    `read_file'; otherwise the whole of SPEC is handed to `read_immediate'.
    Whatever the chosen reader returns is passed back to the caller, rather
    than being thrown away.
    """
    if spec.startswith('@'):
        return read_file(spec[1:])
    return read_immediate(spec)
273 ###--------------------------------------------------------------------------
274 ### Template structure.
276 class BasicTemplate (object):
278 Base class for template objects.
280 The protocol for templates consists of two methods:
282 relations() Return a set of Relations mentioned at top-level in
283 substitutions in the template.
285 subst(OUT, CS) Fill in the template, writing the output to the
286 stream OUT. The CS is a CursorSet object tracking
287 the current iteration state.
291 class LiteralTemplate (BasicTemplate):
293 A LiteralTemplate outputs a fixed string.
296 def __init__(me, text, **kw):
298 Initialize a new LiteralTemplate object. TEXT is the text to be written.
300 super(LiteralTemplate, me).__init__(**kw)
304 """A LiteralTemplate contains no substitutions."""
307 def subst(me, out, cs):
308 """A LiteralTemplate just emits its text."""
312 return '#<LiteralTemplate %r>' % me._text
314 class TagTemplate (BasicTemplate):
316 A TagTemplate object expands a substitution tag.
318 It extracts an item from the current row of a relation, processes it
319 according to an operation, and outputs the result.
322 def __init__(me, rel, i, op, **kw):
324 Initialize a new TagTemplate object.
326 REL is the relation from which to pick the output; I is the column index;
327 OP is a transformation to apply to the data, and may be None to indicate
328 that the data should not be transformed.
330 super(TagTemplate, me).__init__(**kw)
336 """The TagTemplate knows which relation it uses."""
337 return set([me._rel])
339 def subst(me, out, cs):
341 A TagTemplate extracts and transforms an item from the current row of
344 val = cs.get(me._rel, me._i)
345 if me._op is not None: val = me._op(val)
349 return '#<TagTemplate %s>' % me._rel._head[me._i]
351 class SequenceTemplate (BasicTemplate):
353 A SequenceTemplate concatenates a number of other templates.
356 def __new__(cls, seq, **kw):
358 Construct a template from a sequence SEQ of other templates.
360 If SEQ is a singleton (which it often is) then return it directly;
361 otherwise construct a SequenceTemplate.
366 return super(SequenceTemplate, cls).__new__(cls, seq = seq, **kw)
368 def __init__(me, seq, **kw):
370 Initialize a new SequenceTemplate object from SEQ.
372 The sequence is flattened out: if SEQ contains SequenceTemplates then we
373 use their children directly, so that we don't have a useless tree.
375 super(SequenceTemplate, me).__init__(**kw)
379 if isinstance(t, cls): tt += t._seq
385 The relations of a SequenceTemplate are the union of the relations of its
389 for t in me._seq: rr.update(t.relations())
392 def subst(me, out, cs):
394 The output of a SequenceTemplate is the concatenation of the expansions
397 for t in me._seq: t.subst(out, cs)
400 return '#<SequenceTemplate %r>' % me._seq
402 class RepeatTemplate (BasicTemplate):
404 A RepeatTemplate iterates its body over a number of relations.
407 def __init__(me, sub):
409 Initialize a new RepeatTemplate, given a template to act as its body.
415 A RepeatTemplate hides the relations of its body.
419 def subst(me, out, cs):
421 Substitute a RepeatTemplate, by iterating over the relations mentioned in
424 rr = me._sub.relations()
426 if len(r) == 0: return
429 me._sub.subst(out, cs)
430 if not cs.step(): break
434 return '#<RepeatTemplate %r>' % me._sub
436 ###--------------------------------------------------------------------------
437 ### Some slightly cheesy parsing machinery.
439 class ParseState (object):
441 A ParseState object keeps track of a parser's position in a file.
443 The `curr' slot contains the current line under consideration.
446 def __init__(me, file, text):
448 Initialize a ParseState object.
450 The FILE is a string naming the source file, and the TEXT is a string
451 holding the file's contents, which is split into lines here.
455 me._it = iter(text.splitlines(True))
460 Advance the ParseState to the next line.
462 Sets `curr' to the next line, or to None if the input is exhausted.
464 try: me.curr = me._it.next()
465 except StopIteration: me.curr = None
470 Report a fatal error during parsing, attributing it to the current line.
472 die('%s:%d: %s' % (me._file, me._i, msg))
474 class token (object):
476 A token object has no interesting properties other than its identity.
479 def __init__(me, name):
480 """Initialize a new token, with the given NAME."""
483 """Return a description of the token, for diagnostic purposes."""
484 return '#<%s>' % me._name
486 ## Some magical tokens useful during parsing.
## Regular expressions matching substitution tags.
##
## Both patterns are compiled with `RX.VERBOSE'; assuming `RX' is the standard
## `re' module, the spaces inside the patterns are therefore not significant.
##
## R_SIMPLETAG matches `@COLUMN'; group 1 is the column name.
R_SIMPLETAG = RX.compile(r'@ (\w+)', RX.VERBOSE)
## R_COMPLEXTAG matches `@{COLUMN:OP:...}'; group 1 is the column name and
## group 2 is the (possibly empty) run of operator names, each one still
## carrying its leading `:'.
R_COMPLEXTAG = RX.compile(r'@ { (\w+) ((?: : \w+)*) }', RX.VERBOSE)
494 ## A dictionary mapping operation names to functions which implement them.
499 Decorator for substitution operator functions.
501 Remember the operator in `OPMAP'; the operator's name is taken from FUNC's
502 name, removing a prefix `op_' if there is one.
504 An operator function is given the raw value as an argument and should
505 return the transformed value.
507 name = func.func_name
508 if name.startswith('op_'): name = name[3:]
514 """@{COLUMN:u} -- the item in upper case."""
519 """@{COLUMN:l} -- the item in lower case."""
524 """@{COLUMN:f} -- the item, with `/' characters replaced by `-'."""
525 return val.replace('/', '-')
## Matches a run of one or more characters, none of which is an ASCII letter,
## a digit or an underscore.
R_NOTIDENT = RX.compile(r'[^a-zA-Z0-9_]+')
531 @{COLUMN:c} -- the item, with non-alphanumeric sequences replaced with `_'.
533 return R_NOTIDENT.sub('_', val)
537 Split VAL into two, at an `=' sign.
539 If VAL has the form `THIS=THAT' then return the pair (THIS, THAT);
540 otherwise return (VAL, VAL).
543 if c >= 0: return val[:c], val[c + 1:]
544 else: return val, val
548 """@{COLUMN:left} -- the left-hand side of the item."""
549 return _pairify(val)[0]
552 """@{COLUMN:right} -- the right-hand side of the item."""
553 return _pairify(val)[1]
557 Parse a chunk of text from a ParseState.
559 Stop when we get to something which looks like a template keyword, but
560 extract tags. Return the resulting template.
562 Tags have the form `@COLUMN', or `@{COLUMN:OPERATOR:...}'. The text may
563 contain comments beginning `%#', which are ignored, and lines beginning
564 `%%' which have the initial `%' removed and are otherwise treated as normal
565 text (and, in particular, may contain tags). Other lines beginning with
566 `%' are directives and must be processed by our caller.
569 ## Starting out: no templates collected, and an empty buffer of literal
575 ## Spill accumulated literal text from `lit' into a LiteralTemplate
578 if l: tt.append(LiteralTemplate(l))
582 ## Iterate over the lines of input.
586 ## Stop if there's no more text; handle lines beginning with `%'.
587 if line is None: break
588 elif line.startswith('%'):
589 if line.startswith('%#'): ps.step(); continue
590 elif line.startswith('%%'): line = line[1:]
593 ## Work through the line, finding tags.
597 ## If there are no more `@' signs, there can be no more tags, and we're
599 j = line.find('@', i)
602 ## Write the chunk we've found.
605 ## If the next character is also `@' then this is an escape and we
607 if line[j:].startswith('@@'):
612 ## Parse the tag into a column name, and maybe some operators.
613 m = R_SIMPLETAG.match(line, j)
614 if not m: m = R_COMPLEXTAG.match(line, j)
615 if not m: ps.error('invalid tag')
617 try: rel, i = COLMAP[col]
618 except KeyError: ps.error("unknown column `%s'" % col)
619 ops = m.lastindex >= 2 and m.group(2)
621 ## If we have operators then look them up and compose them.
624 for opname in ops[1:].split(':'):
625 try: op = OPMAP[opname]
626 except KeyError: ps.error("unknown operation `%s'" % opname)
627 if wholeop is None: wholeop = op
628 else: wholeop = (lambda f, g: lambda x: f(g(x)))(op, wholeop)
630 ## Emit a LiteralTemplate for the accumulated text, and a TagTemplate
633 tt.append(TagTemplate(rel, i, wholeop))
635 ## Continue from after the tag.
638 ## Finished a line. Write out the remainder of the line and move onto
643 ## Run out of things to do. Flush out the rest of the literal text and
644 ## combine the templates.
646 return SequenceTemplate(tt)
648 ## A dictionary mapping regular expressions to directive-processing functions.
653 Function decorator for template file directives.
655 Associate the regular expression RX with the function in `DIRECT'.
656 Directive functions are invoked as FUNC(PS, M), where PS is the ParseState,
657 and M is the match object resulting from matching RX against the directive
661 DIRECT.append((RX.compile(rx, RX.VERBOSE), func))
665 def parse_template(ps):
667 Parse a single template from the ParseState PS.
669 A single template is either a chunk of text (parsed by `parse_text') or a
670 directive (handled by the appropriate function in `DIRECT').
672 Returns either a template object, or a special token. In particular, `EOF'
673 is returned if we run out of text; directives may return other tokens.
676 ## Skip initial comments. Otherwise we might end up with an empty
677 ## SequenceTemplate here.
678 while ps.curr is not None and ps.curr.startswith('%#'): ps.step()
680 ## If we've run out of input, return `EOF' here. A line beginning `%%', or
681 ## not beginning `%', means we've found a chunk of text. Otherwise find
682 ## the right directive handler.
683 if ps.curr is None: return EOF
684 elif ps.curr.startswith('%'):
685 if ps.curr.startswith('%%'): return parse_text(ps)
686 for rx, func in DIRECT:
687 line = ps.curr[1:].strip()
692 ps.error("unrecognized directive")
694 return parse_text(ps)
696 def parse_templseq(ps, nestp):
698 Parse a sequence of templates from the ParseState PS.
700 Calls `parse_template' repeatedly.  If NESTP is true, then an `END' token
701 (presumably from a directive handler) is permitted and halts parsing;
702 otherwise `END' signifies an error.
704 Returns a template object.
709 t = parse_template(ps)
712 else: ps.error("unexpected `end' directive")
714 if nestp: ps.error("unexpected end of file")
717 return SequenceTemplate(tt)
def dir_repeat(ps, m):
    """
    Parse the body of a repeat block and wrap it in a RepeatTemplate.

    PS is the ParseState to read from; M is the match object for the
    directive line, which is not used here.  The body is parsed as a nested
    template sequence, so it is expected to be closed off by an `END' token.
    The resulting RepeatTemplate iterates the body over the cartesian product
    of the relations mentioned in it.
    """
    body = parse_templseq(ps, True)
    return RepeatTemplate(body)
733 """%end -- an end marker used to delimit chunks of template."""
736 def compile_template(file, text):
738 Compile TEXT into a template, attributing errors to FILE.
740 ps = ParseState(file, text)
741 t = parse_templseq(ps, False)
744 ###--------------------------------------------------------------------------
## Build the command-line parser.
op = OP.OptionParser(
    description = 'Generates files by filling in simple templates',
    usage = 'usage: %prog {-l | -g TMPL} FILE [COL,...=VAL,... ... | @FILE:COL,...] ...',
    version = 'Catacomb version @VERSION@')

def cb_gen(opt, optstr, arg, op):
    """
    Option callback for `-g': remember the template file and select `gen'.

    ARG is the option's argument, which is stored as the `input' value; the
    `mode' value is set to `gen'.  The last parameter is the parser handed to
    the callback, and it shadows the module-level `op' within this function.
    """
    op.values.mode = 'gen'
    op.values.input = arg

## Each entry is (short option, long option, keyword arguments for
## `add_option').  Both options write to `mode': `-l' stores the constant
## `list', and `-g' goes through `cb_gen' so that it can record its argument
## as well.
for short, long, kw in [
    ('-l', '--list', {
        'action': 'store_const', 'const': 'list', 'dest': 'mode',
        'help': 'list filenames generated'}),
    ('-g', '--generate', {
        'action': 'callback', 'metavar': 'TEMPLATE',
        'callback': cb_gen, 'type': 'string',
        'help': 'generate file(s) from TEMPLATE file'})]:
    op.add_option(short, long, **kw)

## If neither option is given, `mode' keeps this placeholder value.
op.set_defaults(mode = 'what?')
opts, args = op.parse_args()
766 if len(args) < 1: op.error('missing FILE')
768 for rel in args[1:]: read_thing(rel)
769 filetempl = compile_template('<output>', filepat)
771 def filenames(filetempl):
773 Generate the filenames in the compiled filename template FILETEMPL.
776 rr = filetempl.relations()
778 if not len(r): return
782 filetempl.subst(out, cs)
783 yield out.getvalue(), cs
784 if not cs.step(): break
788 if opts.mode == 'list':
789 for file, cs in filenames(filetempl): print file
790 elif opts.mode == 'gen':
791 with open(opts.input) as f:
792 templ = RepeatTemplate(compile_template(opts.input, f.read()))
793 for file, cs in filenames(filetempl):
795 with open(new, 'w') as out:
799 die('What am I doing here?')
801 ###----- That's all, folks --------------------------------------------------