+### -*-python-*-
+###
+### String formatting, with bells, whistles, and gongs
+###
+### (c) 2013 Mark Wooding
+###
+
+###----- Licensing notice ---------------------------------------------------
+###
+### This file is part of Chopwood: a password-changing service.
+###
+### Chopwood is free software; you can redistribute it and/or modify
+### it under the terms of the GNU Affero General Public License as
+### published by the Free Software Foundation; either version 3 of the
+### License, or (at your option) any later version.
+###
+### Chopwood is distributed in the hope that it will be useful,
+### but WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+### GNU Affero General Public License for more details.
+###
+### You should have received a copy of the GNU Affero General Public
+### License along with Chopwood; if not, see
+### <http://www.gnu.org/licenses/>.
+
+from __future__ import with_statement
+
+import contextlib as CTX
+import re as RX
+from cStringIO import StringIO
+import sys as SYS
+
+import util as U
+
+###--------------------------------------------------------------------------
+### A quick guide to the formatting machinery.
+###
+### This is basically a re-implementation of Common Lisp's FORMAT function in
+### Python. It differs in a few respects.
+###
+### * Most essentially, Python's object and argument-passing models aren't
+### the same as Lisp's. In fact, for our purposes, they're a bit better:
+### Python's sharp distinction between positional and keyword arguments
+### is often extremely annoying, but here they become a clear benefit.
+### Inspired by Python's own enhanced string-formatting machinery (the
+### new `str.format' method, and `string.Formatter' class), we provide
+### additional syntax to access keyword arguments by name, positional
+### arguments by position (without moving the cursor as manipulated by
+### `~*'), and for selecting individual elements of arguments by indexing
+### or attribute lookup.
+###
+### * Unfortunately, Python's I/O subsystem is much less rich than Lisp's.
+### We lack streams which remember their cursor position, and so can't
+### implement the `~&' (fresh line) or `~T' (horizontal tab) operators
+### usefully. Moreover, the Python pretty-printer is rather less well
+### developed than the XP-based Lisp pretty-printer, so the pretty-
+### printing operations are unlikely to be implemented any time soon.
+###
+### * This implementation is missing a number of formatting directives just
+### because they're somewhat tedious to write, such as the detailed
+### floating-point printing provided by `~E', `~F' and `~G'. These might
+### appear in time.
+###
+### Formatting takes place in two separable stages. First, a format string
+### is compiled into a formatting operation. Then, the formatting operation
+### can be applied to sets of arguments. State for these two stages is
+### maintained in fluid variable sets `COMPILE' and `FORMAT'.
+###
+### There are a number of protocols involved in making all of this work.
+### They're described in detail as we come across them, but here's an
+### overview.
+###
+### * Output is determined by formatting-operation objects, typically (but
+### not necessarily) subclasses of `BaseFormatOperation'. A format
+### string is compiled into a single compound formatting operation.
+###
+### * Formatting operations determine what to output from their own
+### internal state and from formatting arguments. The latter are
+### collected from argument-collection objects which are subclasses of
+### `BaseArg'.
+###
+### * Formatting operations can be modified using parameters, which are
+### supplied either through the format string or from arguments. To
+### abstract over this distinction, parameters are collected from
+### parameter-collection objects which are subclasses of `BaseParameter'.
+
+FORMAT = U.Fluid()
+## State for format-time processing. The base state is established by the
+## `format' function, though various formatting operations will rebind
+## portions of the state while they perform recursive processing. The
+## variables are as follows.
+##
+## argmap The map (typically a dictionary) of keyword arguments to be
+## formatted. These can be accessed only through `=KEY' or
+## `!KEY' syntax.
+##
+## argpos The index of the next positional argument to be collected.
+## The `~*' directive works by setting this variable.
+##
+## argseq The sequence (typically a list) of positional arguments to be
+## formatted. These are collected in order (as modified by the
+## `~*' directive), or may be accessed through `=INDEX' or
+## `!INDEX' syntax.
+##
+## escape An escape procedure (i.e., usually created by `Escape()') to
+## be called by `~^'.
+##
+## last_multi_p A boolean, indicating that there are no more lists of
+## arguments (e.g., from `~:{...~}'), so `~:^' should escape if
+## it is encountered.
+##
+## multi_escape An escape procedure (i.e., usually created by `Escape()') to
+## be called by `~:^'.
+##
+## pushback Some formatting operations, notably `~@[...~]', read
+## arguments without consuming them, so a subsequent operation
+## should collect the same argument. This works by pushing the
+## arguments onto the `pushback' list.
+##
+## write A function which writes its single string argument to the
+## current output.
+
+COMPILE = U.Fluid()
+## State for compile-time processing. The base state is established by the
+## `compile' function, though some formatting operations will rebind portions
+## of the state while they perform recursive processing. The variables are
+## as follows.
+##
+## control The control string being parsed.
+##
+## delim An iterable (usually a string) of delimiter directives. See
+## the `FormatDelimiter' class and the `collect_subformat'
+## function for details of this.
+##
+## end The end of the portion of the control string being parsed.
+## There might be more of the string, but we should pretend that
+## it doesn't exist.
+##
+## opmaps A list of operation maps, i.e., dictionaries mapping
+## formatting directive characters to the corresponding
+## formatting operation classes. The list is searched in order,
+## and the first match is used. This can be used to provide
+## local extensions to the formatting language.
+##
+## start The current position in the control string. This is advanced
+## as pieces of the string are successfully parsed.
+
+###--------------------------------------------------------------------------
+### A few random utilities.
+
def remaining():
    """
    Count the positional arguments which have not yet been consumed.

    The count covers both the pushed-back arguments and the unread tail of
    `FORMAT.argseq', so successive calls need not return decreasing values
    even when `~*' has not been used to move the cursor.
    """
    unread = len(FORMAT.argseq) - FORMAT.argpos
    return len(FORMAT.pushback) + unread
+
@CTX.contextmanager
def bind_args(args, **kw):
    """
    Context manager: run the body with a different set of format arguments.

    ARGS is treated as a mapping if it has a `keys' attribute: it then
    replaces the keyword arguments (`argmap') and the positional arguments are
    left alone.  Anything else replaces the positional arguments (`argseq'),
    resetting the cursor (`argpos') and emptying the `pushback' list, while
    the keyword arguments are left alone.

    Any further keyword arguments are bound as additional `FORMAT' variables
    for the duration of the body.
    """
    if hasattr(args, 'keys'):
        rebinding = FORMAT.bind(argmap = args, **kw)
    else:
        rebinding = FORMAT.bind(argseq = args, argpos = 0, pushback = [], **kw)
    with rebinding:
        yield
+
+## Some regular expressions for parsing things.
+## `R_INT' matches an optionally signed run of decimal digits.
+R_INT = RX.compile(r'[-+]?[0-9]+')
+## `R_WORD' matches an identifier: a letter or underscore followed by any
+## number of letters, digits or underscores.
+R_WORD = RX.compile(r'[_a-zA-Z][_a-zA-Z0-9]*')
+
+###--------------------------------------------------------------------------
+### Format string errors.
+
class FormatStringError (Exception):
    """
    An exception type for reporting errors in format control strings.

    Its most useful feature is that it points out where the error is in a
    vaguely useful way.  Attributes are as follows.

    control     The offending format control string.

    msg         The error message, as a human-readable string.

    pos         The position at which the error was discovered.  This might
                be a little way from the actual problem, but it's usually
                good enough.
    """

    def __init__(me, msg, control, pos):
        """
        Construct the exception, given a message MSG, a format CONTROL string,
        and the position POS at which the error was found.
        """
        ## Pass the details up to the base class as well, so that `args' is
        ## populated: this keeps `repr' informative and lets the exception be
        ## copied or pickled, both of which rebuild it from `args'.
        super(FormatStringError, me).__init__(msg, control, pos)
        me.msg = msg
        me.control = control
        me.pos = pos

    def __str__(me):
        """
        Present a string explaining the problem, including a dump of the
        offending line of the control string with a caret under the error
        position.
        """
        ## Find the bounds of the line containing the error position.
        s = me.control.rfind('\n', 0, me.pos) + 1
        e = me.control.find('\n', me.pos)
        if e < 0: e = len(me.control)

        ## The `%*s' pads an empty string out to the error column, so that the
        ## caret lands underneath the offending character.
        return '%s\n %s\n %*s^\n' % \
            (me.msg, me.control[s:e], me.pos - s, '')
+
def format_string_error(msg):
    """
    Raise a `FormatStringError' carrying MSG, blaming the control string and
    parse position currently recorded in `COMPILE'.
    """
    control, where = COMPILE.control, COMPILE.start
    raise FormatStringError(msg, control, where)
+
+###--------------------------------------------------------------------------
+### Argument collection protocol.
+
+## Argument collectors abstract away the details of collecting formatting
+## arguments. They're used both for collecting arguments to be output, and
+## for parameters designated using the `v' or `!ARG' syntaxes.
+##
+## There are a small number of primitive collectors, and some `compound
+## collectors' which read an argument using some other collector, and then
+## process it in some way.
+##
+## An argument collector should implement the following methods.
+##
+## get() Return the argument variable.
+##
+## pair() Return a pair of arguments.
+##
+## tostr(FORCEP)
+## Return a string representation of the collector. If FORCEP,
+## always return a string; otherwise, a `NextArg' collector
+## returns `None' to indicate that no syntax is required to
+## select it.
+
class BaseArg (object):
    """
    Common ancestor of the argument collectors.

    Subclasses supply `get' and `tostr'.  The `pair' method provided here
    simply returns whatever `get' returns, on the assumption that the argument
    is itself a two-element sequence.
    """

    def __init__(me):
        """Nothing to set up."""

    def pair(me):
        """
        Return a pair of arguments: here, a single argument which is expected
        to be a pair already.
        """
        both = me.get()
        return both

    def __repr__(me):
        """Describe the collector, showing its control-string syntax."""
        kind = type(me).__name__
        return '#<%s "=%s">' % (kind, me.tostr(True))
+
class NextArg (BaseArg):
    """The default collector: consume arguments in order."""

    def get(me):
        """
        Consume and return the next argument.

        The most recently pushed-back argument is preferred, if there is one;
        otherwise the argument at `argpos' in `argseq' is returned and
        `argpos' is advanced past it.
        """
        pending = FORMAT.pushback
        if pending: return pending.pop()
        pos = FORMAT.argpos
        value = FORMAT.argseq[pos]
        FORMAT.argpos = pos + 1
        return value

    def pair(me):
        """Consume two arguments in turn and return them as a pair."""
        return me.get(), me.get()

    def tostr(me, forcep):
        """
        Return `+' if FORCEP, and `None' otherwise: the default collector
        needs no explicit syntax to select it.
        """
        return '+' if forcep else None
+
+NEXTARG = NextArg()
+## Because `NextArg' collectors are used so commonly, and they're all the
+## same, we make a distinguished one and try to use that instead. Nothing
+## goes badly wrong if you don't use this, but you'll use more memory than
+## strictly necessary.
+
class ThisArg (BaseArg):
    """A collector which peeks at upcoming arguments without consuming them."""

    def _get(me, i):
        """
        Return the argument I places on from the current position.

        Pushed-back arguments come first, most recently pushed first; after
        those, we read from `argseq' starting at `argpos'.
        """
        stack = FORMAT.pushback
        depth = len(stack)
        if i < depth: return stack[depth - i - 1]
        return FORMAT.argseq[FORMAT.argpos + i - depth]

    def get(me):
        """Return the next argument, leaving it in place."""
        return me._get(0)

    def pair(me):
        """Return the next two arguments, leaving both in place."""
        first = me._get(0)
        second = me._get(1)
        return first, second

    def tostr(me, forcep):
        """Convert the collector to a string: always `@'."""
        return '@'

THISARG = ThisArg()
+
class SeqArg (BaseArg):
    """
    A primitive collector which fetches the positional argument at a fixed
    index, independently of the cursor.
    """

    def __init__(me, index):
        """Remember the INDEX into `FORMAT.argseq'."""
        me.index = index

    def get(me):
        """Return the positional argument at the stored index."""
        seq = FORMAT.argseq
        return seq[me.index]

    def tostr(me, forcep):
        """Convert the collector to a string: the decimal index."""
        return '%d' % me.index
+
class MapArg (BaseArg):
    """
    A primitive collector which fetches the keyword argument with a fixed
    key.
    """

    def __init__(me, key):
        """Remember the KEY to look up in `FORMAT.argmap'."""
        me.key = key

    def get(me):
        """Return the keyword argument stored under the key."""
        mapping = FORMAT.argmap
        return mapping[me.key]

    def tostr(me, forcep):
        """Convert the collector to a string: the key itself."""
        return '%s' % me.key
+
class IndexArg (BaseArg):
    """
    A compound collector: fetch an argument using another collector, and
    then subscript the result.
    """

    def __init__(me, base, index):
        """Remember the BASE collector and the INDEX to apply to its result."""
        me.base, me.index = base, index

    def get(me):
        """Collect the base argument and return its element at the index."""
        container = me.base.get()
        return container[me.index]

    def tostr(me, forcep):
        """Convert the collector to a string of the form `BASE[INDEX]'."""
        prefix = me.base.tostr(True)
        return '%s[%s]' % (prefix, me.index)
+
class AttrArg (BaseArg):
    """
    A compound collector: fetch an argument using another collector, and
    then look up a named attribute of the result.
    """

    def __init__(me, base, attr):
        """Remember the BASE collector and the ATTR name to look up."""
        me.base, me.attr = base, attr

    def get(me):
        """Collect the base argument and return its named attribute."""
        obj = me.base.get()
        return getattr(obj, me.attr)

    def tostr(me, forcep):
        """Convert the collector to a string of the form `BASE.ATTR'."""
        prefix = me.base.tostr(True)
        return '%s.%s' % (prefix, me.attr)
+
## Regular expression matching compound-argument suffixes.  Group 1 is an
## integer index, group 2 is any other (string) index, and group 3 is an
## attribute name.
R_REF = RX.compile(r'''
        \[ ( [-+]? [0-9]+ ) \]
      | \[ ( [^]]* ) \]
      | \. ( [_a-zA-Z] [_a-zA-Z0-9]* )
''', RX.VERBOSE)

def parse_arg():
    """
    Parse an argument collector from the current format control string.

    The syntax of an argument is as follows.

            ARG ::= COMPOUND-ARG | `{' COMPOUND-ARG `}'

            COMPOUND-ARG ::= SIMPLE-ARG
                      | COMPOUND-ARG `[' INDEX `]'
                      | COMPOUND-ARG `.' WORD

            SIMPLE-ARG ::= INT | WORD | `+' | `@'

    Surrounding braces mean nothing, but may serve to separate the argument
    from a following alphabetic formatting directive.

    A `+' means `the next pushed-back or positional argument'.  It's useful to
    be able to say this explicitly so that indexing and attribute references
    can be attached to it: for example, in `~={thing}@[~={+.attr}A~]'.

    A `@' means the next argument, but without consuming it.

    An integer argument selects the positional argument with that index; a
    negative index counts backwards from the end, as is usual in Python.

    A word argument selects the keyword argument with that key.

    Parsing starts at `COMPILE.start', which is advanced past the argument on
    success.  The collector object is returned.  A `FormatStringError' is
    raised if the argument is missing or malformed.
    """

    c = COMPILE.control
    s, e = COMPILE.start, COMPILE.end

    ## If it's delimited then pick through the delimiter.
    brace = None
    if s < e and c[s] == '{':
        brace = '}'
        s += 1

    ## Make sure there's something to look at.
    if s >= e: raise FormatStringError('missing argument specifier', c, s)

    ## Find the start of the breadcrumbs.  This must be a single `if'/`elif'
    ## chain: once `+' has been consumed we mustn't examine the following
    ## character as though it were another simple argument.
    ch = c[s]
    if ch == '+':
        getarg = NEXTARG
        s += 1
    elif ch == '@':
        getarg = THISARG
        s += 1
    elif ch.isdigit() or ch == '-':
        ## A leading `-' selects an index counting back from the end.  (A
        ## leading `+' can't be used here because it already means `next
        ## argument'.)
        m = R_INT.match(c, s, e)
        if not m: raise FormatStringError('unknown argument specifier', c, s)
        getarg = SeqArg(int(m.group()))
        s = m.end()
    else:
        m = R_WORD.match(c, s, e)
        if not m: raise FormatStringError('unknown argument specifier', c, s)
        getarg = MapArg(m.group())
        s = m.end()

    ## Now parse indices and attribute references, wrapping the collector in
    ## a further compound collector for each one found.
    while True:
        m = R_REF.match(c, s, e)
        if not m: break
        if m.group(1): getarg = IndexArg(getarg, int(m.group(1)))
        elif m.group(2): getarg = IndexArg(getarg, m.group(2))
        elif m.group(3): getarg = AttrArg(getarg, m.group(3))
        else: raise FormatStringError('internal error (weird ref)', c, s)
        s = m.end()

    ## Finally, check that we have the close delimiter we want.
    if brace:
        if s >= e or c[s] != brace:
            raise FormatStringError('missing close brace', c, s)
        s += 1

    ## Done.
    COMPILE.start = s
    return getarg
+
+###--------------------------------------------------------------------------
+### Parameter collectors.
+
+## These are pretty similar in shape to argument collectors. The required
+## methods are as follows.
+##
+## get() Return the parameter value.
+##
+## tostr() Return a string representation of the collector. (We don't
+## need a FORCEP argument here, because there are no default
+## parameters.)
+
class BaseParameter (object):
    """
    Common ancestor of the parameter collectors.

    All it offers at present is a `__repr__' built on the subclass's `tostr'
    method, but it leaves room for the protocol to grow.
    """

    def __init__(me):
        """Nothing to set up."""

    def __repr__(me):
        """Describe the collector, showing its control-string syntax."""
        kind = type(me).__name__
        return '#<%s "%s">' % (kind, me.tostr())
+
class LiteralParameter (BaseParameter):
    """
    A parameter whose value was written directly in the control string.
    """

    def __init__(me, lit):
        """Remember the literal value LIT."""
        me.lit = lit

    def get(me):
        """Return the literal value."""
        return me.lit

    def tostr(me):
        """
        Convert the parameter to control-string syntax: nothing for an omitted
        parameter, digits for an integer, and a quoted character otherwise.
        """
        value = me.lit
        if value is None:
            return ''
        if isinstance(value, (int, long)):
            return str(value)
        return "'%c" % value

## Omitted parameters are very common, and they're all alike, so share one
## distinguished collector among them.
LITNONE = LiteralParameter(None)
+
class RemainingParameter (BaseParameter):
    """
    A parameter whose value is the number of positional arguments remaining.
    """

    def get(me):
        """Return the current count of remaining arguments."""
        return remaining()

    def tostr(me):
        """Convert the parameter to control-string syntax: always `#'."""
        return '#'

## One of these is as good as another, so keep a single shared instance.
REMAIN = RemainingParameter()
+
class VariableParameter (BaseParameter):
    """
    A parameter whose value is fetched from a formatting argument.
    """

    def __init__(me, arg):
        """Remember the argument collector ARG which supplies the value."""
        me.arg = arg

    def get(me):
        """Collect the argument and return it as the parameter value."""
        return me.arg.get()

    def tostr(me):
        """
        Convert the parameter to control-string syntax: `V' when the collector
        needs no explicit syntax, and `!ARG' otherwise.
        """
        spec = me.arg.tostr(False)
        if spec: return '!' + spec
        return 'V'

VARNEXT = VariableParameter(NEXTARG)
+
+###--------------------------------------------------------------------------
+### Formatting protocol.
+
+## The formatting operation protocol is pretty straightforward. An operation
+## must implement a method `format' which takes no arguments, and should
+## produce its output (if any) by calling `FORMAT.write'. In the course of
+## its execution, it may collect parameters and arguments.
+##
+## The `opmaps' table maps formatting directives (which are individual
+## characters, in upper-case for letters) to functions returning formatting
+## operation objects. All of the directives are implemented in this way.
+## The functions for the base directives are actually the (callable) class
+## objects for subclasses of `BaseFormatOperation', though this isn't
+## necessary.
+##
+## The constructor functions are called as follows:
+##
+## FUNC(ATP, COLONP, GETARG, PARAMS, CHAR)
+## The ATP and COLONP arguments are booleans indicating respectively
+## whether the `@' and `:' modifiers were set in the control string.
+## GETARG is the collector for the operation's argument(s). The PARAMS
+## are a list of parameter collectors. Finally, CHAR is the directive
+## character (so directives with similar behaviour can use the same
+## class).
+
class FormatLiteral (object):
    """
    A special formatting operation which writes a fixed piece of text.
    """

    def __init__(me, s):
        """Remember the literal string S to be written."""
        me.s = s

    def __repr__(me):
        """Describe the operation, showing the literal text."""
        kind = type(me).__name__
        return '#<%s %r>' % (kind, me.s)

    def format(me):
        """Write the literal text to the current output."""
        FORMAT.write(me.s)
+
class FormatSequence (object):
    """
    A special formatting operation which applies a collection of other
    operations one after another.
    """

    def __init__(me, seq):
        """Remember the sequence SEQ of operations to apply."""
        me.seq = seq

    def __repr__(me):
        """Describe the operation, listing its component operations."""
        kind = type(me).__name__
        inner = ', '.join(map(repr, me.seq))
        return '#<%s [%s]>' % (kind, inner)

    def format(me):
        """Apply each component operation in order."""
        for op in me.seq:
            op.format()
+
class BaseFormatOperation (object):
    """
    The base class for built-in formatting operations (and, probably, most
    extensions).

    Subclasses should implement a `_format' method.

    _format(ATP, COLONP, [PARAM = DEFAULT, ...])
                Called to produce output.  The ATP and COLONP flags are from
                the constructor.  The remaining function arguments are the
                computed parameter values.  Arguments may be collected using
                the `getarg' attribute.

    Subclasses can set class attributes to influence the constructor.

    MINPARAM    The minimal number of parameters acceptable.  If fewer
                parameters are supplied then an error is reported at compile
                time.  The default is zero.

    MAXPARAM    The maximal number of parameters acceptable.  If more
                parameters are supplied then an error is reported at compile
                time.  The default is zero; `None' means that there is no
                maximum (but this is unusual).

    Instances have a number of useful attributes.

    atp         True if an `@' modifier appeared in the directive.

    char        The directive character from the control string.

    colonp      True if a `:' modifier appeared in the directive.

    getarg      Argument collector; may be called by `_format'.

    params      A list of parameter collector objects.
    """

    ## Default bounds on parameters.
    MINPARAM = MAXPARAM = 0

    def __init__(me, atp, colonp, getarg, params, char):
        """
        Constructor: store information about the directive, and check the
        bounds on the parameters.

        A subclass should call this before doing anything fancy such as
        parsing the control string further.  A format-string error is
        reported if the number of PARAMS is outside the class's bounds.
        """

        ## Store information.
        me.atp = atp
        me.colonp = colonp
        me.getarg = getarg
        me.params = params
        me.char = char

        ## Check the parameters.
        nparams = len(params)
        too_few = nparams < me.MINPARAM
        too_many = me.MAXPARAM is not None and nparams > me.MAXPARAM
        if too_few or too_many:
            format_string_error('bad parameters')

    def format(me):
        """
        Produce output: collect the parameter values and call the subclass's
        formatting function.
        """
        me._format(me.atp, me.colonp, *[p.get() for p in me.params])

    def tostr(me):
        """
        Convert the operation to a directive string.

        The pieces are written in the order in which the parser reads them:
        parameters, then the explicit `=ARG' (if any), then the `:' and `@'
        flags, and finally the directive character.  Putting the flags before
        the `=' would produce a string which can't be parsed back.
        """
        params = ','.join(p.tostr() for p in me.params)

        ## The default collector reports `None', meaning that no explicit
        ## argument syntax is wanted.
        arg = me.getarg.tostr(False)
        if arg: arg = '={%s}' % arg
        else: arg = ''

        flags = ''
        if me.colonp: flags += ':'
        if me.atp: flags += '@'

        return '~%s%s%s%s' % (params, arg, flags, me.char)

    def __repr__(me):
        """Produce a readable (ahem) version of the directive."""
        return '#<%s "%s">' % (type(me).__name__, me.tostr())
+
class FormatDelimiter (BaseFormatOperation):
    """
    A fake formatting operation which exists to impose additional syntactic
    structure on control strings.

    No `_format' method is defined here, so `FormatDelimiter' objects should
    never find their way into the output pipeline.  Instead, they are
    typically useful in conjunction with the `collect_subformat' function.
    To this end, the constructor fails if its directive character is not
    listed as an expected delimiter in `COMPILE.delim'.
    """

    def __init__(me, *args):
        """
        Constructor: check that this delimiter is expected in the current
        context, and report a format-string error if it isn't.
        """
        super(FormatDelimiter, me).__init__(*args)
        expected = COMPILE.delim
        if me.char in expected: return
        format_string_error("unexpected close delimiter `~%s'" % me.char)
+
+###--------------------------------------------------------------------------
+### Parsing format strings.
+
def parse_operator():
    """
    Parse the next piece of the current control string and return a single
    formatting operation for it.

    Returns `None' if the end of the control string (as stored in
    `COMPILE.end') has been reached.  Otherwise `COMPILE.start' is advanced
    past whatever was parsed.  A directive has the form

            `~' [PARAM {`,' PARAM}] [`=' ARG] {`:' | `@'} CHAR

    and the operation is made by looking CHAR up (in upper case) in the
    `COMPILE.opmaps' tables.
    """

    control = COMPILE.control
    pos, limit = COMPILE.start, COMPILE.end

    ## If we're at the end then stop.
    if pos >= limit: return None

    ## Literal text runs up to the next tilde, or to the end.
    if control[pos] != '~':
        stop = control.find('~', pos, limit)
        if stop < 0: stop = limit
        COMPILE.start = stop
        return FormatLiteral(control[pos:stop])

    ## Otherwise there's a formatting directive to collect.  Step over the
    ## tilde.
    pos += 1

    ## First, collect the parameters.
    params = []
    while pos < limit:
        ch = control[pos]
        if ch == ',':
            ## An omitted parameter: the comma is its own separator.
            params.append(LITNONE)
            pos += 1
            continue
        if ch == "'":
            pos += 1
            if pos >= limit:
                raise FormatStringError('missing argument character',
                                        control, pos)
            params.append(LiteralParameter(control[pos]))
            pos += 1
        elif ch.upper() == 'V':
            pos += 1
            params.append(VARNEXT)
        elif ch == '!':
            ## `parse_arg' reads and updates the cursor in `COMPILE.start'.
            COMPILE.start = pos + 1
            collector = parse_arg()
            pos = COMPILE.start
            params.append(VariableParameter(collector))
        elif ch == '#':
            pos += 1
            params.append(REMAIN)
        else:
            m = R_INT.match(control, pos, limit)
            if not m: break
            params.append(LiteralParameter(int(m.group())))
            pos = m.end()

        ## A parameter must be followed by a comma if there are to be more.
        if pos >= limit or control[pos] != ',': break
        pos += 1

    ## Maybe there's an explicit argument.
    if pos < limit and control[pos] == '=':
        COMPILE.start = pos + 1
        getarg = parse_arg()
        pos = COMPILE.start
    else:
        getarg = NEXTARG

    ## Next, collect the flags, each of which may appear at most once.
    atp = colonp = False
    while pos < limit and control[pos] in '@:':
        if control[pos] == '@':
            if atp: raise FormatStringError('duplicate at flag', control, pos)
            atp = True
        else:
            if colonp:
                raise FormatStringError('duplicate colon flag', control, pos)
            colonp = True
        pos += 1

    ## We should now have a directive character.  The first operation map
    ## which knows about it wins.
    if pos >= limit: raise FormatStringError('missing directive', control, pos)
    ch = control[pos].upper()
    op = None
    for opmap in COMPILE.opmaps:
        try: op = opmap[ch]
        except KeyError: continue
        break
    else:
        raise FormatStringError('unknown directive', control, pos)
    pos += 1

    ## Done.
    COMPILE.start = pos
    return op(atp, colonp, getarg, params, ch)
+
def collect_subformat(delim):
    """
    Parse formatting operations from the control string up to a delimiter.

    DELIM lists the acceptable delimiter directive characters.  Parsing stops
    at the first `FormatDelimiter' whose character is in DELIM.  Where an
    operation accepts several sequences of directives, the first element of
    DELIM should be the proper closing delimiter (it is the one named in the
    error message if the control string ends too early); the traditional
    separator is `~;'.

    Returns a pair: a `FormatSequence' of the operations collected, and the
    delimiter operation which ended the sequence.
    """
    ops = []
    with COMPILE.bind(delim = delim):
        while True:
            op = parse_operator()
            if not op:
                format_string_error("missing close delimiter `~%s'" % delim[0])
            closing = isinstance(op, FormatDelimiter) and op.char in delim
            if closing: break
            ops.append(op)
    return FormatSequence(ops), op
+
def compile(control):
    """
    Parse the whole CONTROL string and return a `FormatSequence' of the
    operations found in it.

    No delimiters are expected at top level, so `delim' is bound to the empty
    string while parsing.
    """
    ops = []
    with COMPILE.bind(control = control, start = 0, end = len(control),
                      delim = ''):
        op = parse_operator()
        while op:
            ops.append(op)
            op = parse_operator()
    return FormatSequence(ops)
+
+###--------------------------------------------------------------------------
+### Formatting text.
+
def format(out, control, *args, **kw):
    """
    Format the positional args and keywords according to the CONTROL, and
    write the result to OUT.

    The output is written to OUT, which may be one of the following.

    `True'      Write to standard output.

    `False'     Write to standard error.

    `None'      Return the output as a string.

    Any object with a `write' attribute
                Call `write' repeatedly with strings to be output.

    Any callable object
                Call the object repeatedly with strings to be output.

    Anything else is rejected with a `TypeError'.

    The CONTROL argument may be one of the following.

    A string or unicode object
                Compile the string into a formatting operation and use that.

    A formatting operation
                Apply the operation to the arguments.

    The return value is the formatted string if OUT is `None', and `None'
    otherwise.
    """

    ## Turn the output argument into a function which we can use easily.  If
    ## we're writing to a string, we'll have to extract the result at the
    ## end, so keep track of anything we have to do later.
    final = U.constantly(None)
    if out is True:
        write = SYS.stdout.write
    elif out is False:
        write = SYS.stderr.write
    elif out is None:
        strio = StringIO()
        write = strio.write
        final = strio.getvalue
    elif hasattr(out, 'write'):
        write = out.write
    elif callable(out):
        write = out
    else:
        ## Say what was wrong with it, rather than just echoing the object.
        raise TypeError('unsupported output designator: %r' % (out,))

    ## Turn the control argument into a formatting operation.
    if isinstance(control, basestring):
        op = compile(control)
    else:
        op = control

    ## Invoke the formatting operation in the correct environment.
    with FORMAT.bind(write = write, pushback = [],
                     argseq = args, argpos = 0,
                     argmap = kw):
        op.format()

    ## Done.
    return final()
+
+###--------------------------------------------------------------------------
+### Standard formatting directives.
+
+## A dictionary, in which we'll build the basic set of formatting operators.
+## Callers wishing to implement extensions should include this in their
+## `opmaps' lists.
+BASEOPS = {}
+## Install the base table as the only operation map searched by
+## `parse_operator' by default.
+COMPILE.opmaps = [BASEOPS]
+
+## Some standard delimiter directives. They all share the `FormatDelimiter'
+## class, which rejects any delimiter not listed in `COMPILE.delim'.
+for i in [']', ')', '}', '>', ';']: BASEOPS[i] = FormatDelimiter
+
class SimpleFormatOperation (BaseFormatOperation):
    """
    Common base class for the `~A' (`str') and `~S' (`repr') directives.

    These take similar parameters, so it's useful to deal with them at the
    same time.  Subclasses should implement a method `_convert' of one
    argument, which returns a string to be formatted.

    The parameters are as follows.

    MINCOL      The minimum number of characters to output.  Padding is added
                if the output string is shorter than this.  Default 0.

    COLINC      Lengths of padding groups.  The number of padding characters
                will be MINPAD more than a multiple of COLINC.  Default 1.

    MINPAD      The smallest number of padding characters to write.
                Default 0.

    PADCHAR     The padding character.  Default space.

    If the `@' modifier is given, then padding is applied on the left;
    otherwise it is applied on the right.
    """

    MAXPARAM = 4

    def _format(me, atp, colonp,
                mincol = 0, colinc = 1, minpad = 0, padchar = ' '):
        """
        Convert the argument to a string, pad it as directed by the
        parameters, and write it out.
        """

        ## A parameter omitted in the middle of the list (e.g., `~,,2A') is
        ## collected as `None' and passed explicitly, so the defaults in the
        ## signature never get a look in.  Apply them by hand.
        if mincol is None: mincol = 0
        if colinc is None: colinc = 1
        if minpad is None: minpad = 0
        if padchar is None: padchar = ' '

        what = me._convert(me.getarg.get())
        n = len(what)

        ## Work out how much padding is needed beyond MINPAD, rounded up to a
        ## multiple of COLINC, and then add MINPAD back on.
        p = mincol - n - minpad + colinc - 1
        p -= p%colinc
        if p < 0: p = 0
        p += minpad

        if p <= 0: pass
        elif atp: what = (p * padchar) + what
        else: what = what + (p * padchar)
        FORMAT.write(what)
+
class FormatString (SimpleFormatOperation):
    """~A: write the argument as converted by `str'."""

    def _convert(me, arg):
        """Return the `str' conversion of ARG."""
        return str(arg)

BASEOPS['A'] = FormatString
+
class FormatRepr (SimpleFormatOperation):
    """~S: write the argument in readable form, as converted by `repr'."""

    def _convert(me, arg):
        """Return the `repr' conversion of ARG."""
        return repr(arg)

BASEOPS['S'] = FormatRepr
+
class IntegerFormat (BaseFormatOperation):
    """
    Common base class for the integer formatting directives `~D', `~B', `~O',
    `~X', and `~R'.

    These take similar parameters, so it's useful to deal with them at the
    same time.  There is a `_convert' method which does the main work.  By
    default, `_format' calls this with the argument and the value of the
    class attribute `RADIX'; complicated subclasses might want to override
    this behaviour.

    The parameters are as follows.

    MINCOL      Minimum column width.  If the output is smaller than this
                then it will be padded on the left.  The default is 0.

    PADCHAR     Character to use to pad the output, should this be necessary.
                The default is space.

    COMMACHAR   If the `:' modifier is present, then use this character to
                separate groups of digits.  The default is `,'.

    COMMAINTERVAL If the `:' modifier is present, then separate groups of
                this many digits.  The default is 3.

    If `@' is present, then a sign is always written; otherwise only `-'
    signs are written.
    """

    MAXPARAM = 4

    def _convert(me, n, radix, atp, colonp,
                 mincol = 0, padchar = ' ',
                 commachar = ',', commainterval = 3):
        """
        Convert the integer N into the given RADIX, under the control of the
        formatting parameters supplied, and write the result.

        Digits above 9 are written as upper-case letters, and then (from 36
        upwards) as lower-case letters, so radices from 2 to 62 are
        supported; a `ValueError' is raised for anything else.
        """

        ## Parameters omitted from the control string are collected as
        ## `None' and passed explicitly, so apply the defaults by hand.
        if mincol is None: mincol = 0
        if padchar is None: padchar = ' '
        if commachar is None: commachar = ','
        if commainterval is None: commainterval = 3

        ## A radix below two would never terminate, and we have no digits
        ## for one above 62.
        if not 2 <= radix <= 62:
            raise ValueError('radix must be between 2 and 62')

        ## Sort out the sign.  We'll deal with it at the end: for now it's
        ## just a distraction.
        if n < 0: sign = '-'; n = -n
        elif atp: sign = '+'
        else: sign = None

        ## Build in `dd' a list of the digits, in reverse order.  This will
        ## make the commafication easier later.  We do the division
        ## ourselves for every radix: `oct' and `hex' decorate their results
        ## with prefixes which have no place in the output.
        dd = []
        while n:
            n, r = divmod(n, radix)
            if r < 10: ch = chr(ord('0') + r)
            elif r < 36: ch = chr(ord('A') - 10 + r)
            else: ch = chr(ord('a') - 36 + r)
            dd.append(ch)
        if not dd: dd.append('0')

        ## If we must commafy then do that.  The digits are least-significant
        ## first, so a separator goes before every COMMAINTERVALth digit
        ## other than the first.
        if colonp and commainterval > 0:
            ndd = []
            for i, d in enumerate(dd):
                if i and i%commainterval == 0: ndd.append(commachar)
                ndd.append(d)
            dd = ndd

        ## Include the sign.
        if sign: dd.append(sign)

        ## Maybe we must pad the result.
        s = ''.join(reversed(dd))
        npad = mincol - len(s)
        if npad > 0: s = npad*padchar + s

        ## And we're done.
        FORMAT.write(s)

    def _format(me, atp, colonp, mincol = 0, padchar = ' ',
                commachar = ',', commainterval = 3):
        """Format the next argument in the class's fixed `RADIX'."""
        me._convert(me.getarg.get(), me.RADIX, atp, colonp, mincol, padchar,
                    commachar, commainterval)
+
class FormatDecimal (IntegerFormat):
    """~D: write an integer in decimal (radix 10)."""

    RADIX = 10

BASEOPS['D'] = FormatDecimal
+
class FormatBinary (IntegerFormat):
    """~B: write an integer in binary (radix 2)."""

    RADIX = 2

BASEOPS['B'] = FormatBinary
+
class FormatOctal (IntegerFormat):
    """~O: write an integer in octal (radix 8)."""

    RADIX = 8

BASEOPS['O'] = FormatOctal
+
class FormatHex (IntegerFormat):
  """
  ~X: Hexadecimal formatting.

  Write an integer argument in radix 16.  Modifiers and parameters are
  those handled by `IntegerFormat'.
  """
  RADIX = 16
BASEOPS['X'] = FormatHex
+
class FormatRadix (IntegerFormat):
  """
  ~R: General integer formatting.

  This works like the other integer-formatting operations, except that the
  radix is given as an additional first parameter:

  RADIX         The radix in which to write the argument.

  The remaining parameters (MINCOL, PADCHAR, COMMACHAR, COMMAINTERVAL) and
  the modifiers are as for `IntegerFormat'.

  The RADIX parameter is required: the forms of `~R' without a radix are not
  implemented, and using one raises `ValueError'.
  """
  MAXPARAM = 5
  def _format(me, atp, colonp, radix = None, mincol = 0, padchar = ' ',
              commachar = ',', commainterval = 3):
    """
    Fetch the next argument and write it in the given RADIX.

    Raises `ValueError' if RADIX was not supplied.
    """
    ## Check before fetching, so that a missing radix doesn't consume an
    ## argument.
    if radix is None:
      raise ValueError('Not implemented')
    me._convert(me.getarg.get(), radix, atp, colonp, mincol, padchar,
                commachar, commainterval)
BASEOPS['R'] = FormatRadix
+
class FormatSuppressNewline (BaseFormatOperation):
  """
  ~newline: suppressed newline and/or spaces.

  The directive consists of a tilde, the newline itself, and whatever run of
  whitespace follows it in the control string.  By default none of this is
  printed.  With the `@' modifier, the newline is printed; with the `:'
  modifier, the following whitespace is printed.
  """

  ## Matches the (possibly empty) run of whitespace following the directive.
  R_SPACE = RX.compile(r'\s*')

  def __init__(me, *args):
    """
    Swallow the whitespace following the directive from the control string,
    remembering it in case it has to be printed after all.
    """
    super(FormatSuppressNewline, me).__init__(*args)
    match = me.R_SPACE.match(COMPILE.control, COMPILE.start, COMPILE.end)
    me.trail = match.group()

    ## Resume compiling just past the whitespace we've claimed.
    COMPILE.start = match.end()

  def _format(me, atp, colonp):
    """Write the newline and the trailing whitespace, as requested."""
    for wanted, text in [(atp, '\n'), (colonp, me.trail)]:
      if wanted: FORMAT.write(text)
BASEOPS['\n'] = FormatSuppressNewline
+
class LiteralFormat (BaseFormatOperation):
  """
  A base class for formatting operations which write fixed strings.

  Each subclass supplies an attribute `CHAR' holding the string to be
  written; usually this is just a single character.

  These operations accept a single parameter:

  COUNT         The number of copies of the string to be written.
  """
  MAXPARAM = 1
  def _format(me, atp, colonp, count = 1):
    """Write COUNT copies of the subclass's `CHAR' string."""
    text = count * me.CHAR
    FORMAT.write(text)
+
class FormatNewline (LiteralFormat):
  """
  ~%: Start a new line.

  Write a newline character; the COUNT parameter handled by `LiteralFormat'
  says how many to write.
  """
  CHAR = '\n'
BASEOPS['%'] = FormatNewline
+
class FormatTilde (LiteralFormat):
  """
  ~~: Print a literal `~'.

  The COUNT parameter handled by `LiteralFormat' says how many tildes to
  write.
  """
  CHAR = '~'
BASEOPS['~'] = FormatTilde
+
class FormatCaseConvert (BaseFormatOperation):
  """
  ~(...~): Case-convert the contained output.

  Whatever the enclosed directives write is collected, and then written out
  with its case changed according to the modifiers:

  no modifiers  Convert to lower-case.
  @             Make initial letter upper-case and remainder lower.
  :             Make initial letters of words upper-case.
  @:            Convert to upper-case.
  """

  def __init__(me, *args):
    """Collect the enclosed directives, up to the matching `~)'."""
    super(FormatCaseConvert, me).__init__(*args)
    sub, _ = collect_subformat(')')
    me.sub = sub

  def _format(me, atp, colonp):
    """
    Run the enclosed directives with their output diverted into a buffer;
    then convert the buffer's contents and write the result to the real
    output.
    """
    buf = StringIO()
    try:
      with FORMAT.bind(write = buf.write):
        me.sub.format()
    finally:
      ## The conversion is done in the `finally' clause so that whatever
      ## was collected is still written if the body is left by an
      ## exception.
      text = buf.getvalue()
      if atp and colonp: text = text.upper()
      elif atp: text = text.capitalize()
      elif colonp: text = text.title()
      else: text = text.lower()
      FORMAT.write(text)
BASEOPS['('] = FormatCaseConvert
+
class FormatGoto (BaseFormatOperation):
  """
  ~*: Seek in positional arguments.

  There may be a parameter N; the default value depends on which modifiers
  are present.  Without `@', skip forwards or backwards by N (default
  1) places; with `@', move to argument N (default 0).  With `:', negate N,
  so move backwards instead of forwards, or count from the end rather than
  the beginning.  (Exception: `~@:0*' leaves no arguments remaining, whereas
  `~@-0*' is the same as `~@0*', and starts again from the beginning.)

  BUG: The list of pushed-back arguments is cleared.
  """
  MAXPARAM = 1
  def _format(me, atp, colonp, n = None):
    """
    Move the positional-argument cursor `FORMAT.argpos' as directed, and
    discard any pushed-back arguments.
    """
    if not atp:
      ## Relative motion: step from the current position.
      step = 1 if n is None else n
      if colonp: step = -step
      target = step + FORMAT.argpos
    else:
      ## Absolute motion: a negative position counts back from the end of
      ## the argument sequence.
      target = 0 if n is None else n
      if colonp:
        if target > 0: target = -target
        else: target = len(FORMAT.argseq)
      if target < 0: target += len(FORMAT.argseq)
    FORMAT.argpos = target
    FORMAT.pushback = []
BASEOPS['*'] = FormatGoto
+
class FormatConditional (BaseFormatOperation):
  """
  ~[...[~;...]...[~:;...]~]: Conditional formatting.

  There are three variants, which are best dealt with separately.

  With no modifiers, apply the Nth enclosed piece, where N is either the
  parameter, or the argument if no parameter is provided.  If there is no
  such piece (i.e., N is negative or too large) and the final piece is
  introduced by `~:;' then use that piece; otherwise produce no output.
  The piece introduced by `~:;' must be the last one.

  With `:', there must be exactly two pieces: apply the first if the argument
  is false, otherwise the second.

  With `@', there must be exactly one piece: if the argument is not `None'
  then push it back and apply the enclosed piece.
  """

  MAXPARAM = 1

  def __init__(me, *args):
    """
    Collect the enclosed pieces, up to the matching `~]', and check that
    they suit the modifiers.

    Problems are reported through `format_string_error'.
    """

    ## Store the arguments.
    super(FormatConditional, me).__init__(*args)

    ## Collect the pieces, and keep track of whether there's a default piece.
    ## Here `nextdef' is set once a `~:;' separator has been seen, meaning
    ## that the piece which follows it is the default.
    pieces = []
    default = None
    nextdef = False
    while True:
      piece, delim = collect_subformat('];')
      if nextdef: default = piece
      else: pieces.append(piece)
      if delim.char == ']': break

      ## There's another piece to come.  If we've just stored the default
      ## then that's an error: otherwise the later piece would quietly
      ## replace the default we already have.
      if nextdef:
        if delim.colonp: format_string_error('multiple defaults')
        else: format_string_error('default piece must be last')
      if delim.colonp: nextdef = True

    ## Make sure the syntax matches the modifiers we've been given.
    if (me.colonp or me.atp) and default is not None:
      format_string_error('default not allowed here')
    if (me.colonp and len(pieces) != 2) or \
       (me.atp and len(pieces) != 1):
      format_string_error('wrong number of pieces')

    ## Store stuff.
    me.pieces = pieces
    me.default = default

  def _format(me, atp, colonp, n = None):
    """
    Choose a piece according to the modifiers, the parameter N and the
    argument, and apply it.
    """
    if colonp:
      ## Two-way choice on the truth of the argument.
      arg = me.getarg.get()
      if arg: me.pieces[1].format()
      else: me.pieces[0].format()
    elif atp:
      ## Apply the piece only if there's an argument for it to consume; the
      ## argument is pushed back so that the piece can fetch it again.
      arg = me.getarg.get()
      if arg is not None:
        FORMAT.pushback.append(arg)
        me.pieces[0].format()
    else:
      ## Numbered choice, falling back to the default piece (if any).
      if n is None: n = me.getarg.get()
      if 0 <= n < len(me.pieces): piece = me.pieces[n]
      else: piece = me.default
      if piece is not None: piece.format()
BASEOPS['['] = FormatConditional
+
class FormatIteration (BaseFormatOperation):
  """
  ~{...~}: Repeated formatting.

  Repeatedly apply the enclosed formatting directives to a sequence of
  different arguments.  The directives may contain `~^' to escape early.

  Without `@', an argument is fetched and is expected to be a sequence; with
  `@', the remaining positional arguments are processed.

  Without `:', the enclosed directives are simply applied until the sequence
  of arguments is exhausted: each iteration may consume any number of
  arguments (even zero, though this is likely a bad plan) and any left over
  are available to the next iteration.  With `:', each element of the
  sequence of arguments is itself treated as a collection of arguments --
  either positional or keyword depending on whether it looks like a map --
  and exactly one such element is consumed in each iteration.

  If a parameter is supplied then perform at most this many iterations.  If
  the closing delimiter bears a `:' modifier, and the parameter is not zero,
  then the enclosed directives are applied once even if the argument sequence
  is empty.

  If the formatting directives are empty then a formatting string is fetched
  using the argument collector associated with the closing delimiter.
  """

  MAXPARAM = 1

  def __init__(me, *args):
    """Collect the loop body and the `~}' directive which closes it."""
    super(FormatIteration, me).__init__(*args)
    body, closer = collect_subformat('}')
    me.body = body
    me.end = closer

  def _multi(me, body):
    """
    Perform one iteration in which the next positional argument is itself
    the collection of arguments to which BODY is applied.
    """
    argset = NEXTARG.get()
    with U.Escape() as esc:
      ## Capture the enclosing escape before rebinding, so that it remains
      ## reachable as `multi_escape' while BODY runs; and note whether this
      ## is the final argument set.
      outer_escape = FORMAT.escape
      lastp = not remaining()
      with bind_args(argset, multi_escape = outer_escape, escape = esc,
                     last_multi_p = lastp):
        body.format()

  def _single(me, body):
    """
    Perform one iteration in which BODY consumes arguments directly from
    the current argument sequence.
    """
    body.format()

  def _loop(me, each, max):
    """
    Call the function EACH repeatedly, passing it the formatting operation
    representing the body to be applied.  Stop when no positional arguments
    remain, or, if MAX is not `None', after MAX iterations.
    """

    ## An empty body means that the control string comes from an argument,
    ## fetched by way of the closing delimiter.
    if me.body.seq: body = me.body
    else: body = compile(me.end.getarg.get())

    ## With `:' on the closing delimiter, the first iteration goes ahead
    ## even if there are no arguments.
    oncep = me.end.colonp
    count = 0
    while max is None or count < max:
      if (count > 0 or not oncep) and not remaining(): break
      each(body)
      count += 1

  def _format(me, atp, colonp, max = None):
    """
    Run the iteration: COLONP chooses how each iteration obtains its
    arguments, and ATP chooses where the argument sequence comes from.
    """
    each = me._multi if colonp else me._single
    with U.Escape() as esc:
      with FORMAT.bind(escape = esc):
        if not atp:
          ## Iterate over the sequence held in the next argument.
          with bind_args(me.getarg.get()):
            me._loop(each, max)
        else:
          ## Iterate over the remaining positional arguments.
          me._loop(each, max)
BASEOPS['{'] = FormatIteration
+
class FormatEscape (BaseFormatOperation):
  """
  ~^: Escape from iteration.

  Conditionally leave an iteration early.

  There may be up to three parameters: call them X, Y and Z.  If all three
  are present then exit unless Y is between X and Z (inclusive); if two are
  present then exit if X = Y; if only one is present, then exit if X is
  zero.  Obviously these are more useful if at least one of X, Y and Z is
  variable.

  With no parameters, exit if there are no positional arguments remaining.
  With `:', check the number of argument sets (as read by `~:{...~}') rather
  than the number of arguments in the current set, and escape from the entire
  iteration rather than from the processing the current set.
  """
  MAXPARAM = 3
  def _format(me, atp, colonp, x = None, y = None, z = None):
    """Decide whether to escape and, if so, invoke the escape function."""

    ## Work out whether we should stay put.
    ##
    ## NOTE(review): with three parameters, the Common Lisp specification
    ## terminates *if* X <= Y <= Z, which is the opposite of what's
    ## documented and implemented here -- confirm that this is intended.
    if z is not None: stay = x <= y <= z
    elif y is not None: stay = x != y
    elif x is not None: stay = x != 0
    elif colonp: stay = not FORMAT.last_multi_p
    else: stay = remaining()

    ## If not, then leave by the appropriate exit.
    if not stay:
      if colonp: FORMAT.multi_escape()
      else: FORMAT.escape()
BASEOPS['^'] = FormatEscape
+
class FormatRecursive (BaseFormatOperation):
  """
  ~?: Recursive formatting.

  Without `@', read a pair of arguments: use the first as a format string,
  and apply it to the arguments extracted from the second (which may be a
  sequence or a map).

  With `@', read a single argument: use it as a format string and apply it to
  the remaining arguments.
  """
  def _format(me, atp, colonp):
    """
    Compile the control string taken from the arguments and apply it, with
    a fresh escape bound while it runs.
    """
    with U.Escape() as esc:
      if atp:
        ## Carry on with the current arguments.
        op = compile(me.getarg.get())
        scope = FORMAT.bind(escape = esc)
      else:
        ## Switch to the arguments supplied alongside the control string.
        control, args = me.getarg.pair()
        op = compile(control)
        scope = bind_args(args, escape = esc)
      with scope: op.format()
BASEOPS['?'] = FormatRecursive
+
+###----- That's all, folks --------------------------------------------------