| 1 | ### -*-python-*- |
| 2 | ### |
| 3 | ### String formatting, with bells, whistles, and gongs |
| 4 | ### |
| 5 | ### (c) 2013 Mark Wooding |
| 6 | ### |
| 7 | |
| 8 | ###----- Licensing notice --------------------------------------------------- |
| 9 | ### |
| 10 | ### This file is part of Chopwood: a password-changing service. |
| 11 | ### |
| 12 | ### Chopwood is free software; you can redistribute it and/or modify |
| 13 | ### it under the terms of the GNU Affero General Public License as |
| 14 | ### published by the Free Software Foundation; either version 3 of the |
| 15 | ### License, or (at your option) any later version. |
| 16 | ### |
| 17 | ### Chopwood is distributed in the hope that it will be useful, |
| 18 | ### but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | ### GNU Affero General Public License for more details. |
| 21 | ### |
| 22 | ### You should have received a copy of the GNU Affero General Public |
| 23 | ### License along with Chopwood; if not, see |
| 24 | ### <http://www.gnu.org/licenses/>. |
| 25 | |
| 26 | from __future__ import with_statement |
| 27 | |
| 28 | import contextlib as CTX |
| 29 | import re as RX |
| 30 | from cStringIO import StringIO |
| 31 | import sys as SYS |
| 32 | |
| 33 | import util as U |
| 34 | |
| 35 | ###-------------------------------------------------------------------------- |
| 36 | ### A quick guide to the formatting machinery. |
| 37 | ### |
| 38 | ### This is basically a re-implementation of Common Lisp's FORMAT function in |
| 39 | ### Python. It differs in a few respects. |
| 40 | ### |
| 41 | ### * Most essentially, Python's object and argument-passing models aren't |
| 42 | ### the same as Lisp's. In fact, for our purposes, they're a bit better: |
| 43 | ### Python's sharp distinction between positional and keyword arguments |
| 44 | ### is often extremely annoying, but here they become a clear benefit. |
### Inspired by Python's own enhanced string-formatting machinery (the
### new `str.format' method, and `string.Formatter' class), we provide
| 47 | ### additional syntax to access keyword arguments by name, positional |
| 48 | ### arguments by position (without moving the cursor as manipulated by |
| 49 | ### `~*'), and for selecting individual elements of arguments by indexing |
| 50 | ### or attribute lookup. |
| 51 | ### |
| 52 | ### * Unfortunately, Python's I/O subsystem is much less rich than Lisp's. |
| 53 | ### We lack streams which remember their cursor position, and so can't |
### implement the `~&' (fresh line) or `~T' (horizontal tab) operators
| 55 | ### usefully. Moreover, the Python pretty-printer is rather less well |
| 56 | ### developed than the XP-based Lisp pretty-printer, so the pretty- |
| 57 | ### printing operations are unlikely to be implemented any time soon. |
| 58 | ### |
| 59 | ### * This implementation is missing a number of formatting directives just |
| 60 | ### because they're somewhat tedious to write, such as the detailed |
| 61 | ### floating-point printing provided by `~E', `~F' and `~G'. These might |
| 62 | ### appear in time. |
| 63 | ### |
| 64 | ### Formatting takes place in two separable stages. First, a format string |
| 65 | ### is compiled into a formatting operation. Then, the formatting operation |
| 66 | ### can be applied to sets of arguments. State for these two stages is |
| 67 | ### maintained in fluid variable sets `COMPILE' and `FORMAT'. |
| 68 | ### |
| 69 | ### There are a number of protocols involved in making all of this work. |
| 70 | ### They're described in detail as we come across them, but here's an |
| 71 | ### overview. |
| 72 | ### |
| 73 | ### * Output is determined by formatting-operation objects, typically (but |
| 74 | ### not necessarily) subclasses of `BaseFormatOperation'. A format |
| 75 | ### string is compiled into a single compound formatting operation. |
| 76 | ### |
| 77 | ### * Formatting operations determine what to output from their own |
| 78 | ### internal state and from formatting arguments. The latter are |
| 79 | ### collected from argument-collection objects which are subclasses of |
| 80 | ### `BaseArg'. |
| 81 | ### |
| 82 | ### * Formatting operations can be modified using parameters, which are |
| 83 | ### supplied either through the format string or from arguments. To |
| 84 | ### abstract over this distinction, parameters are collected from |
| 85 | ### parameter-collection objects which are subclasses of `BaseParameter'. |
| 86 | |
FORMAT = U.Fluid()
## State for format-time processing.  The base state is established by the
## `format' function, though various formatting operations will rebind
## portions of the state while they perform recursive processing.  The
## variables are as follows.
##
## argmap       The map (typically a dictionary) of keyword arguments to be
##              formatted.  These can be accessed only through `=KEY' or
##              `!KEY' syntax.
##
## argpos       The index of the next positional argument to be collected.
##              The `~*' directive works by setting this variable.
##
## argseq       The sequence (typically a list) of positional arguments to be
##              formatted.  These are collected in order (as modified by the
##              `~*' directive), or may be accessed through `=INDEX' or
##              `!INDEX' syntax.
##
## escape       An escape procedure (i.e., usually created by `Escape()') to
##              be called by `~^'.
##
## last_multi_p A boolean, indicating that there are no more lists of
##              arguments (e.g., from `~:{...~}'), so `~:^' should escape if
##              it is encountered.
##
## multi_escape An escape procedure (i.e., usually created by `Escape()') to
##              be called by `~:^'.
##
## pushback     Some formatting operations, notably `~@[...~]', read
##              arguments without consuming them, so a subsequent operation
##              should collect the same argument.  This works by pushing the
##              arguments onto the `pushback' list.
##
## write        A function which writes its single string argument to the
##              current output.
| 122 | |
COMPILE = U.Fluid()
## State for compile-time processing.  The base state is established by the
## `compile' function, though some formatting operations will rebind portions
## of the state while they perform recursive processing.  The variables are
## as follows.
##
## control      The control string being parsed.
##
## delim        An iterable (usually a string) of delimiter directives.  See
##              the `FormatDelimiter' class and the `collect_subformat'
##              function for details of this.
##
## end          The end of the portion of the control string being parsed.
##              There might be more of the string, but we should pretend that
##              it doesn't exist.
##
## opmaps       A list of operation maps, i.e., dictionaries mapping
##              formatting directive characters to the corresponding
##              formatting operation classes.  The list is searched in order,
##              and the first match is used.  This can be used to provide
##              local extensions to the formatting language.
##
## start        The current position in the control string.  This is advanced
##              as pieces of the string are successfully parsed.
| 147 | |
| 148 | ###-------------------------------------------------------------------------- |
| 149 | ### A few random utilities. |
| 150 | |
def remaining():
  """
  Count the positional arguments which have yet to be collected.

  Pushed-back arguments are /included/ in the total, so successive results
  need not decrease monotonically, even if `~*' is never used to move the
  argument cursor.
  """
  pending = len(FORMAT.pushback)
  unread = len(FORMAT.argseq) - FORMAT.argpos
  return pending + unread
| 159 | |
@CTX.contextmanager
def bind_args(args, **kw):
  """
  Context manager: install ARGS as the formatting arguments for a while.

  An ARGS object with a `keys' attribute is taken to be a mapping: it
  replaces the keyword arguments (`argmap') and the positional arguments are
  left alone.  Anything else replaces the positional arguments (`argseq'),
  resetting `argpos' to zero and `pushback' to a fresh empty list, and the
  keyword arguments are left alone.

  Any further keyword arguments given to this function name additional
  `FORMAT' variables to be bound for the duration.
  """
  if hasattr(args, 'keys'):
    binding = FORMAT.bind(argmap = args, **kw)
  else:
    binding = FORMAT.bind(argseq = args, argpos = 0, pushback = [], **kw)
  with binding:
    yield
| 177 | |
## Some regular expressions for parsing things.  `R_INT' matches an
## optionally signed decimal integer; `R_WORD' matches an identifier made of
## ASCII letters, digits and underscores which doesn't start with a digit.
R_INT = RX.compile(r'[-+]?[0-9]+')
R_WORD = RX.compile(r'[_a-zA-Z][_a-zA-Z0-9]*')
| 181 | |
| 182 | ###-------------------------------------------------------------------------- |
| 183 | ### Format string errors. |
| 184 | |
class FormatStringError (Exception):
  """
  An exception type for reporting errors in format control strings.

  Its most useful feature is that it points out where the error is in a
  vaguely useful way.  Attributes are as follows.

  control       The offending format control string.

  msg           The error message, as a human-readable string.

  pos           The position at which the error was discovered.  This might
                be a little way from the actual problem, but it's usually
                good enough.
  """

  def __init__(me, msg, control, pos):
    """
    Construct the exception, given a message MSG, a format CONTROL string,
    and the position POS at which the error was found.
    """
    ## Pass everything on to the base class, so that `args' is populated.
    ## Without this, the exception has empty `args', which makes its `repr'
    ## useless and prevents it from being reconstructed from `args' by
    ## copying or pickling.
    Exception.__init__(me, msg, control, pos)
    me.msg = msg
    me.control = control
    me.pos = pos

  def __str__(me):
    """
    Present a string explaining the problem, including a dump of the
    offending portion of the string.

    The result is the message, then the line of the control string which
    contains POS, then a caret marking the offending column.
    """
    ## Find the bounds of the line containing the error position: `rfind'
    ## returns -1 if there's no earlier newline, which conveniently makes the
    ## start zero.
    s = me.control.rfind('\n', 0, me.pos) + 1
    e = me.control.find('\n', me.pos)
    if e < 0: e = len(me.control)

    ## The `%*s' pads an empty string to the error's column within its line,
    ## so that the caret lands under the offending character.
    return '%s\n %s\n %*s^\n' % \
      (me.msg, me.control[s:e], me.pos - s, '')
| 220 | |
def format_string_error(msg):
  """
  Raise a `FormatStringError' carrying MSG, blaming the current position
  (`COMPILE.start') in the control string currently being compiled.
  """
  control, where = COMPILE.control, COMPILE.start
  raise FormatStringError(msg, control, where)
| 224 | |
| 225 | ###-------------------------------------------------------------------------- |
| 226 | ### Argument collection protocol. |
| 227 | |
| 228 | ## Argument collectors abstract away the details of collecting formatting |
| 229 | ## arguments. They're used both for collecting arguments to be output, and |
| 230 | ## for parameters designated using the `v' or `!ARG' syntaxes. |
| 231 | ## |
| 232 | ## There are a small number of primitive collectors, and some `compound |
| 233 | ## collectors' which read an argument using some other collector, and then |
| 234 | ## process it in some way. |
| 235 | ## |
| 236 | ## An argument collector should implement the following methods. |
| 237 | ## |
| 238 | ## get() Return the argument variable. |
| 239 | ## |
| 240 | ## pair() Return a pair of arguments. |
| 241 | ## |
| 242 | ## tostr(FORCEP) |
| 243 | ## Return a string representation of the collector. If FORCEP, |
| 244 | ## always return a string; otherwise, a `NextArg' collector |
| 245 | ## returns `None' to indicate that no syntax is required to |
| 246 | ## select it. |
| 247 | |
class BaseArg (object):
  """
  Common base for argument collectors.

  Subclasses supply `get' and `tostr'.  The `pair' method provided here
  simply fetches one argument with `get' and trusts that it is itself a
  two-element sequence.
  """

  def __init__(me):
    """Nothing to initialize."""

  def pair(me):
    """
    Return a pair of arguments: a single collected argument, which is
    expected to be a pair already.
    """
    both = me.get()
    return both

  def __repr__(me):
    """Describe the collector, showing its control-string syntax."""
    name = type(me).__name__
    return '#<%s "=%s">' % (name, me.tostr(True))
| 269 | |
class NextArg (BaseArg):
  """The default collector: consume arguments one at a time, in order."""

  def get(me):
    """
    Collect and return the next argument.

    The most recently pushed-back argument is taken if there is one;
    otherwise the argument at `argpos' in `argseq' is taken, and `argpos' is
    moved on by one.
    """
    stack = FORMAT.pushback
    if stack: return stack.pop()
    pos = FORMAT.argpos
    arg = FORMAT.argseq[pos]
    FORMAT.argpos = pos + 1
    return arg

  def pair(me):
    """Collect two consecutive arguments and return them as a pair."""
    return me.get(), me.get()

  def tostr(me, forcep):
    """
    Return the collector's syntax: `+' if FORCEP, and otherwise `None',
    because the default collector needn't be written down at all.
    """
    return '+' if forcep else None
| 297 | |
NEXTARG = NextArg()
## Because `NextArg' collectors are used so commonly, and they're all the
## same, we make a distinguished one and try to use that instead.  Nothing
## goes badly wrong if you don't use this, but you'll use more memory than
## strictly necessary.
| 303 | |
class ThisArg (BaseArg):
  """Peek at upcoming positional arguments without consuming them."""

  def _get(me, i):
    """
    Return the argument I places on from the current position.

    Pushed-back arguments come first, most recently pushed first (the order
    in which `NextArg' would pop them); after those, arguments are read from
    `argseq' starting at `argpos'.  Nothing is consumed.
    """
    stack = FORMAT.pushback
    depth = len(stack)
    if i < depth: return stack[depth - 1 - i]
    return FORMAT.argseq[FORMAT.argpos + i - depth]

  def get(me):
    """Return the argument which would be collected next."""
    return me._get(0)

  def pair(me):
    """Return the next two arguments as a pair, consuming neither."""
    first = me._get(0)
    second = me._get(1)
    return first, second

  def tostr(me, forcep):
    """Return the collector's syntax, which is always `@'."""
    return '@'
| 320 | |
THISARG = ThisArg()
## As with `NEXTARG', `ThisArg' collectors carry no state of their own, so a
## single shared instance is enough.
| 322 | |
class SeqArg (BaseArg):
  """
  Primitive collector: fetch the positional argument found at a fixed index.
  """

  def __init__(me, index):
    """Remember the INDEX into `FORMAT.argseq' which is to be fetched."""
    me.index = index

  def get(me):
    """Return the positional argument at the stored index."""
    seq = FORMAT.argseq
    return seq[me.index]

  def tostr(me, forcep):
    """Return the collector's syntax: the index, in decimal."""
    return '%d' % me.index
| 331 | |
class MapArg (BaseArg):
  """
  Primitive collector: fetch the keyword argument filed under a fixed key.
  """

  def __init__(me, key):
    """Remember the KEY to be looked up in `FORMAT.argmap'."""
    me.key = key

  def get(me):
    """Return the keyword argument with the stored key."""
    mapping = FORMAT.argmap
    return mapping[me.key]

  def tostr(me, forcep):
    """Return the collector's syntax: the key itself."""
    return '%s' % me.key
| 340 | |
class IndexArg (BaseArg):
  """
  Compound collector: collect an argument using another collector, and then
  subscript the result.
  """

  def __init__(me, base, index):
    """Store the BASE collector and the INDEX to apply to what it returns."""
    me.base, me.index = base, index

  def get(me):
    """Collect the base argument and return its element at the index."""
    container = me.base.get()
    return container[me.index]

  def tostr(me, forcep):
    """Return the collector's syntax: `BASE[INDEX]'."""
    prefix = me.base.tostr(True)
    return '%s[%s]' % (prefix, me.index)
| 352 | |
class AttrArg (BaseArg):
  """
  Compound collector: collect an argument using another collector, and then
  look up a named attribute of the result.
  """

  def __init__(me, base, attr):
    """Store the BASE collector and the name ATTR of the attribute wanted."""
    me.base, me.attr = base, attr

  def get(me):
    """Collect the base argument and return its named attribute."""
    obj = me.base.get()
    return getattr(obj, me.attr)

  def tostr(me, forcep):
    """Return the collector's syntax: `BASE.ATTR'."""
    prefix = me.base.tostr(True)
    return '%s.%s' % (prefix, me.attr)
| 364 | |
## Regular expression matching compound-argument suffixes.  The alternatives
## are tried in order, and a successful match fills in just one group: group
## 1 for an integer index `[INT]', group 2 for any other index `[KEY]' (which
## may be empty), and group 3 for an attribute reference `.WORD'.
R_REF = RX.compile(r'''
\[ ( [-+]? [0-9]+ ) \]
| \[ ( [^]]* ) \]
| \. ( [_a-zA-Z] [_a-zA-Z0-9]* )
''', RX.VERBOSE)
| 371 | |
def parse_arg():
  """
  Parse an argument collector from the current format control string.

  Parsing starts at `COMPILE.start' and goes no further than `COMPILE.end';
  on success, `COMPILE.start' is advanced past the argument and the collector
  is returned.  A `FormatStringError' is raised if the text can't be parsed.

  The syntax of an argument is as follows.

        ARG ::= COMPOUND-ARG | `{' COMPOUND-ARG `}'

        COMPOUND-ARG ::= SIMPLE-ARG
                | COMPOUND-ARG `[' INDEX `]'
                | COMPOUND-ARG `.' WORD

        SIMPLE-ARG ::= INT | WORD | `+' | `@'

  Surrounding braces mean nothing, but may serve to separate the argument
  from a following alphabetic formatting directive.

  A `+' means `the next pushed-back or positional argument'.  It's useful to
  be able to say this explicitly so that indexing and attribute references
  can be attached to it: for example, in `~={thing}@[~={+.attr}A~]'.
  Similarly, `@' designates the same argument, except that it is not
  consumed.

  An integer argument selects the positional argument with that index; a
  negative index counts backwards from the end, as is usual in Python.

  A word argument selects the keyword argument with that key.
  """

  c = COMPILE.control
  s, e = COMPILE.start, COMPILE.end

  ## If it's delimited then pick through the delimiter.
  brace = None
  if s < e and c[s] == '{':
    brace = '}'
    s += 1

  ## Make sure there's something to look at.
  if s >= e: raise FormatStringError('missing argument specifier', c, s)

  ## Find the start of the breadcrumbs.  The alternatives are mutually
  ## exclusive, so this has to be one single `if'/`elif' chain: otherwise,
  ## having eaten a `+', we'd go on to examine the following character too,
  ## possibly running off the end of the string, and either reject a
  ## perfectly good `+.attr' or quietly replace the collector.
  if c[s] == '+':
    getarg = NEXTARG
    s += 1
  elif c[s] == '@':
    getarg = THISARG
    s += 1
  elif c[s].isdigit() or c[s] == '-':
    ## A leading `-' is admitted so that negative indices work as promised
    ## above; a `-' with no digits following isn't an integer at all.
    m = R_INT.match(c, s, e)
    if not m: raise FormatStringError('unknown argument specifier', c, s)
    getarg = SeqArg(int(m.group()))
    s = m.end()
  else:
    m = R_WORD.match(c, s, e)
    if not m: raise FormatStringError('unknown argument specifier', c, s)
    getarg = MapArg(m.group())
    s = m.end()

  ## Now parse indices and attribute references.  Each one wraps the
  ## collector built so far in a new compound collector.
  while True:
    m = R_REF.match(c, s, e)
    if not m: break
    if m.group(1): getarg = IndexArg(getarg, int(m.group(1)))
    elif m.group(2): getarg = IndexArg(getarg, m.group(2))
    elif m.group(3): getarg = AttrArg(getarg, m.group(3))
    else: raise FormatStringError('internal error (weird ref)', c, s)
    s = m.end()

  ## Finally, check that we have the close delimiter we want.
  if brace:
    if s >= e or c[s] != brace:
      raise FormatStringError('missing close brace', c, s)
    s += 1

  ## Done.  Only now do we commit the new position.
  COMPILE.start = s
  return getarg
| 449 | |
| 450 | ###-------------------------------------------------------------------------- |
| 451 | ### Parameter collectors. |
| 452 | |
| 453 | ## These are pretty similar in shape to argument collectors. The required |
| 454 | ## methods are as follows. |
| 455 | ## |
| 456 | ## get() Return the parameter value. |
| 457 | ## |
| 458 | ## tostr() Return a string representation of the collector. (We don't |
| 459 | ## need a FORCEP argument here, because there are no default |
| 460 | ## parameters.) |
| 461 | |
class BaseParameter (object):
  """
  Common base for parameter collectors.

  All that's provided at the moment is a `__repr__' built on the subclass's
  `tostr' method, but this leaves room for the protocol to grow.
  """

  def __init__(me):
    """Nothing to initialize."""

  def __repr__(me):
    """Describe the collector, showing its control-string syntax."""
    kind = type(me).__name__
    return '#<%s "%s">' % (kind, me.tostr())
| 471 | |
class LiteralParameter (BaseParameter):
  """
  A parameter whose value is fixed, having been written out in the control
  string.
  """

  def __init__(me, lit):
    """Store the literal value LIT."""
    me.lit = lit

  def get(me):
    """Return the literal value."""
    return me.lit

  def tostr(me):
    """
    Return the parameter's syntax: nothing at all for `None', decimal digits
    for an integer, and a quote followed by the character for anything else.
    """
    lit = me.lit
    if lit is None: return ''
    if isinstance(lit, (int, long)): return str(lit)
    return "'%c" % lit
| 482 | |
## Many parameters are omitted, so let's just reuse a distinguished collector
## for them.  Its `get' method returns `None', and its `tostr' method returns
## the empty string.
LITNONE = LiteralParameter(None)
| 486 | |
class RemainingParameter (BaseParameter):
  """
  A parameter whose value is the number of positional arguments still to be
  collected, as reported by `remaining'.
  """

  def get(me):
    """Return the current count of remaining arguments."""
    count = remaining()
    return count

  def tostr(me):
    """Return the parameter's syntax, which is always `#'."""
    return '#'
| 493 | |
## These are all the same, so let's just have one of them: the collector
## keeps no state of its own, and simply calls `remaining' when asked.
REMAIN = RemainingParameter()
| 496 | |
class VariableParameter (BaseParameter):
  """
  A parameter whose value is collected from a formatting argument.
  """

  def __init__(me, arg):
    """Store the argument collector ARG which will supply the value."""
    me.arg = arg

  def get(me):
    """Collect the argument and return it as the parameter value."""
    return me.arg.get()

  def tostr(me):
    """
    Return the parameter's syntax: `V' if the argument collector has no
    syntax of its own, and otherwise `!' followed by the collector's syntax.
    """
    text = me.arg.tostr(False)
    return '!' + text if text else 'V'
## The collector for a plain `v' parameter, which takes its value from the
## next argument.  `parse_operator' uses this shared instance rather than
## making a fresh one each time.
VARNEXT = VariableParameter(NEXTARG)
| 508 | |
| 509 | ###-------------------------------------------------------------------------- |
| 510 | ### Formatting protocol. |
| 511 | |
| 512 | ## The formatting operation protocol is pretty straightforward. An operation |
| 513 | ## must implement a method `format' which takes no arguments, and should |
| 514 | ## produce its output (if any) by calling `FORMAT.write'. In the course of |
| 515 | ## its execution, it may collect parameters and arguments. |
| 516 | ## |
| 517 | ## The `opmaps' table maps formatting directives (which are individual |
| 518 | ## characters, in upper-case for letters) to functions returning formatting |
| 519 | ## operation objects. All of the directives are implemented in this way. |
| 520 | ## The functions for the base directives are actually the (callable) class |
| 521 | ## objects for subclasses of `BaseFormatOperation', though this isn't |
| 522 | ## necessary. |
| 523 | ## |
| 524 | ## The constructor functions are called as follows: |
| 525 | ## |
| 526 | ## FUNC(ATP, COLONP, GETARG, PARAMS, CHAR) |
| 527 | ## The ATP and COLONP arguments are booleans indicating respectively |
| 528 | ## whether the `@' and `:' modifiers were set in the control string. |
| 529 | ## GETARG is the collector for the operation's argument(s). The PARAMS |
| 530 | ## are a list of parameter collectors. Finally, CHAR is the directive |
## character (so directives with similar behaviour can use the same
| 532 | ## class). |
| 533 | |
class FormatLiteral (object):
  """
  A special formatting operation which writes out a fixed piece of text.
  """

  def __init__(me, s):
    """Store the literal text S."""
    me.s = s

  def __repr__(me):
    """Describe the operation, showing its text."""
    name = type(me).__name__
    return '#<%s %r>' % (name, me.s)

  def format(me):
    """Write the text to the current output."""
    FORMAT.write(me.s)
| 541 | |
class FormatSequence (object):
  """
  A special formatting operation which runs a collection of other operations
  one after another.
  """

  def __init__(me, seq):
    """Store the sequence SEQ of operations to be run."""
    me.seq = seq

  def __repr__(me):
    """Describe the operation, listing the operations it contains."""
    inner = ', '.join(map(repr, me.seq))
    return '#<%s [%s]>' % (type(me).__name__, inner)

  def format(me):
    """Run each of the operations in turn."""
    for op in me.seq:
      op.format()
| 554 | |
class BaseFormatOperation (object):
  """
  The base class for built-in formatting operations (and, probably, most
  extensions).

  Subclasses should implement a `_format' method.

  _format(ATP, COLONP, [PARAM = DEFAULT, ...])
                Called to produce output.  The ATP and COLONP flags are from
                the constructor.  The remaining function arguments are the
                computed parameter values.  Arguments may be collected using
                the `getarg' attribute.

  Subclasses can set class attributes to influence the constructor.

  MINPARAM      The minimal number of parameters acceptable.  If fewer
                parameters are supplied then an error is reported at compile
                time.  The default is zero.

  MAXPARAM      The maximal number of parameters acceptable.  If more
                parameters are supplied then an error is reported at compile
                time.  The default is zero; `None' means that there is no
                maximum (but this is unusual).

  Instances have a number of useful attributes.

  atp           True if an `@' modifier appeared in the directive.

  char          The directive character from the control string.

  colonp        True if a `:' modifier appeared in the directive.

  getarg        Argument collector; may be called by `_format'.

  params        A list of parameter collector objects.
  """

  ## Default bounds on parameters.
  MINPARAM = MAXPARAM = 0

  def __init__(me, atp, colonp, getarg, params, char):
    """
    Constructor: store information about the directive, and check the bounds
    on the parameters.

    A subclass should call this before doing anything fancy such as parsing
    the control string further.  If the number of PARAMS is out of bounds
    then the problem is reported through `format_string_error'.
    """

    ## Store information.
    me.atp = atp
    me.colonp = colonp
    me.getarg = getarg
    me.params = params
    me.char = char

    ## Check the parameters.  A `MAXPARAM' of `None' means no upper bound.
    n = len(params)
    if n < me.MINPARAM or (me.MAXPARAM is not None and n > me.MAXPARAM):
      format_string_error('bad parameters')

  def format(me):
    """
    Produce output: call the subclass's formatting function, passing it the
    flags and the value collected from each of the parameters, in order.
    """
    me._format(me.atp, me.colonp, *[p.get() for p in me.params])

  def tostr(me):
    """
    Convert the operation to a directive string.

    The pieces are put together in the order in which the parser wants to
    read them back: parameters, then any explicit `={ARG}', then the flags,
    and finally the directive character.  (Putting the flags before the
    argument makes something which doesn't parse as the same directive.)
    """
    params = ','.join(p.tostr() for p in me.params)

    ## The default argument collector reports `None' here, in which case no
    ## explicit argument syntax is wanted at all.
    arg = me.getarg.tostr(False)

    return '~%s%s%s%s%s' % (
      params,
      '={%s}' % arg if arg else '',
      ':' if me.colonp else '',
      '@' if me.atp else '',
      me.char)

  def __repr__(me):
    """Produce a readable (ahem) version of the directive."""
    return '#<%s "%s">' % (type(me).__name__, me.tostr())
| 634 | |
class FormatDelimiter (BaseFormatOperation):
  """
  A pseudo-operation whose only job is to give control strings some extra
  syntactic structure.

  There's no `_format' method here, so a `FormatDelimiter' must never end up
  among the operations which are actually run.  Delimiters are meant to be
  noticed and picked out by `collect_subformat' instead; to that end,
  construction fails unless the directive character is listed as an expected
  delimiter in `COMPILE.delim'.
  """

  def __init__(me, *args):
    """
    Constructor: do the usual initialization, and then check that this
    delimiter is expected in the current context.
    """
    super(FormatDelimiter, me).__init__(*args)
    if me.char in COMPILE.delim: return
    format_string_error("unexpected close delimiter `~%s'" % me.char)
| 654 | |
| 655 | ###-------------------------------------------------------------------------- |
| 656 | ### Parsing format strings. |
| 657 | |
def parse_operator():
  """
  Parse one item -- a run of literal text or a single directive -- from the
  current control string, and return a formatting operation for it.

  Return `None' if the end of the control string (as recorded in
  `COMPILE.end') has been reached.  Otherwise `COMPILE.start' is advanced
  past whatever was parsed.
  """

  ctl = COMPILE.control
  pos, lim = COMPILE.start, COMPILE.end

  ## Nothing left to parse.
  if pos >= lim: return None

  ## A run of literal text extends as far as the next tilde, or to the end.
  if ctl[pos] != '~':
    tilde = ctl.find('~', pos, lim)
    if tilde < 0: tilde = lim
    COMPILE.start = tilde
    return FormatLiteral(ctl[pos:tilde])

  ## So this is a directive.  Step over the tilde.
  pos += 1

  ## Gather the parameters.  A bare comma stands for an omitted parameter;
  ## anything else is a parameter which may be followed by a comma.
  params = []
  while pos < lim:
    here = ctl[pos]
    if here == ',':
      params.append(LITNONE)
      pos += 1
      continue
    if here == "'":
      pos += 1
      if pos >= lim:
        raise FormatStringError('missing argument character', ctl, pos)
      params.append(LiteralParameter(ctl[pos]))
      pos += 1
    elif here.upper() == 'V':
      pos += 1
      params.append(VARNEXT)
    elif here == '!':
      ## The argument parser works from, and updates, `COMPILE.start'.
      COMPILE.start = pos + 1
      source = parse_arg()
      pos = COMPILE.start
      params.append(VariableParameter(source))
    elif here == '#':
      pos += 1
      params.append(REMAIN)
    else:
      m = R_INT.match(ctl, pos, lim)
      if not m: break
      params.append(LiteralParameter(int(m.group())))
      pos = m.end()
    if pos >= lim or ctl[pos] != ',': break
    pos += 1

  ## An explicit argument may follow; otherwise use the default collector.
  if pos < lim and ctl[pos] == '=':
    COMPILE.start = pos + 1
    getarg = parse_arg()
    pos = COMPILE.start
  else:
    getarg = NEXTARG

  ## Pick up the flags: each may appear at most once, in either order.
  atp = colonp = False
  while pos < lim:
    flag = ctl[pos]
    if flag == '@':
      if atp: raise FormatStringError('duplicate at flag', ctl, pos)
      atp = True
    elif flag == ':':
      if colonp: raise FormatStringError('duplicate colon flag', ctl, pos)
      colonp = True
    else:
      break
    pos += 1

  ## What's left must be the directive character.  Look it up in each of
  ## the operation maps in turn: the first map which knows it wins.
  if pos >= lim: raise FormatStringError('missing directive', ctl, pos)
  ch = ctl[pos].upper()
  op = None
  found = False
  for opmap in COMPILE.opmaps:
    try:
      op = opmap[ch]
    except KeyError:
      continue
    found = True
    break
  if not found: raise FormatStringError('unknown directive', ctl, pos)
  pos += 1

  ## Commit the position before building the operation, since its
  ## constructor may want to carry on parsing from here.
  COMPILE.start = pos
  return op(atp, colonp, getarg, params, ch)
| 753 | |
def collect_subformat(delim):
  """
  Gather formatting operations from the control string, stopping at the
  first delimiter whose directive character is listed in DELIM.

  Return a pair: a `FormatSequence' of the operations gathered, and the
  delimiter operation which ended the collection.  If the control string runs
  out first, then an error is reported which names the first element of
  DELIM.

  When an operation takes several sequences of formatting directives, the
  first element of DELIM should therefore be the proper closing delimiter.
  The traditional separator is `~;'.
  """
  ops = []
  with COMPILE.bind(delim = delim):
    while True:
      op = parse_operator()
      if not op:
        format_string_error("missing close delimiter `~%s'" % delim[0])
      closing = isinstance(op, FormatDelimiter) and op.char in delim
      if closing: break
      ops.append(op)
  return FormatSequence(ops), op
| 772 | |
def compile(control):
  """
  Compile the CONTROL string into a single formatting operation, and return
  it.  A CONTROL which isn't a string is returned unchanged.

  A control string is made of literal text and formatting directives, the
  latter introduced by a `~' character.  Literal text is copied to the output
  unchanged.  A directive may read /arguments/ -- the additional inputs given
  to the `format' function, typically things to be written to the output in
  some form -- and /parameters/, which adjust how the arguments are
  formatted, and which come either from the control string itself or from
  arguments.  A directive may also carry either or both of the flags `@' and
  `:'.

  What a directive does is decided by its formatting operation, which is
  found by looking up the directive's identifying character in
  `COMPILE.opmaps', a list of dictionaries.  The character is converted to
  upper-case (if it is alphabetic), and the dictionaries are consulted in
  order: the first match wins.  See the description of the `Formatting
  protocol' for how formatting operations work.

  The syntax of a formatting directive is as follows.

        DIRECTIVE ::= `~' [PARAMS] [`=' ARG] FLAGS CHAR

        PARAMS ::= PARAM [`,' PARAMS]

        PARAM ::= EMPTY | INT | `'' CHAR | `v' | `#' | `!' ARG

        FLAGS ::= [[ `@' | `:' ]]*

  (The useful but unusual notation [[ X | Y | ... ]]* denotes a sequence of
  items drawn from the listed alternatives, each appearing at most once.  See
  the function `parse_arg' for the syntax of ARG.)

  An empty PARAM is the same as leaving the parameter out; `!ARG' takes the
  parameter value from the argument; `v' abbreviates `!+', for convenience
  and for Common Lisp compatibility; `#' stands for the number of positional
  arguments remaining.  The `=ARG' notation says which argument(s) the
  operation should process: the default is `=+'.
  """
  if not isinstance(control, basestring): return control
  ops = []
  with COMPILE.bind(control = control, start = 0, end = len(control),
                    delim = ''):
    op = parse_operator()
    while op:
      ops.append(op)
      op = parse_operator()
  return FormatSequence(ops)
| 824 | |
| 825 | ###-------------------------------------------------------------------------- |
| 826 | ### Formatting text. |
| 827 | |
def format(out, control, *args, **kw):
  """
  Format the positional args and keywords according to the CONTROL, and write
  the result to OUT.

  The output is written to OUT, which may be one of the following.

  `True'        Write to standard output.

  `False'       Write to standard error.

  `None'        Return the output as a string.

  Any object with a `write' attribute
                Call `write' repeatedly with strings to be output.

  Any callable object
                Call the object repeatedly with strings to be output.

  If OUT is none of these then `TypeError' is raised, with OUT as its
  argument.

  The CONTROL argument may be one of the following.

  A string or unicode object
                Compile the string into a formatting operation and use that.

  A formatting operation
                Apply the operation to the arguments.

  The positional arguments ARGS and keyword arguments KW are made available
  to the formatting operation as its argument sequence and argument map.
  """

  ## Turn the output argument into a function which we can use easily.  If
  ## we're writing to a string, we'll have to extract the result at the end,
  ## so keep track of anything we have to do later.  (By default there's
  ## nothing to do: `U.constantly(None)' is presumably a function which just
  ## returns `None'.)
  final = U.constantly(None)
  if out is True:
    write = SYS.stdout.write
  elif out is False:
    write = SYS.stderr.write
  elif out is None:
    strio = StringIO()
    write = strio.write
    final = strio.getvalue
  elif hasattr(out, 'write'):
    write = out.write
  elif callable(out):
    write = out
  else:
    ## Build the exception explicitly.  The old two-expression `raise' form
    ## would spread a tuple OUT across the constructor's argument list, and
    ## so misreport the offending object.
    raise TypeError(out)

  ## Turn the control argument into a formatting operation.
  op = compile(control)

  ## Invoke the formatting operation in the correct environment: start at
  ## the first positional argument, with nothing pushed back.
  with FORMAT.bind(write = write, pushback = [],
                   argseq = args, argpos = 0,
                   argmap = kw):
    op.format()

  ## Done.
  return final()
| 886 | |
| 887 | ###-------------------------------------------------------------------------- |
| 888 | ### Standard formatting directives. |
| 889 | |
## The basic set of formatting operators is accumulated in this dictionary,
## keyed by directive character.  Callers wishing to implement extensions
## should make sure that it appears in their `opmaps' lists.
BASEOPS = dict()
COMPILE.opmaps = [BASEOPS]

## Register the standard delimiter directives.
for i in ']', ')', '}', '>', ';':
  BASEOPS[i] = FormatDelimiter
| 898 | |
class SimpleFormatOperation (BaseFormatOperation):
  """
  Shared implementation of the `~A' (`str') and `~S' (`repr') directives.

  The two directives differ only in how the argument is turned into text, so
  the padding logic lives here.  Subclasses must provide a method `_convert'
  which accepts the argument and returns the string to be written.

  The parameters are as follows.

  MINCOL        The minimum number of characters to output.  Padding is added
                if the output string is shorter than this.

  COLINC        Lengths of padding groups.  The number of padding characters
                will be MINPAD more than a multiple of COLINC.

  MINPAD        The smallest number of padding characters to write.

  PADCHAR       The padding character.

  With the `@' modifier the padding goes on the left of the text; without
  it, the padding goes on the right.
  """

  MAXPARAM = 4

  def _format(me, atp, colonp,
              mincol = 0, colinc = 1, minpad = 0, padchar = ' '):
    text = me._convert(me.getarg.get())

    ## Work out how much padding is wanted beyond the compulsory MINPAD
    ## characters: round the shortfall up to a whole number of COLINC-sized
    ## groups, but never remove anything.
    extra = mincol - len(text) - minpad + colinc - 1
    extra -= extra%colinc
    npad = max(extra, 0) + minpad

    ## Attach the padding, if there is any, on the appropriate side.
    if npad > 0:
      padding = npad * padchar
      if atp: text = padding + text
      else: text = text + padding
    FORMAT.write(text)
| 937 | |
class FormatString (SimpleFormatOperation):
  """
  ~A: convert argument to a string.

  The text written is whatever `str' makes of the argument.
  """
  def _convert(me, arg):
    return str(arg)
BASEOPS['A'] = FormatString
| 942 | |
class FormatRepr (SimpleFormatOperation):
  """
  ~S: convert argument to readable form.

  The text written is whatever `repr' makes of the argument.
  """
  def _convert(me, arg):
    return repr(arg)
BASEOPS['S'] = FormatRepr
| 947 | |
class IntegerFormat (BaseFormatOperation):
  """
  Common base class for the integer formatting directives `~D', `~B', `~O',
  `~X', and `~R'.

  These take similar parameters, so it's useful to deal with them at the same
  time.  There is a `_convert' method which does the main work.  By default,
  `_format' calls this with the argument and the value of the class attribute
  `RADIX'; complicated subclasses might want to override this behaviour.

  The parameters are as follows.

  MINCOL        Minimum column width.  If the output is smaller than this
                then it will be padded on the left.  The default is 0.

  PADCHAR       Character to use to pad the output, should this be necessary.
                The default is space.

  COMMACHAR     If the `:' modifier is present, then use this character to
                separate groups of digits.  The default is `,'.

  COMMAINTERVAL If the `:' modifier is present, then separate groups of this
                many digits.  The default is 3.

  If `@' is present, then a sign is always written; otherwise only `-' signs
  are written.
  """

  MAXPARAM = 4

  def _convert(me, n, radix, atp, colonp,
               mincol = 0, padchar = ' ',
               commachar = ',', commainterval = 3):
    """
    Convert the integer N into the given RADIX, under the control of the
    formatting parameters supplied, and write the result to the current
    output.

    Digits above 9 are written as upper-case letters `A' ... `Z' and then,
    for radices above 36, lower-case letters `a' ... `z'.  Consequently the
    RADIX must be between 2 and 62 inclusive: `ValueError' is raised
    otherwise.  If COMMAINTERVAL isn't positive then no digit separators are
    written, even if COLONP is set.
    """

    ## Sort out the sign.  We'll deal with it at the end: for now it's just a
    ## distraction.
    if n < 0: sign = '-'; n = -n
    elif atp: sign = '+'
    else: sign = None

    ## Build in `dd' a list of the digits, in reverse order.  This will make
    ## the commafication easier later.  The common radices are handled by
    ## Python's own conversions; use `%o' and `%X' rather than `oct' and
    ## `hex', because the latter decorate their output with radix prefixes
    ## (and, for long integers, an `L' suffix) which aren't digits.
    if radix == 10: dd = list(reversed(str(n)))
    elif radix == 8: dd = list(reversed('%o' % n))
    elif radix == 16: dd = list(reversed('%X' % n))
    else:
      ## A radix of 1 would never make progress, and we run out of digit
      ## characters above 62.
      if not 2 <= radix <= 62:
        raise ValueError('radix %r out of range' % (radix,))
      dd = []
      while n:
        n, r = divmod(n, radix)
        if r < 10: ch = chr(ord('0') + r)
        elif r < 36: ch = chr(ord('A') - 10 + r)
        else: ch = chr(ord('a') - 36 + r)
        dd.append(ch)
      if not dd: dd.append('0')

    ## If we must commafy then do that.  We're working from the least
    ## significant end, so drop a separator in front of every
    ## COMMAINTERVAL-th digit other than the very first.
    if colonp and commainterval > 0:
      ndd = []
      for i, d in enumerate(dd):
        if i and i%commainterval == 0: ndd.append(commachar)
        ndd.append(d)
      dd = ndd

    ## Include the sign.
    if sign: dd.append(sign)

    ## Maybe we must pad the result.
    s = ''.join(reversed(dd))
    npad = mincol - len(s)
    if npad > 0: s = npad*padchar + s

    ## And we're done.
    FORMAT.write(s)

  def _format(me, atp, colonp, mincol = 0, padchar = ' ',
              commachar = ',', commainterval = 3):
    """
    Fetch the argument and write it out in the radix given by the class
    attribute `RADIX'.
    """
    me._convert(me.getarg.get(), me.RADIX, atp, colonp, mincol, padchar,
                commachar, commainterval)
| 1036 | |
class FormatDecimal (IntegerFormat):
  """
  ~D: Decimal formatting.

  Write an integer argument in radix 10.
  """
  RADIX = 10
BASEOPS['D'] = FormatDecimal
| 1041 | |
class FormatBinary (IntegerFormat):
  """
  ~B: Binary formatting.

  Write an integer argument in radix 2.
  """
  RADIX = 2
BASEOPS['B'] = FormatBinary
| 1046 | |
class FormatOctal (IntegerFormat):
  """
  ~O: Octal formatting.

  Write an integer argument in radix 8.
  """
  RADIX = 8
BASEOPS['O'] = FormatOctal
| 1051 | |
class FormatHex (IntegerFormat):
  """
  ~X: Hexadecimal formatting.

  Write an integer argument in radix 16.
  """
  RADIX = 16
BASEOPS['X'] = FormatHex
| 1056 | |
class FormatRadix (IntegerFormat):
  """
  ~R: General integer formatting.

  The first parameter is the RADIX in which to write the argument; the
  remaining parameters are the usual MINCOL, PADCHAR, COMMACHAR and
  COMMAINTERVAL.  Leaving the radix out is not implemented, and raises
  `ValueError'.
  """

  MAXPARAM = 5

  def _format(me, atp, colonp, radix = None, mincol = 0, padchar = ' ',
              commachar = ',', commainterval = 3):
    if radix is None: raise ValueError('Not implemented')
    arg = me.getarg.get()
    me._convert(arg, radix, atp, colonp,
                mincol, padchar, commachar, commainterval)
BASEOPS['R'] = FormatRadix
| 1067 | |
class FormatSuppressNewline (BaseFormatOperation):
  """
  ~newline: suppressed newline and/or spaces.

  The newline is printed only if the `@' modifier is present; the string of
  whitespace characters which follows it in the control string is printed
  only if the `:' modifier is present.
  """

  R_SPACE = RX.compile(r'\s*')

  def __init__(me, *args):
    super(FormatSuppressNewline, me).__init__(*args)

    ## Consume the whitespace following the directive now, while compiling,
    ## so that it isn't taken for literal text; hang onto it in case we're
    ## asked to print it later.
    found = me.R_SPACE.match(COMPILE.control, COMPILE.start, COMPILE.end)
    me.trail = found.group()
    COMPILE.start = found.end()

  def _format(me, atp, colonp):
    pieces = []
    if atp: pieces.append('\n')
    if colonp: pieces.append(me.trail)
    for piece in pieces: FORMAT.write(piece)
BASEOPS['\n'] = FormatSuppressNewline
| 1086 | |
class LiteralFormat (BaseFormatOperation):
  """
  Base class for formatting operations whose output is a fixed string.

  Each subclass supplies the string to write (usually a single character) as
  its attribute `CHAR'.

  These operations accept a single parameter:

  COUNT         The number of copies of the string to be written.
  """

  MAXPARAM = 1

  def _format(me, atp, colonp, count = 1):
    text = me.CHAR * count
    FORMAT.write(text)
| 1101 | |
class FormatNewline (LiteralFormat):
  """
  ~%: Start a new line.

  Write a newline character, or COUNT of them if a parameter is given.
  """
  CHAR = '\n'
BASEOPS['%'] = FormatNewline
| 1106 | |
class FormatTilde (LiteralFormat):
  """
  ~~: Print a literal `~'.

  Write a tilde character, or COUNT of them if a parameter is given.
  """
  CHAR = '~'
BASEOPS['~'] = FormatTilde
| 1111 | |
class FormatCaseConvert (BaseFormatOperation):
  """
  ~(...~): Case-convert the contained output.

  The output of the enclosed directives is captured, and its case adjusted
  according to the modifiers before it is passed on.

  no modifiers  Convert to lower-case.
  @             Make initial letter upper-case and remainder lower.
  :             Make initial letters of words upper-case.
  @:            Convert to upper-case.
  """

  def __init__(me, *args):
    super(FormatCaseConvert, me).__init__(*args)
    me.sub = collect_subformat(')')[0]

  def _format(me, atp, colonp):
    captured = StringIO()
    try:
      ## Run the body with its output diverted into our buffer.
      with FORMAT.bind(write = captured.write):
        me.sub.format()
    finally:
      ## Convert and pass on whatever was captured, even if the body was
      ## abandoned part-way through.
      text = captured.getvalue()
      if atp and colonp: text = text.upper()
      elif atp: text = text.capitalize()
      elif colonp: text = text.title()
      else: text = text.lower()
      FORMAT.write(text)
BASEOPS['('] = FormatCaseConvert
| 1142 | |
class FormatGoto (BaseFormatOperation):
  """
  ~*: Seek in positional arguments.

  There may be a parameter N; its default value depends on which modifiers
  are present.  Without `@', skip forwards or backwards by N (default 1)
  places; with `@', move to argument N (default 0).  With `:', negate N, so
  move backwards instead of forwards, or count from the end rather than the
  beginning.  (Exception: `~@:0*' leaves no arguments remaining, whereas
  `~@-0*' is the same as `~@0*', and starts again from the beginning.)

  BUG: The list of pushed-back arguments is cleared.
  """

  MAXPARAM = 1

  def _format(me, atp, colonp, n = None):
    if not atp:

      ## Relative motion: step from the current position.
      if n is None: n = 1
      if colonp: n = -n
      pos = FORMAT.argpos + n

    else:

      ## Absolute motion: a negative index counts back from the end.
      if n is None: n = 0
      if colonp:
        if n > 0: n = -n
        else: n = len(FORMAT.argseq)
      if n < 0: n += len(FORMAT.argseq)
      pos = n

    FORMAT.argpos = pos
    FORMAT.pushback = []
BASEOPS['*'] = FormatGoto
| 1171 | |
class FormatConditional (BaseFormatOperation):
  """
  ~[...[~;...]...[~:;...]~]: Conditional formatting.

  The enclosed material is split into pieces by `~;' separators.  There are
  three variants, which are best dealt with separately.

  With no modifiers, apply the Nth enclosed piece, where N is either the
  parameter, or the argument if no parameter is provided.  If there is no
  such piece (i.e., N is negative or too large) and the final piece is
  introduced by `~:;' then use that piece; otherwise produce no output.

  With `:', there must be exactly two pieces: apply the first if the argument
  is false, otherwise the second.

  With `@', there must be exactly one piece: if the argument is not `None'
  then push it back and apply the enclosed piece.
  """

  MAXPARAM = 1

  def __init__(me, *args):

    ## Let the base class store the arguments.
    super(FormatConditional, me).__init__(*args)

    ## Gather the pieces one at a time.  A `~:;' separator announces that
    ## what follows is the default piece rather than an ordinary one.
    clauses, fallback, fallback_next = [], None, False
    done = False
    while not done:
      clause, sep = collect_subformat('];')
      if fallback_next: fallback = clause
      else: clauses.append(clause)
      if sep.char == ']':
        done = True
      elif sep.colonp:
        if fallback: format_string_error('multiple defaults')
        fallback_next = True

    ## Check that what we found is suitable for the modifiers we were given.
    if me.colonp or me.atp:
      if fallback: format_string_error('default not allowed here')
    nclauses = len(clauses)
    bad_for_colon = me.colonp and nclauses != 2
    bad_for_at = me.atp and nclauses != 1
    if bad_for_colon or bad_for_at:
      format_string_error('wrong number of pieces')

    ## Remember what we found.
    me.pieces = clauses
    me.default = fallback

  def _format(me, atp, colonp, n = None):
    if colonp:

      ## Two-way choice on the truth of the argument.
      if me.getarg.get(): chosen = me.pieces[1]
      else: chosen = me.pieces[0]
      chosen.format()

    elif atp:

      ## Use the piece only if there's a real argument, and let the piece
      ## see that argument again.
      arg = me.getarg.get()
      if arg is not None:
        FORMAT.pushback.append(arg)
        me.pieces[0].format()

    else:

      ## Select by index, falling back to the default piece (if any).
      if n is None: n = me.getarg.get()
      if 0 <= n < len(me.pieces): chosen = me.pieces[n]
      else: chosen = me.default
      if chosen: chosen.format()
BASEOPS['['] = FormatConditional
| 1237 | |
class FormatIteration (BaseFormatOperation):
  """
  ~{...~}: Repeated formatting.

  Apply the enclosed formatting directives over and over to a sequence of
  different arguments.  The directives may contain `~^' to escape early.

  Without `@', an argument is fetched and is expected to be a sequence; with
  `@', the remaining positional arguments are processed.

  Without `:', the enclosed directives are simply applied until the sequence
  of arguments is exhausted: each iteration may consume any number of
  arguments (even zero, though this is likely a bad plan) and any left over
  are available to the next iteration.  With `:', each element of the
  sequence of arguments is itself treated as a collection of arguments --
  either positional or keyword depending on whether it looks like a map --
  and exactly one such element is consumed in each iteration.

  If a parameter is supplied then perform at most this many iterations.  If
  the closing delimiter bears a `:' modifier, and the parameter is not zero,
  then the enclosed directives are applied once even if the argument sequence
  is empty.

  If the formatting directives are empty then a formatting control is fetched
  using the argument collector associated with the closing delimiter.
  """

  MAXPARAM = 1

  def __init__(me, *args):
    super(FormatIteration, me).__init__(*args)
    body, closer = collect_subformat('}')
    me.body = body
    me.end = closer

  def _multi(me, body):
    """
    Apply BODY to the next positional argument, treating that argument as a
    complete set of arguments in its own right.
    """
    argset = NEXTARG.get()
    with U.Escape() as inner:
      ## The escape currently in force becomes the way out of the whole
      ## iteration; the fresh one only abandons this argument set.
      with bind_args(argset, multi_escape = FORMAT.escape, escape = inner,
                     last_multi_p = not remaining()):
        body.format()

  def _single(me, body):
    """
    Apply BODY directly to the current argument sequence.
    """
    body.format()

  def _loop(me, each, max):
    """
    Call the function EACH repeatedly, passing it the formatting operation
    which represents the body to be applied.  Stop when no positional
    arguments remain, or, if MAX is not `None', after MAX iterations.
    """

    ## An empty body means that the real body is to be found among the
    ## arguments.
    if me.body.seq: body = me.body
    else: body = compile(me.end.getarg.get())

    oncep = me.end.colonp
    count = 0
    while max is None or count < max:
      ## The first pass is forced if the closing delimiter said so;
      ## otherwise we need arguments to work on.
      forced = oncep and count == 0
      if not forced and not remaining(): break
      each(body)
      count += 1

  def _format(me, atp, colonp, max = None):
    each = me._multi if colonp else me._single
    with U.Escape() as esc:
      with FORMAT.bind(escape = esc):
        if atp:
          me._loop(each, max)
        else:
          with bind_args(me.getarg.get()):
            me._loop(each, max)
BASEOPS['{'] = FormatIteration
| 1316 | |
class FormatEscape (BaseFormatOperation):
  """
  ~^: Escape from iteration.

  Conditionally leave an iteration early.

  There may be up to three parameters: call them X, Y and Z.  If all three
  are present then exit unless Y is between X and Z (inclusive); if two are
  present then exit if X = Y; if only one is present, then exit if X is
  zero.  Obviously these are more useful if at least one of X, Y and Z is
  variable.

  With no parameters, exit if there are no positional arguments remaining.
  With `:', check the number of argument sets (as read by `~:{...~}') rather
  than the number of arguments in the current set, and escape from the entire
  iteration rather than from the processing of the current set.
  """

  MAXPARAM = 3

  def _format(me, atp, colonp, x = None, y = None, z = None):

    ## Decide whether to carry on as if nothing had happened.
    if z is not None: carry_on = x <= y <= z
    elif y is not None: carry_on = x != y
    elif x is not None: carry_on = x != 0
    elif colonp: carry_on = not FORMAT.last_multi_p
    else: carry_on = remaining()

    ## If not, take the appropriate way out.
    if not carry_on:
      if colonp: FORMAT.multi_escape()
      else: FORMAT.escape()
BASEOPS['^'] = FormatEscape
| 1345 | |
class FormatRecursive (BaseFormatOperation):
  """
  ~?: Recursive formatting.

  Without `@', read a pair of arguments: use the first as a format control,
  and apply it to the arguments extracted from the second (which may be a
  sequence or a map).

  With `@', read a single argument: use it as a format string and apply it to
  the remaining arguments.

  Either way, the control is run with a fresh escape in force.
  """

  def _format(me, atp, colonp):
    with U.Escape() as esc:
      if atp:
        ## The nested control shares our arguments.
        nested = compile(me.getarg.get())
        with FORMAT.bind(escape = esc):
          nested.format()
      else:
        ## The nested control gets a set of arguments all of its own.
        control, args = me.getarg.pair()
        nested = compile(control)
        with bind_args(args, escape = esc):
          nested.format()
BASEOPS['?'] = FormatRecursive
| 1368 | |
| 1369 | ###----- That's all, folks -------------------------------------------------- |