X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~mdw/git/tripe/blobdiff_plain/2d51bc9fb4f3f7f10b28d1931058214925afcdf8..4063c2b5a87394841a65addf392b4058be52e942:/peerdb/tripe-newpeers.in diff --git a/peerdb/tripe-newpeers.in b/peerdb/tripe-newpeers.in index 59fd85fa..00361d40 100644 --- a/peerdb/tripe-newpeers.in +++ b/peerdb/tripe-newpeers.in @@ -28,13 +28,13 @@ VERSION = '@VERSION@' ###-------------------------------------------------------------------------- ### External dependencies. -import ConfigParser as CP import mLib as M from optparse import OptionParser import cdb as CDB from sys import stdin, stdout, exit, argv import re as RX import os as OS +from cStringIO import StringIO ###-------------------------------------------------------------------------- ### Utilities. @@ -103,12 +103,38 @@ class BulkResolver (object): ###-------------------------------------------------------------------------- ### The configuration parser. +## Match a comment or empty line. +RX_COMMENT = RX.compile(r'(?x) ^ \s* (?: $ | [;#])') + +## Match a section group header. +RX_GRPHDR = RX.compile(r'(?x) ^ \s* \[ (.*) \] \s* $') + +## Match an assignment line. +RX_ASSGN = RX.compile(r'''(?x) ^ + ([^\s:=] (?: [^:=]* [^\s:=])?) + \s* [:=] \s* + (| \S | \S.*\S) + \s* $''') + +## Match a continuation line. +RX_CONT = RX.compile(r'''(?x) ^ \s+ + (| \S | \S.*\S) + \s* $''') + ## Match a $(VAR) configuration variable reference; group 1 is the VAR. RX_REF = RX.compile(r'(?x) \$ \( ([^)]+) \)') ## Match a $[HOST] name resolution reference; group 1 is the HOST. RX_RESOLVE = RX.compile(r'(?x) \$ \[ ([^]]+) \]') +class ConfigSyntaxError (Exception): + def __init__(me, fname, lno, msg): + me.fname = fname + me.lno = lno + me.msg = msg + def __str__(me): + return '%s:%d: %s' % (me.fname, me.lno, me.msg) + def _fmt_path(path): return ' -> '.join(["`%s'" % hop for hop in path]) @@ -130,6 +156,12 @@ class InheritanceCycleError (Exception): return "Found a cycle %s looking up key `%s'" % \ (_fmt_path(me.path), me.key) +class MissingSectionException (Exception): + def __init__(me, sec): + me.key = key + def __str__(me): + return "Section `%s' not found" % (me.sec) + class MissingKeyException (Exception): def __init__(me, sec, key): me.sec = sec @@ -137,131 +169,108 @@ class MissingKeyException (Exception): def __str__(me): return "Key `%s' not found in section `%s'" % (me.key, me.sec) -class MyConfigParser (CP.RawConfigParser): +class ConfigSection (object): """ - A more advanced configuration parser. - - This has three major enhancements over the standard ConfigParser which are - relevant to us. - - * It recognizes `@inherits' keys and follows them when expanding a - value. - - * It recognizes `$(VAR)' references to configuration variables during - expansion and processes them correctly. - - * It recognizes `$[HOST]' name-resolver requests and handles them - correctly. - - Use: - - 1. Call read(FILENAME) and/or read(FP, [FILENAME]) to slurp in the - configuration data. - - 2. Call resolve() to collect the hostnames which need to be resolved and - actually do the name resolution. + A section in a configuration parser. - 3. Call get(SECTION, ITEM) to collect the results, or items(SECTION) to - iterate over them. + This is where a lot of the nitty-gritty stuff actually happens. The + `MyConfigParser' knows a lot about the internals of this class, which saves + on building a complicated interface. """ - def __init__(me): - """ - Initialize a new, empty configuration parser. - """ - CP.RawConfigParser.__init__(me) - me._resolver = BulkResolver() - - def resolve(me): - """ - Works out all of the hostnames which need resolving and resolves them. - - Until you call this, attempts to fetch configuration items which need to - resolve hostnames will fail! - """ - for sec in me.sections(): - for key, value in me.items(sec, resolvep = False): - for match in RX_RESOLVE.finditer(value): - me._resolver.prepare(match.group(1)) - me._resolver.run() - - def _expand(me, sec, string, resolvep): + def __init__(me, name, cp): + """Initialize a new, empty section with a given NAME and parent CP.""" + + ## The cache maps item keys to entries, which consist of a pair of + ## objects. There are four possible states for a cache entry: + ## + ## * missing -- there is no entry at all with this key, so we must + ## search for it; + ## + ## * None, None -- we are actively trying to resolve this key, so if we + ## encounter this state, we have found a cycle in the inheritance + ## graph; + ## + ## * None, [] -- we know that this key isn't reachable through any of + ## our parents; + ## + ## * VALUE, PATH -- we know that the key resolves to VALUE, along the + ## PATH from us (exclusive) to the defining parent (inclusive). + me.name = name + me._itemmap = dict() + me._cache = dict() + me._cp = cp + + def _expand(me, string, resolvep): """ Expands $(...) and (optionally) $[...] placeholders in STRING. - The SEC is the configuration section from which to satisfy $(...) - requests. RESOLVEP is a boolean switch: do we bother to tax the resolver - or not? This is turned off by the resolve() method while it's collecting - hostnames to be resolved. + RESOLVEP is a boolean switch: do we bother to tax the resolver or not? + This is turned off by MyConfigParser's resolve() method while it's + collecting hostnames to be resolved. """ string = RX_REF.sub \ - (lambda m: me.get(sec, m.group(1), resolvep), string) + (lambda m: me.get(m.group(1), resolvep), string) if resolvep: - string = RX_RESOLVE.sub(lambda m: me._resolver.lookup(m.group(1)), + string = RX_RESOLVE.sub(lambda m: me._cp._resolver.lookup(m.group(1)), string) return string - def has_option(me, sec, key): - """ - Decide whether section SEC has a configuration key KEY. + def _parents(me): + """Yield this section's parents.""" + try: names = me._itemmap['@inherit'] + except KeyError: return + for name in names.replace(',', ' ').split(): + yield me._cp.section(name) - This version of the method properly handles the @inherit key. - """ - return key == 'name' or me._get(sec, key)[0] is not None - - def _get(me, sec, key, map = None, path = None): + def _get(me, key, path = None): """ Low-level option-fetching method. - Fetch the value for the named KEY from section SEC, or maybe - (recursively) a section which SEC inherits from. + Fetch the value for the named KEY in this section, or maybe (recursively) + a section which it inherits from. Returns a pair VALUE, PATH. The value is not expanded; nor do we check for the special `name' key. The caller is expected to do these things. Returns None if no value could be found. """ - ## If we weren't given a memoization map or path, then we'd better make - ## one. - if map is None: map = {} + ## If we weren't given a path, then we'd better make one. if path is None: path = [] - ## Extend the path to cover the lookup section, but remember to remove us - ## again when we've finished. If we need to pass the current path back - ## upwards, then remember to take a copy. - path.append(sec) + ## Extend the path to cover us, but remember to remove us again when + ## we've finished. If we need to pass the current path back upwards, + ## then remember to take a copy. + path.append(me.name) try: - ## If we've been this way before on another pass through then return - ## the value we found then. If we're still thinking about it then - ## we've found a cycle. - try: threadp, value = map[sec] + ## If we've been this way before on another pass through then return the + ## value we found then. If we're still thinking about it then we've + ## found a cycle. + try: v, p = me._cache[key] except KeyError: pass else: - if threadp: raise InheritanceCycleError(key, path[:]) + if p is None: raise InheritanceCycleError(key, path[:]) + else: return v, path + p ## See whether the answer is ready waiting for us. - try: v = CP.RawConfigParser.get(me, sec, key) - except CP.NoOptionError: pass - else: return v, path[:] - - ## No, apparently, not. Find out our list of parents. - try: - parents = CP.RawConfigParser.get(me, sec, '@inherit').\ - replace(',', ' ').split() - except CP.NoOptionError: - parents = [] + try: v = me._itemmap[key] + except KeyError: pass + else: + p = path[:] + me._cache[key] = v, [] + return v, p ## Initially we have no idea. value = None - winner = None + winner = [] ## Go through our parents and ask them what they think. - map[sec] = True, None - for p in parents: + me._cache[key] = None, None + for p in me._parents(): ## See whether we get an answer. If not, keep on going. - v, pp = me._get(p, key, map, path) + v, pp = p._get(key, path) if v is None: continue ## If we got an answer, check that it matches any previous ones. @@ -272,60 +281,184 @@ class MyConfigParser (CP.RawConfigParser): raise AmbiguousOptionError(key, winner, value, pp, v) ## That's the best we could manage. - map[sec] = False, value + me._cache[key] = value, winner[len(path):] return value, winner finally: ## Remove us from the path again. path.pop() - def get(me, sec, key, resolvep = True): + def get(me, key, resolvep = True): """ - Retrieve the value of KEY from section SEC. + Retrieve the value of KEY from this section. """ ## Special handling for the `name' key. if key == 'name': - try: value = CP.RawConfigParser.get(me, sec, key) - except CP.NoOptionError: value = sec + value = me._itemmap.get('name', me.name) + elif key == '@inherits': + try: return me._itemmap['@inherits'] + except KeyError: raise MissingKeyException(me.name, key) else: - value, _ = me._get(sec, key) + value, _ = me._get(key) if value is None: - raise MissingKeyException(sec, key) + raise MissingKeyException(me.name, key) ## Expand the value and return it. - return me._expand(sec, value, resolvep) + return me._expand(value, resolvep) - def items(me, sec, resolvep = True): + def items(me, resolvep = True): """ - Return a list of (NAME, VALUE) items in section SEC. - - This extends the default method by handling the inheritance chain. + Yield a list of item names in the section. """ ## Initialize for a depth-first walk of the inheritance graph. - d = {} - visited = {} - basesec = sec - stack = [sec] + seen = { 'name': True } + visiting = { me.name: True } + stack = [me] ## Visit nodes, collecting their keys. Don't believe the values: ## resolving inheritance is too hard to do like this. while stack: sec = stack.pop() - if sec in visited: continue - visited[sec] = True - - for key, value in CP.RawConfigParser.items(me, sec): - if key == '@inherit': stack += value.replace(',', ' ').split() - else: d[key] = None + for p in sec._parents(): + if p.name not in visiting: + stack.append(p); visiting[p.name] = True - ## Now collect the values for the known keys, one by one. - items = [] - for key in d: items.append((key, me.get(basesec, key, resolvep))) + for key in sec._itemmap.iterkeys(): seen[key] = None ## And we're done. - return items + return seen.iterkeys() + +class MyConfigParser (object): + """ + A more advanced configuration parser. + + This has four major enhancements over the standard ConfigParser which are + relevant to us. + + * It recognizes `@inherits' keys and follows them when expanding a + value. + + * It recognizes `$(VAR)' references to configuration variables during + expansion and processes them correctly. + + * It recognizes `$[HOST]' name-resolver requests and handles them + correctly. + + * Its parsing behaviour is well-defined. + + Use: + + 1. Call parse(FILENAME) to slurp in the configuration data. + + 2. Call resolve() to collect the hostnames which need to be resolved and + actually do the name resolution. + + 3. Call sections() to get a list of the configuration sections, or + section(NAME) to find a named section. + + 4. Call get(ITEM) on a section to collect the results, or items() to + iterate over them. + """ + + def __init__(me): + """ + Initialize a new, empty configuration parser. + """ + me._sectmap = dict() + me._resolver = BulkResolver() + + def parse(me, f): + """ + Parse configuration from a file F. + """ + + ## Initial parser state. + sect = None + key = None + val = None + lno = 0 + + ## An unpleasant hack. Python makes it hard to capture a value in a + ## variable and examine it in a single action, and this is the best that + ## I came up with. + m = [None] + def match(rx): m[0] = rx.match(line); return m[0] + + ## Commit a key's value when we've determined that there are no further + ## continuation lines. + def flush(): + if key is not None: sect._itemmap[key] = val.getvalue() + + ## Work through all of the input lines. + for line in f: + lno += 1 + + if match(RX_COMMENT): + ## A comment or a blank line. Nothing doing. (This means that we + ## leave out blank lines which look like they might be continuation + ## lines.) + + pass + + elif match(RX_GRPHDR): + ## A section header. Flush out any previous value and set up the new + ## group. + + flush() + name = m[0].group(1) + try: sect = me._sectmap[name] + except KeyError: sect = me._sectmap[name] = ConfigSection(name, me) + key = None + + elif match(RX_ASSGN): + ## A new assignment. Flush out the old one, and set up to store this + ## one. + + if sect is None: + raise ConfigSyntaxError(f.name, lno, 'no active section to update') + flush() + key = m[0].group(1) + val = StringIO(); val.write(m[0].group(2)) + + elif match(RX_CONT): + ## A continuation line. Accumulate the value. + + if key is None: + raise ConfigSyntaxError(f.name, lno, 'no config value to continue') + val.write('\n'); val.write(m[0].group(1)) + + else: + ## Something else. + + raise ConfigSyntaxError(f.name, lno, 'incomprehensible line') + + ## Don't forget to commit any final value material. + flush() + + def section(me, name): + """Return a ConfigSection with the given NAME.""" + try: return me._sectmap[name] + except KeyError: raise MissingSectionException(name) + + def sections(me): + """Yield the known sections.""" + return me._sectmap.itervalues() + + def resolve(me): + """ + Works out all of the hostnames which need resolving and resolves them. + + Until you call this, attempts to fetch configuration items which need to + resolve hostnames will fail! + """ + for sec in me.sections(): + for key in sec.items(): + value = sec.get(key, resolvep = False) + for match in RX_RESOLVE.finditer(value): + me._resolver.prepare(match.group(1)) + me._resolver.run() ###-------------------------------------------------------------------------- ### Command-line handling. @@ -382,7 +515,7 @@ def getconf(args): """ conf = MyConfigParser() for f in inputiter(args): - conf.readfp(f) + conf.parse(f) conf.resolve() return conf @@ -393,22 +526,24 @@ def output(conf, cdb): This is where the special `user' and `auto' database entries get set. """ auto = [] - for sec in sorted(conf.sections()): - if sec.startswith('@'): + for sec in sorted(conf.sections(), key = lambda sec: sec.name): + if sec.name.startswith('@'): continue - elif sec.startswith('$'): - label = sec + elif sec.name.startswith('$'): + label = sec.name else: - label = 'P%s' % sec - if conf.has_option(sec, 'auto') and \ - conf.get(sec, 'auto') in ('y', 'yes', 't', 'true', '1', 'on'): - auto.append(sec) - if conf.has_option(sec, 'user'): - cdb.add('U%s' % conf.get(sec, 'user'), sec) - url = M.URLEncode(laxp = True, semip = True) - for key, value in sorted(conf.items(sec), key = lambda (k, v): k): + label = 'P%s' % sec.name + try: a = sec.get('auto') + except MissingKeyException: pass + else: + if a in ('y', 'yes', 't', 'true', '1', 'on'): auto.append(sec.name) + try: u = sec.get('user') + except MissingKeyException: pass + else: cdb.add('U%s' % u) + url = M.URLEncode(semip = True) + for key in sorted(sec.items()): if not key.startswith('@'): - url.encode(key, ' '.join(M.split(value)[0])) + url.encode(key, sec.get(key)) cdb.add(label, url.result) cdb.add('%AUTO', ' '.join(auto)) cdb.finish()