chiark / gitweb /
peerdb/tripe-newpeers.in: Split out a resolver base class.
[tripe] / peerdb / tripe-newpeers.in
CommitLineData
6005ef9b
MW
1#! @PYTHON@
2### -*-python-*-
3###
4### Build a CDB file from configuration file
5###
6### (c) 2007 Straylight/Edgeware
7###
8
9###----- Licensing notice ---------------------------------------------------
10###
11### This file is part of Trivial IP Encryption (TrIPE).
12###
11ad66c2
MW
13### TrIPE is free software: you can redistribute it and/or modify it under
14### the terms of the GNU General Public License as published by the Free
15### Software Foundation; either version 3 of the License, or (at your
16### option) any later version.
6005ef9b 17###
11ad66c2
MW
18### TrIPE is distributed in the hope that it will be useful, but WITHOUT
19### ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
20### FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
21### for more details.
6005ef9b
MW
22###
23### You should have received a copy of the GNU General Public License
11ad66c2 24### along with TrIPE. If not, see <https://www.gnu.org/licenses/>.
6005ef9b
MW
25
26VERSION = '@VERSION@'
27
28###--------------------------------------------------------------------------
29### External dependencies.
30
6005ef9b
MW
31import mLib as M
32from optparse import OptionParser
33import cdb as CDB
34from sys import stdin, stdout, exit, argv
35import re as RX
36import os as OS
97567475 37import socket as S
b7e5aa06 38from cStringIO import StringIO
6005ef9b
MW
39
40###--------------------------------------------------------------------------
41### Utilities.
42
class CDBFake (object):
  """
  A stand-in for a `cdbmake' object which writes plain text instead.

  Each record is written to the output file as a line of the form
  `KEY:VALUE', which is intended as input for `cdb-map'.
  """

  def __init__(me, file = stdout):
    """Initialize the fake, arranging to write records to FILE."""
    me.file = file

  def add(me, key, value):
    """Write a single record mapping KEY to VALUE."""
    record = '%s:%s\n' % (key, value)
    me.file.write(record)

  def finish(me):
    """Finish the output: there is nothing left to do."""
    return None
51
1c4623dd
MW
class ExpectedError (Exception):
  """
  Base class for failures which are reported to the user as a message.

  The main program catches exceptions of this class, prints the message
  text, and exits with status 2.
  """
53
6005ef9b
MW
54###--------------------------------------------------------------------------
55### A bulk DNS resolver.
56
class ResolverFailure (ExpectedError):
  """
  Report that a host name could not be resolved.

  The HOST and a human-readable MSG are kept as attributes, and combined to
  form the error message.
  """

  def __init__(me, host, msg):
    """Record the HOST which failed, and the reason MSG."""
    me.host, me.msg = host, msg

  def __str__(me):
    """Return the message to show to the user."""
    return "failed to resolve `%s': %s" % (me.host, me.msg)
63
660564a1
MW
class ResolvingHost (object):
  """
  The state of a single host name being looked up by a bulk resolver.

  The object collects the addresses found for the name, sorted by address
  family, or a failure message if resolution failed.  It also implements the
  flag handling for the `$FLAGS[HOSTNAME]' syntax: see `get'.
  """

  def __init__(me, name):
    """Set up a resolving-host object for NAME, with no addresses yet."""
    me.name = name
    me.addr = { 'INET': [], 'INET6': [] }
    me.failure = None

  def addaddr(me, af, addr):
    """
    Record ADDR as an address for this host, in address family AF.

    AF must be one of the strings `INET' or `INET6'.
    """
    me.addr[af].append(addr)

  def failed(me, msg):
    """Note that resolving this host failed; MSG says why."""
    me.failure = msg

  def get(me, flags):
    """
    Return a list of this host's addresses, as selected by FLAGS.

    FLAGS is a string of flag characters: `4' and `6' select IPv4 and IPv6
    addresses respectively, in the order given; if neither appears, the IPv4
    addresses are listed first, followed by the IPv6 ones.  Only the first
    selected address is returned, unless `*' is given.

    Raises `ResolverFailure' if resolution failed or no address matches, and
    `ValueError' if FLAGS contains an unrecognized character.
    """
    if me.failure is not None:
      raise ResolverFailure(me.name, me.failure)

    ## Work through the flags, gathering the selected families in order.
    byflag = { '4': me.addr['INET'], '6': me.addr['INET6'] }
    chosen = []
    wantall = False
    familyp = False
    for flag in flags:
      if flag == '*':
        wantall = True
      elif flag in byflag:
        chosen.extend(byflag[flag])
        familyp = True
      else:
        raise ValueError("unknown address-resolution flag `%s'" % flag)

    ## If no family was requested explicitly, take everything, IPv4 first.
    if not familyp:
      chosen = byflag['4'] + byflag['6']

    if not chosen:
      raise ResolverFailure(me.name, 'no matching addresses found')
    if wantall: return chosen
    return chosen[:1]
660564a1 108
class BaseBulkResolver (object):
  """
  Resolve a number of DNS names in parallel.

  The BulkResolver resolves a number of hostnames in parallel.  Using it
  works in three phases:

    1. You call prepare(HOSTNAME) a number of times, to feed in the hostnames
       you're interested in.

    2. You call run() to actually drive the resolver.

    3. You call lookup(HOSTNAME, FLAGS) to get the addresses you wanted.
       This will fail with KeyError if HOSTNAME was never passed to
       prepare(), and with ResolverFailure if the name couldn't be resolved
       or has no addresses matching FLAGS.

  This is a base class.  It keeps track of the names and handles numeric
  address literals itself; a subclass must provide `_prepare' to start a
  real lookup for anything else, and `run' to wait for the lookups to
  finish.
  """

  def __init__(me):
    """Initialize the resolver."""
    ## Map from host names, as given to `prepare', to `ResolvingHost'
    ## objects.
    me._namemap = {}

  def prepare(me, name):
    """
    Prime the resolver to resolve the given host NAME.

    Names which have been seen before are ignored.  A NAME which is a
    numeric address literal is dealt with immediately; anything else is
    handed to the subclass's `_prepare' method.
    """
    if name in me._namemap: return
    me._namemap[name] = host = ResolvingHost(name)

    ## With `AI_NUMERICHOST' set, `getaddrinfo' only accepts address
    ## literals and reports an error for anything else: use this to tell the
    ## two cases apart.
    try:
      ailist = S.getaddrinfo(name, None, S.AF_UNSPEC, S.SOCK_DGRAM, 0,
                             S.AI_NUMERICHOST | S.AI_NUMERICSERV)
    except S.gaierror:
      me._prepare(host, name)
    else:
      for af, skty, proto, cname, sa in ailist:
        if af == S.AF_INET: host.addaddr('INET', sa[0])
        elif af == S.AF_INET6: host.addaddr('INET6', sa[0])

  def _prepare(me, host, name):
    """
    Arrange to resolve a NAME, reporting the results to HOST.

    Subclasses must override this.
    """
    raise NotImplementedError('subclass must implement _prepare')

  def run(me):
    """
    Drive the resolver until all outstanding lookups have finished.

    Subclasses must override this.
    """
    raise NotImplementedError('subclass must implement run')

  def lookup(me, name, flags):
    """
    Fetch the addresses corresponding to the host NAME.

    FLAGS is the flags string from a `$FLAGS[HOST]' reference; it is passed
    on to the host object's `get' method, whose result is returned.
    """
    return me._namemap[name].get(flags)
6005ef9b 146
ad367891
MW
class BresBulkResolver (BaseBulkResolver):
  """
  A BulkResolver using mLib's `bres' background resolver.

  This is always available (and might use ADNS), but only does IPv4.
  """

  def __init__(me):
    """Initialize the resolver."""
    super(BresBulkResolver, me).__init__()

    ## The number of lookups which have been started but haven't reported
    ## back yet.  The `run' method keeps going until this drops to zero.
    me._noutstand = 0

  def _prepare(me, host, name):
    """Arrange to resolve a NAME, reporting the results to HOST."""

    ## Both callbacks end up in `_resolved'.  The second passes an empty
    ## address list, which `_resolved' treats as a failure.  The job object
    ## is hung on the host until it reports back.
    host._resolv = M.SelResolveByName(
      name,
      lambda cname, alias, addr: me._resolved(host, cname, addr),
      lambda: me._resolved(host, None, []))
    me._noutstand += 1

  def run(me):
    """Run the background DNS resolver until it's finished."""
    while me._noutstand: M.select()

  def _resolved(me, host, cname, addr):
    """
    Callback function: remember that ADDRs are the addresses for HOST.

    An empty ADDR list means that the lookup failed.  If a CNAME was
    reported then it replaces the host's name.
    """
    if not addr:
      host.failed('(unknown failure)')
    else:
      if cname is not None: host.name = cname
      for a in addr: host.addaddr('INET', a)

    ## This lookup is finished: drop the job and count it off.
    host._resolv = None
    me._noutstand -= 1
6005ef9b 180
ad367891
MW
181## Select a bulk resolver. Currently, there's only one choice.
182BulkResolver = BresBulkResolver
183
6005ef9b
MW
184###--------------------------------------------------------------------------
185### The configuration parser.
186
b7e5aa06
MW
## A comment or blank line: optional leading whitespace, and then either
## the end of the line or a `;' or `#' character.
RX_COMMENT = RX.compile(r'(?x) ^ \s* (?: $ | [;#])')

## A section group header `[NAME]'; group 1 is the NAME.
RX_GRPHDR = RX.compile(r'(?x) ^ \s* \[ (.*) \] \s* $')

## An assignment line `KEY = VALUE' or `KEY: VALUE'; group 1 is the KEY and
## group 2 is the (possibly empty) VALUE, both without surrounding
## whitespace.  The KEY must start in the first column.
RX_ASSGN = RX.compile(r'''(?x) ^
        ([^\s:=] (?: [^:=]* [^\s:=])?)
        \s* [:=] \s*
        (| \S | \S.*\S)
        \s* $''')

## A continuation line, which begins with whitespace; group 1 is the text
## to be added to the value, without surrounding whitespace.
RX_CONT = RX.compile(r'''(?x) ^ \s+
        (| \S | \S.*\S)
        \s* $''')

## A $(VAR) configuration variable reference; group 1 is the VAR.
RX_REF = RX.compile(r'(?x) \$ \( ([^)]+) \)')

## A $FLAGS[HOST] name resolution reference; group 1 is the FLAGS (zero or
## more of `4', `6', and `*'); group 2 is the HOST.
RX_RESOLVE = RX.compile(r'(?x) \$ ([46*]*) \[ ([^]]+) \]')
6005ef9b 211
class ConfigSyntaxError (ExpectedError):
  """
  Report a syntax error in a configuration file.

  The error message has the form `FNAME:LNO: MSG', giving the file name and
  line number at which the problem was found.
  """

  def __init__(me, fname, lno, msg):
    """Record the file name FNAME, line number LNO, and message MSG."""
    me.fname, me.lno, me.msg = fname, lno, msg

  def __str__(me):
    """Return the message to show to the user."""
    return '%s:%d: %s' % (me.fname, me.lno, me.msg)
219
bd3db76c
MW
220def _fmt_path(path):
221 return ' -> '.join(["`%s'" % hop for hop in path])
222
class AmbiguousOptionError (ExpectedError):
  """
  Report that a key inherits different values along different paths.

  The attributes record the KEY being looked up, and two inheritance paths
  PATHA and PATHB together with the conflicting values VALA and VALB that
  they lead to.
  """

  def __init__(me, key, patha, vala, pathb, valb):
    """Record the KEY and the two conflicting (path, value) pairs."""
    me.key = key
    me.patha = patha; me.vala = vala
    me.pathb = pathb; me.valb = valb

  def __str__(me):
    """Return the message to show to the user."""
    details = (me.key,
               _fmt_path(me.patha), me.vala,
               _fmt_path(me.pathb), me.valb)
    return "Ambiguous answer resolving key `%s': " \
        "path %s yields `%s' but %s yields `%s'" % details
232
class InheritanceCycleError (ExpectedError):
  """
  Report that the inheritance graph contains a cycle.

  The attributes record the KEY which was being looked up and the PATH of
  section names along which the cycle was found.
  """

  def __init__(me, key, path):
    """Record the KEY being looked up and the cyclic PATH."""
    me.key, me.path = key, path

  def __str__(me):
    """Return the message to show to the user."""
    cycle = _fmt_path(me.path)
    return "Found a cycle %s looking up key `%s'" % (cycle, me.key)
240
class MissingSectionException (ExpectedError):
  """Report that a named configuration section doesn't exist."""

  def __init__(me, sec):
    """Record the name SEC of the missing section."""
    me.sec = sec

  def __str__(me):
    """Return the message to show to the user."""
    return "Section `%s' not found" % me.sec
246
class MissingKeyException (ExpectedError):
  """Report that a key couldn't be found in a configuration section."""

  def __init__(me, sec, key):
    """Record the section name SEC and the missing KEY."""
    me.sec, me.key = sec, key

  def __str__(me):
    """Return the message to show to the user."""
    return "Key `%s' not found in section `%s'" % (me.key, me.sec)
253
e3ec3a3a
MW
class ConfigSection (object):
  """
  A section in a configuration parser.

  This is where a lot of the nitty-gritty stuff actually happens.  The
  `MyConfigParser' knows a lot about the internals of this class, which saves
  on building a complicated interface.
  """

  def __init__(me, name, cp):
    """Initialize a new, empty section with a given NAME and parent CP."""

    ## The cache maps item keys to entries, which consist of a pair of
    ## objects.  There are four possible states for a cache entry:
    ##
    ##   * missing -- there is no entry at all with this key, so we must
    ##     search for it;
    ##
    ##   * None, None -- we are actively trying to resolve this key, so if we
    ##     encounter this state, we have found a cycle in the inheritance
    ##     graph;
    ##
    ##   * None, [] -- we know that this key isn't reachable through any of
    ##     our parents;
    ##
    ##   * VALUE, PATH -- we know that the key resolves to VALUE, along the
    ##     PATH from us (exclusive) to the defining parent (inclusive).
    me.name = name
    me._itemmap = dict()
    me._cache = dict()
    me._cp = cp

  def _expand(me, string, resolvep):
    """
    Expands $(...) and (optionally) $FLAGS[...] placeholders in STRING.

    RESOLVEP is a boolean switch: do we bother to tax the resolver or not?
    This is turned off by MyConfigParser's resolve() method while it's
    collecting hostnames to be resolved.
    """
    string = RX_REF.sub(lambda m: me.get(m.group(1), resolvep), string)
    if resolvep:
      string = RX_RESOLVE.sub(
        lambda m: ' '.join(me._cp._resolver.lookup(m.group(2), m.group(1))),
        string)
    return string

  def _parents(me):
    """
    Yield this section's parents.

    The parents are named by the section's own `@inherit' item, separated
    by commas and/or whitespace.  Raises `MissingSectionException' (from the
    parser's `section' method) if a named parent doesn't exist.
    """
    try: names = me._itemmap['@inherit']
    except KeyError: return
    for name in names.replace(',', ' ').split():
      yield me._cp.section(name)

  def _get(me, key, path = None):
    """
    Low-level option-fetching method.

    Fetch the value for the named KEY in this section, or maybe (recursively)
    a section which it inherits from.

    Returns a pair VALUE, PATH.  The value is not expanded; nor do we check
    for the special `name' key.  The caller is expected to do these things.
    If no value could be found, then the VALUE returned is None.

    Raises `InheritanceCycleError' if the inheritance graph is cyclic, and
    `AmbiguousOptionError' if different parents yield different values.
    """

    ## If we weren't given a path, then we'd better make one.
    if path is None: path = []

    ## Extend the path to cover us, but remember to remove us again when
    ## we've finished.  If we need to pass the current path back upwards,
    ## then remember to take a copy.
    path.append(me.name)
    try:

      ## If we've been this way before on another pass through then return
      ## the value we found then.  If we're still thinking about it then
      ## we've found a cycle.
      try: v, p = me._cache[key]
      except KeyError: pass
      else:
        if p is None: raise InheritanceCycleError(key, path[:])
        else: return v, path + p

      ## See whether the answer is ready waiting for us.
      try: v = me._itemmap[key]
      except KeyError: pass
      else:
        p = path[:]
        me._cache[key] = v, []
        return v, p

      ## Initially we have no idea.
      value = None
      winner = []

      ## Go through our parents and ask them what they think.  Mark the key
      ## as being in progress first, so that we notice if the search comes
      ## back round to us.
      me._cache[key] = None, None
      for p in me._parents():

        ## See whether we get an answer.  If not, keep on going.
        v, pp = p._get(key, path)
        if v is None: continue

        ## If we got an answer, check that it matches any previous ones.
        if value is None:
          value = v
          winner = pp
        elif value != v:
          raise AmbiguousOptionError(key, winner, value, pp, v)

      ## That's the best we could manage.  The cache holds the path relative
      ## to us, so strip off the leading part which leads here.
      me._cache[key] = value, winner[len(path):]
      return value, winner

    finally:
      ## Remove us from the path again.
      path.pop()

  def get(me, key, resolvep = True):
    """
    Retrieve the value of KEY from this section.

    The value is expanded before being returned; RESOLVEP says whether
    `$FLAGS[HOST]' references should be resolved too.  Raises
    `MissingKeyException' if the key can't be found.

    The inheritance list itself is special: it is never inherited from a
    parent, and it is returned without being expanded.
    """

    ## Special handling for the `name' key.
    if key == 'name':
      value = me._itemmap.get('name', me.name)

    ## Special handling for the inheritance list.  The key which `_parents'
    ## actually consults is `@inherit'; `@inherits' was the only spelling
    ## recognized here previously, and is retained for compatibility.
    elif key in ('@inherit', '@inherits'):
      try: return me._itemmap[key]
      except KeyError: raise MissingKeyException(me.name, key)

    else:
      value, _ = me._get(key)
      if value is None:
        raise MissingKeyException(me.name, key)

    ## Expand the value and return it.
    return me._expand(value, resolvep)

  def items(me, resolvep = True):
    """
    Return an iterator over the item names in the section.

    This includes the names of items inherited from the section's ancestors,
    and the special `name' item.  The names are produced in no particular
    order.  The RESOLVEP argument is accepted but not used.
    """

    ## Initialize for a depth-first walk of the inheritance graph.
    seen = set(['name'])
    visited = set([me.name])
    stack = [me]

    ## Visit nodes, collecting their keys.  Don't believe the values:
    ## resolving inheritance is too hard to do like this.
    while stack:
      sec = stack.pop()
      for p in sec._parents():
        if p.name not in visited:
          stack.append(p); visited.add(p.name)
      seen.update(sec._itemmap)

    ## And we're done.
    return iter(seen)
e3ec3a3a 414
class MyConfigParser (object):
  """
  A more advanced configuration parser.

  This has four major enhancements over the standard ConfigParser which are
  relevant to us.

    * It recognizes `@inherit' keys and follows them when expanding a
      value.

    * It recognizes `$(VAR)' references to configuration variables during
      expansion and processes them correctly.

    * It recognizes `$FLAGS[HOST]' name-resolver requests and handles them
      correctly.  FLAGS consists of characters `4' (IPv4 addresses), `6'
      (IPv6 addresses), and `*' (all, space-separated, rather than just the
      first).

    * Its parsing behaviour is well-defined.

  Use:

    1. Call parse(FILE) to slurp in the configuration data from an open
       file.

    2. Call resolve() to collect the hostnames which need to be resolved and
       actually do the name resolution.

    3. Call sections() to get a list of the configuration sections, or
       section(NAME) to find a named section.

    4. Call get(ITEM) on a section to collect the results, or items() to
       iterate over them.
  """

  def __init__(me):
    """
    Initialize a new, empty configuration parser.
    """
    me._sectmap = dict()
    me._resolver = BulkResolver()

  def parse(me, f):
    """
    Parse configuration from a file F.

    F is iterated over to obtain the lines; its `name' attribute is used in
    error messages.  Raises `ConfigSyntaxError' if a line can't be
    understood, or turns up where it makes no sense.
    """

    ## Initial parser state: the current section, the key being assigned
    ## (if any), and a string buffer accumulating the key's value.
    sect = None
    key = None
    val = None

    ## Commit a key's value once we've determined that there are no further
    ## continuation lines.
    def commit(sect, key, val):
      if key is not None: sect._itemmap[key] = val.getvalue()

    ## Work through all of the input lines.
    lno = 0
    for line in f:
      lno += 1

      ## A comment or a blank line.  Nothing doing.  (This means that we
      ## leave out blank lines which look like they might be continuation
      ## lines.)
      if RX_COMMENT.match(line) is not None:
        continue

      ## A section header.  Flush out any previous value and set up the new
      ## group, making it if we haven't seen it before.
      m = RX_GRPHDR.match(line)
      if m is not None:
        commit(sect, key, val)
        name = m.group(1)
        if name not in me._sectmap:
          me._sectmap[name] = ConfigSection(name, me)
        sect = me._sectmap[name]
        key = None
        continue

      ## A new assignment.  Flush out the old one, and set up to store this
      ## one.
      m = RX_ASSGN.match(line)
      if m is not None:
        if sect is None:
          raise ConfigSyntaxError(f.name, lno, 'no active section to update')
        commit(sect, key, val)
        key = m.group(1)
        val = StringIO(); val.write(m.group(2))
        continue

      ## A continuation line.  Accumulate the value.
      m = RX_CONT.match(line)
      if m is not None:
        if key is None:
          raise ConfigSyntaxError(f.name, lno, 'no config value to continue')
        val.write('\n'); val.write(m.group(1))
        continue

      ## Something else.
      raise ConfigSyntaxError(f.name, lno, 'incomprehensible line')

    ## Don't forget to commit any final value material.
    commit(sect, key, val)

  def section(me, name):
    """
    Return a ConfigSection with the given NAME.

    Raises `MissingSectionException' if there is no such section.
    """
    try: sec = me._sectmap[name]
    except KeyError: raise MissingSectionException(name)
    return sec

  def sections(me):
    """Return an iterator over the known sections."""
    return me._sectmap.itervalues()

  def resolve(me):
    """
    Works out all of the hostnames which need resolving and resolves them.

    Until you call this, attempts to fetch configuration items which need to
    resolve hostnames will fail!
    """

    ## Expand every item of every section, leaving the `$FLAGS[HOST]'
    ## references alone, and feed the hosts mentioned to the resolver.
    for sec in me.sections():
      for key in sec.items():
        raw = sec.get(key, resolvep = False)
        for ref in RX_RESOLVE.finditer(raw):
          me._resolver.prepare(ref.group(2))

    ## Now wait for the answers to come back.
    me._resolver.run()
546
6005ef9b
MW
547###--------------------------------------------------------------------------
548### Command-line handling.
549
def inputiter(things):
  """
  Iterate over command-line arguments, returning corresponding open files.

  If none were given, or one is `-', assume standard input; if one is a
  directory, scan it for files other than backups; otherwise return the
  opened files.

  The entries of a directory are visited in sorted order, so that the order
  in which files are read doesn't depend on the filesystem.  Entries whose
  names end in `~' or `#' are taken to be backup files and skipped, as is
  anything which isn't a regular file.

  The caller is responsible for closing the files yielded.
  """

  ## No arguments at all: read standard input, but complain rather than sit
  ## waiting for somebody to type a configuration at us.
  if not things:
    if OS.isatty(stdin.fileno()):
      M.die('no input given, and stdin is a terminal')
    yield stdin
    return

  for thing in things:
    if thing == '-':
      yield stdin
    elif OS.path.isdir(thing):
      for item in sorted(OS.listdir(thing)):
        if item.endswith(('~', '#')): continue
        name = OS.path.join(thing, item)
        if not OS.path.isfile(name): continue
        yield open(name)
    else:
      yield open(thing)
577
def parse_options(argv = argv):
  """
  Parse command-line options, returning a pair (OPTS, ARGS).

  ARGV is the complete argument vector, including the program name in
  ARGV[0]; since the whole vector is handed to the option parser, the
  program name is also the first element of the ARGS returned.
  """
  M.ego(argv[0])
  parser = OptionParser(
    usage = '%prog [-c CDB] INPUT...',
    version = '%%prog (tripe, version %s)' % VERSION)
  parser.add_option(
    '-c', '--cdb',
    dest = 'cdbfile', metavar = 'CDB', default = None,
    help = 'Compile output into a CDB file.')
  return parser.parse_args(argv)
590
591###--------------------------------------------------------------------------
592### Main code.
593
def getconf(args):
  """
  Read the configuration files and return the accumulated result.

  ARGS is the list of input names from the command line, as understood by
  `inputiter'.  We make sure that all hostnames have been properly resolved.
  """
  conf = MyConfigParser()
  for f in inputiter(args):
    try:
      conf.parse(f)
    finally:
      ## We've finished with this file whether or not it parsed, so don't
      ## leave it open.  Standard input isn't ours to close.
      if f is not stdin: f.close()
  conf.resolve()
  return conf
605
def output(conf, cdb):
  """
  Output the configuration information CONF to the database CDB.

  Sections whose names begin with `@' are not written.  Sections whose names
  begin with `$' are written under their own names; any other section NAME
  describes a peer, and is written as `PNAME'.  Each record holds the
  section's items, other than those whose names begin with `@', URL-encoded.

  This is where the special `user' and `auto' database entries get set: a
  peer with a `user' item USER gets a record `UUSER' holding the peer's
  name, and the `%AUTO' record lists the peers whose `auto' item is set to
  a true value.
  """
  auto = []
  for sec in sorted(conf.sections(), key = lambda sec: sec.name):
    if sec.name.startswith('@'):
      continue
    elif sec.name.startswith('$'):
      label = sec.name
    else:
      label = 'P%s' % sec.name
      try: a = sec.get('auto')
      except MissingKeyException: pass
      else:
        if a in ('y', 'yes', 't', 'true', '1', 'on'): auto.append(sec.name)
      try: u = sec.get('user')
      except MissingKeyException: pass
      else:
        ## The record needs a value as well as a key: map the user name to
        ## the peer it belongs to.
        cdb.add('U%s' % u, sec.name)
    url = M.URLEncode(semip = True)
    for key in sorted(sec.items()):
      if not key.startswith('@'):
        url.encode(key, sec.get(key))
    cdb.add(label, url.result)
  cdb.add('%AUTO', ' '.join(auto))
  cdb.finish()
634
def main():
  """
  Main program.

  Parse the command line, read the configuration, and write the result,
  either as a CDB file if one was requested or as text on standard output
  otherwise.  Expected errors are reported as a message, and the program
  exits with status 2.
  """
  opts, args = parse_options()

  ## Choose where the output goes.
  if not opts.cdbfile:
    cdb = CDBFake()
  else:
    cdb = CDB.cdbmake(opts.cdbfile, opts.cdbfile + '.new')

  ## Do the work.  The first of the ARGS is the program name, so skip it.
  try:
    output(getconf(args[1:]), cdb)
  except ExpectedError as e:
    M.moan(str(e))
    exit(2)
6005ef9b
MW
648
649if __name__ == '__main__':
650 main()
651
652###----- That's all, folks --------------------------------------------------