#! @PYTHON@
### -*-python-*-
###
### Build a CDB file from configuration file
###
### (c) 2007 Straylight/Edgeware
###

###----- Licensing notice ---------------------------------------------------
###
### This file is part of Trivial IP Encryption (TrIPE).
###
### TrIPE is free software: you can redistribute it and/or modify it under
### the terms of the GNU General Public License as published by the Free
### Software Foundation; either version 3 of the License, or (at your
### option) any later version.
###
### TrIPE is distributed in the hope that it will be useful, but WITHOUT
### ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
### FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
### for more details.
###
### You should have received a copy of the GNU General Public License
### along with TrIPE.  If not, see <https://www.gnu.org/licenses/>.

## The program's version string, as reported by the `--version' option.
## NOTE(review): the `@VERSION@' text looks like a placeholder to be filled in
## when the script is built -- confirm against the build system.
VERSION = '@VERSION@'

###--------------------------------------------------------------------------
### External dependencies.

import mLib as M
from optparse import OptionParser
import cdb as CDB
from sys import stdin, stdout, exit, argv
import subprocess as SUB
import re as RX
import os as OS
import errno as E
import fcntl as F
import socket as S
from cStringIO import StringIO

###--------------------------------------------------------------------------
### Utilities.

class CDBFake (object):
  """
  A stand-in for a CDB maker which writes plain text instead.

  Each record is emitted as a `KEY:VALUE' line on the chosen output file,
  which is a form suitable for cdb-map.
  """

  def __init__(me, file = stdout):
    """Remember the output FILE; by default, this is standard output."""
    me.file = file

  def add(me, key, value):
    """Write a single record associating VALUE with KEY."""
    record = '%s:%s\n' % (key, value)
    me.file.write(record)

  def finish(me):
    """Finish the output: there is nothing to commit, so do nothing."""

class ExpectedError (Exception):
  """
  Base class for the failures which we anticipate and report tidily.

  The main program catches these, prints the message, and exits with a
  failure status rather than letting a traceback escape.
  """

###--------------------------------------------------------------------------
### A bulk DNS resolver.

class ResolverFailure (ExpectedError):
  """Report that a host name couldn't be resolved to a usable address."""

  def __init__(me, host, msg):
    """Record the name of the HOST which failed, and the reason MSG."""
    me.host, me.msg = host, msg

  def __str__(me):
    """Return a human-readable description of the failure."""
    details = (me.host, me.msg)
    return "failed to resolve `%s': %s" % details

class ResolvingHost (object):
  """
  The state of one host name which a bulk resolver is looking up.

  An instance collects the IPv4 and IPv6 addresses discovered for the name,
  or a note of why resolution failed.  It also interprets the FLAGS part of
  the $FLAGS[HOSTNAME] syntax when the addresses are finally requested.
  """

  def __init__(me, name):
    """Set up an empty record for the host called NAME."""
    me.name = name
    me.failure = None
    me.addr = { 'INET': [], 'INET6': [] }

  def addaddr(me, af, addr):
    """
    Record ADDR as one of the host's addresses in address family AF.

    AF must be either `INET' or `INET6'.
    """
    family = me.addr[af]
    family.append(addr)

  def failed(me, msg):
    """
    Note that this host couldn't be resolved; MSG explains why, for humans.
    """
    me.failure = msg

  def get(me, flags):
    """
    Return the list of addresses selected by the FLAGS string.

    Each `4' or `6' in FLAGS appends the addresses of that family, in the
    order in which the flags are written; if neither appears then the IPv4
    addresses are followed by the IPv6 ones.  Unless `*' is present, only the
    first of the selected addresses is returned.

    Raises ResolverFailure if resolution failed or nothing matched, and
    ValueError if FLAGS contains an unrecognized character.
    """
    if me.failure is not None:
      raise ResolverFailure(me.name, me.failure)

    v4, v6 = me.addr['INET'], me.addr['INET6']
    chosen = []
    want_all = False
    family_given = False
    for flag in flags:
      if flag == '*':
        want_all = True
      elif flag == '4':
        chosen.extend(v4)
        family_given = True
      elif flag == '6':
        chosen.extend(v6)
        family_given = True
      else:
        raise ValueError("unknown address-resolution flag `%s'" % flag)

    ## With no explicit family, take everything we have, IPv4 first.
    if not family_given: chosen = v4 + v6
    if not chosen:
      raise ResolverFailure(me.name, 'no matching addresses found')

    if want_all: return chosen
    return chosen[:1]

class BaseBulkResolver (object):
  """
  Common machinery for resolving a number of DNS names in parallel.

  Using a bulk resolver works in three phases:

    1. You call prepare(HOSTNAME) a number of times, to feed in the hostnames
       you're interested in.

    2. You call run() to actually drive the resolver.

    3. You call lookup(HOSTNAME, FLAGS) to get the addresses you wanted.
       This fails with KeyError if HOSTNAME was never given to prepare();
       the host record's get() method reports other problems.

  This class doesn't define run() or _prepare(HOST, NAME) itself: prepare()
  calls the latter when a name needs a real lookup, so subclasses must
  supply them.
  """

  def __init__(me):
    """Initialize the resolver, with no host names known yet."""
    me._namemap = {}

  def prepare(me, name):
    """
    Prime the resolver to resolve the given host NAME.

    Names which have been seen already are ignored.  A NAME which is really
    a numeric address is converted on the spot; anything else is handed to
    the _prepare() method.
    """
    if name in me._namemap: return

    host = ResolvingHost(name)
    me._namemap[name] = host
    try:
      ## The numeric-only hints make this fail, rather than consult the DNS,
      ## for anything which isn't an address literal.
      ailist = S.getaddrinfo(name, None, S.AF_UNSPEC, S.SOCK_DGRAM, 0,
                             S.AI_NUMERICHOST | S.AI_NUMERICSERV)
    except S.gaierror:
      me._prepare(host, name)
      return

    for family, _socktype, _proto, _canon, sockaddr in ailist:
      if family == S.AF_INET: host.addaddr('INET', sockaddr[0])
      elif family == S.AF_INET6: host.addaddr('INET6', sockaddr[0])

  def lookup(me, name, flags):
    """Fetch the addresses for the host NAME, as selected by FLAGS."""
    host = me._namemap[name]
    return host.get(flags)

class BresBulkResolver (BaseBulkResolver):
  """
  A BulkResolver using mLib's `bres' background resolver.

  This is always available (and might use ADNS), but only does IPv4.
  """

  def __init__(me):
    """Initialize the resolver."""
    super(BresBulkResolver, me).__init__()

    ## The number of lookups which have been started but not yet answered.
    me._noutstand = 0

  def _prepare(me, host, name):
    """Arrange to resolve a NAME, reporting the results to HOST."""

    ## The first callback handles success and the second failure; both end
    ## up in `_resolved', with an empty address list signalling failure.  The
    ## resolver job is stashed on the host record until it completes.
    host._resolv = M.SelResolveByName(
      name,
      lambda cname, alias, addr: me._resolved(host, cname, addr),
      lambda: me._resolved(host, None, []))
    me._noutstand += 1

  def run(me):
    """Run the background DNS resolver until it's finished."""
    while me._noutstand: M.select()

  def _resolved(me, host, cname, addr):
    """
    Callback function: remember that ADDRs are the addresses for HOST.

    An empty ADDR list marks the host as failed.  Otherwise the addresses are
    recorded as IPv4 addresses, and CNAME (if not None) replaces the host's
    name.  Either way, the job is no longer outstanding.
    """
    if not addr:
      host.failed('(unknown failure)')
    else:
      if cname is not None: host.name = cname
      for a in addr: host.addaddr('INET', a)
    host._resolv = None
    me._noutstand -= 1

class AdnsBulkResolver (BaseBulkResolver):
  """
  A BulkResolver using ADNS, via the `adnshost' command-line tool.

  This can do simultaneous IPv4 and IPv6 lookups and is quite shiny.

  Names are written, one per line, to the standard input of an `adnshost'
  child process, and its standard output is parsed a line at a time.  Each
  job is reported as a host line, followed by as many record lines as the
  host line announced.
  """

  def __init__(me):
    """Initialize the resolver."""

    super(AdnsBulkResolver, me).__init__()

    ## Start the external resolver process.
    me._kid = SUB.Popen(['adnshost', '-afs'],
                        stdin = SUB.PIPE, stdout = SUB.PIPE)

    ## Set up the machinery for feeding input to the resolver.  `_inbuf'
    ## holds the text waiting to be written, `_inoff' is how much of it has
    ## been written so far, and `_inlen' is its total length.  `_idmap' maps
    ## job ids to the host records which are still awaiting their answers,
    ## and `_nextid' is the id to give to the next job.
    ## NOTE(review): the `fdflags' calls presumably switch the pipes to
    ## nonblocking mode -- confirm against the mLib documentation.
    me._in = me._kid.stdin
    M.fdflags(me._in, fbic = OS.O_NONBLOCK, fxor = OS.O_NONBLOCK)
    me._insel = M.SelFile(me._in.fileno(), M.SEL_WRITE, me._write)
    me._inbuf, me._inoff, me._inlen = '', 0, 0
    me._idmap = {}
    me._nextid = 0

    ## Set up the machinery for collecting the resolver's output.  The first
    ## line we expect is a host line, so that's the initial line handler.
    me._out = me._kid.stdout
    M.fdflags(me._out, fbic = OS.O_NONBLOCK, fxor = OS.O_NONBLOCK)
    me._outline = M.SelLineBuffer(me._out,
                                  lineproc = me._hostline, eofproc = me._eof)
    me._outline.enable()

    ## It's not finished yet.
    me._done = False

  def _prepare(me, host, name):
    """
    Arrange for the resolver to resolve the name NAME.

    The job is given the next free id, which is recorded on the HOST record
    as its `id' attribute.  This doesn't return until the whole of NAME has
    been written to the resolver process.
    """

    ## Work out the next job id, and associate that with the host record.
    host.id = me._nextid; me._nextid += 1
    me._namemap[name] = me._idmap[host.id] = host

    ## Feed the name to the resolver process.  Keep running the select loop
    ## until `_write' has pushed out everything in the buffer.
    me._inbuf += name + '\n'
    me._inlen += len(name) + 1
    if not me._insel.activep: me._insel.enable()
    while me._inoff < me._inlen: M.select()

  def _write(me):
    """Write material from `_inbuf' to the resolver when it's ready."""

    ## Try to feed some more material to the resolver.  If the pipe can't
    ## take any more just now, then give up and wait to be called again.
    try: n = OS.write(me._in.fileno(), me._inbuf[me._inoff:])
    except OSError, e:
      if e.errno == E.EAGAIN or e.errno == E.EWOULDBLOCK: return
      else: raise

    ## If we're done, then clear the buffer.
    me._inoff += n
    if me._inoff >= me._inlen:
      me._insel.disable()
      me._inbuf, me._inoff, me._inlen = '', 0, 0

  def _eof(me):
    """Notice that the resolver has finished."""
    me._outline.disable()
    me._done = True
    me._kid.wait()

  def run(me):
    """
    Tell the resolver it has all of our input now, and wait for it to finish.

    Raises Exception if the resolver's output ended while some jobs were
    still unanswered.
    """
    me._in.close()
    while not me._done: M.select()
    if me._idmap:
      raise Exception('adnshost failed to process all the requests')

  def _hostline(me, line):
    """
    Handle a host line from the resolver.

    A host line has eight fields: the job id, the number of record lines
    which follow, a status type, two further status fields, the owner name,
    the canonical name, and a status string.
    """

    ## Parse the line into fields.
    (id, nrrs, stty, stocde, stmsg, owner, cname, ststr), _ = \
        M.split(line, quotep = True)
    id, nrrs = int(id), int(nrrs)

    ## Find the right record.  Any status type other than `ok' is a failure,
    ## with the status string as the explanation.
    host = me._idmap[id]
    if stty != 'ok': host.failed(ststr)

    ## Stash away the canonical name of the host.  A canonical-name field of
    ## `$' means that the (nonempty) owner name should be used instead.
    host.name = cname == '$' and owner or cname

    ## If there are no record lines to come, then remove this record from the
    ## list of outstanding jobs.  Otherwise, switch to the handler for record
    ## lines.
    if not nrrs:
      del me._idmap[id]
    else:
      me._outline.lineproc = me._rrline
      me._nrrs = nrrs
      me._outhost = host

  def _rrline(me, line):
    """
    Handle a record line from the resolver.

    The first three fields are the owner name, the record type, and the
    address family; for address records, the fourth is the address itself.
    """

    ## Parse the line into fields.
    ww, _ = M.split(line, quotep = True)
    owner, type, af = ww[:3]

    ## If this is an address record, and it looks like an interesting address
    ## type, then stash the address.
    if type == 'A' and (af == 'INET' or af == 'INET6'):
      me._outhost.addaddr(af, ww[3])

    ## Update the parser state.  If there are no more records for this job
    ## then mark the job as done and switch back to expecting a host line.
    me._nrrs -= 1
    if not me._nrrs:
      me._outline.lineproc = me._hostline
      del me._idmap[me._outhost.id]
      me._outhost = None

## Select a bulk resolver.  If `adnshost' exists then we might as well use
## it.  We find out by trying to run `adnshost --version' when this module is
## loaded: if the program can't be started (OSError), or exits with a nonzero
## status, then we stick with the `bres'-based resolver.  The program's output
## is collected but ignored.
BulkResolver = BresBulkResolver
try:
  p = SUB.Popen(['adnshost', '--version'],
                stdin = SUB.PIPE, stdout = SUB.PIPE, stderr = SUB.PIPE)
  _out, _err = p.communicate()
  st = p.wait()
  if st == 0: BulkResolver = AdnsBulkResolver
except OSError:
  pass

###--------------------------------------------------------------------------
### The configuration parser.

## Match a comment or empty line: one consisting only of whitespace, or whose
## first non-whitespace character is `;' or `#'.
RX_COMMENT = RX.compile(r'(?x) ^ \s* (?: $ | [;#])')

## Match a section group header, `[NAME]', possibly surrounded by whitespace;
## group 1 is the NAME, exactly as written between the brackets.
RX_GRPHDR = RX.compile(r'(?x) ^ \s* \[ (.*) \] \s* $')

## Match an assignment line, `KEY = VALUE' or `KEY: VALUE'.  The key must
## start in the first column; group 1 is the KEY, which contains no `:' or
## `=' and has no whitespace at either end; group 2 is the VALUE, which is
## possibly empty and has the whitespace trimmed from both ends.
RX_ASSGN = RX.compile(r'''(?x) ^
        ([^\s:=] (?: [^:=]* [^\s:=])?)
        \s* [:=] \s*
        (| \S | \S.*\S)
        \s* $''')

## Match a continuation line, which must begin with whitespace; group 1 is
## the continuation text, with the whitespace trimmed from both ends.
RX_CONT = RX.compile(r'''(?x) ^ \s+
        (| \S | \S.*\S)
        \s* $''')

## Match a $(VAR) configuration variable reference; group 1 is the VAR.
RX_REF = RX.compile(r'(?x) \$ \( ([^)]+) \)')

## Match a $FLAGS[HOST] name resolution reference; group 1 are the flags
## (any mixture of `4', `6' and `*', possibly none at all); group 2 is the
## HOST.
RX_RESOLVE = RX.compile(r'(?x) \$ ([46*]*) \[ ([^]]+) \]')

class ConfigSyntaxError (ExpectedError):
  """Report a malformed line in a configuration file."""

  def __init__(me, fname, lno, msg):
    """Record the file name FNAME, the line number LNO, and the message MSG."""
    me.fname, me.lno, me.msg = fname, lno, msg

  def __str__(me):
    """Format the complaint as `FILE:LINE: MESSAGE'."""
    fields = (me.fname, me.lno, me.msg)
    return '%s:%d: %s' % fields

def _fmt_path(path):
  """
  Format PATH, a sequence of section names, for use in an error message.

  Each hop is quoted, and the hops are joined together with arrows.
  """
  quoted = []
  for hop in path: quoted.append("`%s'" % hop)
  return ' -> '.join(quoted)

class AmbiguousOptionError (ExpectedError):
  """
  Report that a key inherits different values along different paths.
  """

  def __init__(me, key, patha, vala, pathb, valb):
    """
    Record the KEY, and the two conflicting answers.

    PATHA and PATHB are the inheritance paths which were followed, and VALA
    and VALB are the values which were found at the ends of them.
    """
    me.key = key
    me.patha = patha; me.vala = vala
    me.pathb = pathb; me.valb = valb

  def __str__(me):
    """Describe both of the paths and the values that they yielded."""
    template = "Ambiguous answer resolving key `%s': " \
        "path %s yields `%s' but %s yields `%s'"
    return template % \
        (me.key, _fmt_path(me.patha), me.vala, _fmt_path(me.pathb), me.valb)

class InheritanceCycleError (ExpectedError):
  """Report that a section (indirectly) inherits from itself."""

  def __init__(me, key, path):
    """Record the KEY being looked up, and the PATH which forms the cycle."""
    me.key, me.path = key, path

  def __str__(me):
    """Describe the cycle, and the key which led to its discovery."""
    cycle = _fmt_path(me.path)
    return "Found a cycle %s looking up key `%s'" % (cycle, me.key)

class MissingSectionException (ExpectedError):
  """Report a reference to a section which doesn't exist."""

  def __init__(me, sec):
    """Record SEC, the name of the missing section."""
    me.sec = sec

  def __str__(me):
    """Name the section which couldn't be found."""
    missing = me.sec
    return "Section `%s' not found" % (missing)

class MissingKeyException (ExpectedError):
  """Report that a key has no value in a section."""

  def __init__(me, sec, key):
    """Record the section name SEC, and the KEY which wasn't found in it."""
    me.sec, me.key = sec, key

  def __str__(me):
    """Name the missing key, and the section which was searched."""
    details = (me.key, me.sec)
    return "Key `%s' not found in section `%s'" % details

class ConfigSection (object):
  """
  A section in a configuration parser.

  This is where a lot of the nitty-gritty stuff actually happens.  The
  `MyConfigParser' knows a lot about the internals of this class, which saves
  on building a complicated interface.

  A section holds its own items in `_itemmap'.  It may also inherit items
  from the sections named (separated by commas or whitespace) in its own
  `@inherit' item.
  """

  def __init__(me, name, cp):
    """Initialize a new, empty section with a given NAME and parent CP."""

    ## The cache maps item keys to entries, which consist of a pair of
    ## objects.  There are four possible states for a cache entry:
    ##
    ##   * missing -- there is no entry at all with this key, so we must
    ##     search for it;
    ##
    ##   * None, None -- we are actively trying to resolve this key, so if we
    ##     encounter this state, we have found a cycle in the inheritance
    ##     graph;
    ##
    ##   * None, [] -- we know that this key isn't reachable through any of
    ##     our parents;
    ##
    ##   * VALUE, PATH -- we know that the key resolves to VALUE, along the
    ##     PATH from us (exclusive) to the defining parent (inclusive).
    me.name = name
    me._itemmap = dict()
    me._cache = dict()
    me._cp = cp

  def _expand(me, string, resolvep):
    """
    Expands $(...) and (optionally) $FLAGS[...] placeholders in STRING.

    RESOLVEP is a boolean switch: do we bother to tax the resolver or not?
    This is turned off by MyConfigParser's resolve() method while it's
    collecting hostnames to be resolved.

    Returns the expanded string.  Each $(VAR) is replaced by the (expanded)
    value of VAR as seen from this section; each $FLAGS[HOST] is replaced by
    the selected addresses, separated by spaces.
    """
    string = RX_REF.sub(lambda m: me.get(m.group(1), resolvep), string)
    if resolvep:
      string = RX_RESOLVE.sub(
        lambda m: ' '.join(me._cp._resolver.lookup(m.group(2), m.group(1))),
        string)
    return string

  def _parents(me):
    """
    Yield this section's parents.

    The parents are the sections named in our own `@inherit' item; if we
    don't have one then there are no parents.  Looking up a parent raises
    MissingSectionException if the parser doesn't know the section.
    """
    try: names = me._itemmap['@inherit']
    except KeyError: return
    for name in names.replace(',', ' ').split():
      yield me._cp.section(name)

  def _get(me, key, path = None):
    """
    Low-level option-fetching method.

    Fetch the value for the named KEY in this section, or maybe (recursively)
    a section which it inherits from.  PATH is the list of section names
    followed so far to reach us; it is extended while we work and restored
    before we return.

    Returns a pair VALUE, PATH.  The value is not expanded; nor do we check
    for the special `name' key.  The caller is expected to do these things.
    The VALUE is None if no value could be found.

    Raises InheritanceCycleError if we find ourselves already looking up KEY,
    and AmbiguousOptionError if different parents give different answers.
    """

    ## If we weren't given a path, then we'd better make one.
    if path is None: path = []

    ## Extend the path to cover us, but remember to remove us again when
    ## we've finished.  If we need to pass the current path back upwards,
    ## then remember to take a copy.
    path.append(me.name)
    try:

      ## If we've been this way before on another pass through then return
      ## the value we found then.  If we're still thinking about it then
      ## we've found a cycle.
      try: v, p = me._cache[key]
      except KeyError: pass
      else:
        if p is None: raise InheritanceCycleError(key, path[:])
        else: return v, path + p

      ## See whether the answer is ready waiting for us.
      try: v = me._itemmap[key]
      except KeyError: pass
      else:
        p = path[:]
        me._cache[key] = v, []
        return v, p

      ## Initially we have no idea.
      value = None
      winner = []

      ## Go through our parents and ask them what they think.
      me._cache[key] = None, None
      for p in me._parents():

        ## See whether we get an answer.  If not, keep on going.
        v, pp = p._get(key, path)
        if v is None: continue

        ## If we got an answer, check that it matches any previous ones.
        if value is None:
          value = v
          winner = pp
        elif value != v:
          raise AmbiguousOptionError(key, winner, value, pp, v)

      ## That's the best we could manage.  The winning path starts at the
      ## root of the search, so trim off the part leading up to (and
      ## including) us before caching it.
      me._cache[key] = value, winner[len(path):]
      return value, winner

    finally:
      ## Remove us from the path again.
      path.pop()

  def get(me, key, resolvep = True):
    """
    Retrieve the value of KEY from this section.

    The value is expanded before it is returned; hostnames are only resolved
    if RESOLVEP is true.  Raises MissingKeyException if there is no value.

    Two keys are special.  The `name' key defaults to the section's own name
    if it isn't set explicitly.  The `@inherit' key, which lists the
    section's parents, is never inherited and is returned unexpanded, exactly
    as `_parents' reads it.
    """

    ## Special handling for the `name' and `@inherit' keys.
    if key == 'name':
      value = me._itemmap.get('name', me.name)
    elif key == '@inherit':
      try: return me._itemmap['@inherit']
      except KeyError: raise MissingKeyException(me.name, key)
    else:
      value, _ = me._get(key)
      if value is None:
        raise MissingKeyException(me.name, key)

    ## Expand the value and return it.
    return me._expand(value, resolvep)

  def items(me, resolvep = True):
    """
    Return an iterator over the item names in the section.

    This covers the keys set in this section and in every section it
    inherits from, directly or indirectly, together with `name', which is
    always present.  The RESOLVEP argument is accepted but not used.
    """

    ## Initialize for a depth-first walk of the inheritance graph.
    seen = { 'name': True }
    visiting = { me.name: True }
    stack = [me]

    ## Visit nodes, collecting their keys.  Don't believe the values:
    ## resolving inheritance is too hard to do like this.
    while stack:
      sec = stack.pop()
      for p in sec._parents():
        if p.name not in visiting:
          stack.append(p); visiting[p.name] = True

      for key in sec._itemmap: seen[key] = None

    ## And we're done.
    return iter(seen)

class MyConfigParser (object):
  """
  A more advanced configuration parser.

  This has four major enhancements over the standard ConfigParser which are
  relevant to us.

    * It recognizes `@inherit' keys and follows them when expanding a
      value.

    * It recognizes `$(VAR)' references to configuration variables during
      expansion and processes them correctly.

    * It recognizes `$FLAGS[HOST]' name-resolver requests and handles them
      correctly.  FLAGS consists of characters `4' (IPv4 addresses), `6'
      (IPv6 addresses), and `*' (all, space-separated, rather than just the
      first).

    * Its parsing behaviour is well-defined.

  Use:

    1. Call parse(FILE) on each open configuration file to slurp in the
       configuration data.

    2. Call resolve() to collect the hostnames which need to be resolved and
       actually do the name resolution.

    3. Call sections() to get a list of the configuration sections, or
       section(NAME) to find a named section.

    4. Call get(ITEM) on a section to collect the results, or items() to
       iterate over them.
  """

  def __init__(me):
    """
    Initialize a new, empty configuration parser.
    """
    me._sectmap = dict()
    me._resolver = BulkResolver()

  def parse(me, f):
    """
    Parse configuration from a file F.

    F is an open file (or other iterable of lines); its `name' attribute is
    used in error messages.  Sections and keys which have been seen before,
    possibly in an earlier file, are updated rather than replaced wholesale.

    Raises ConfigSyntaxError if a line can't be understood.
    """

    ## Initial parser state: the section being filled in, the key of the
    ## assignment in progress, the buffer accumulating its value, and the
    ## current line number.
    sect = None
    key = None
    val = None
    lno = 0

    ## Commit a key's value when we've determined that there are no further
    ## continuation lines.
    def flush():
      if key is not None: sect._itemmap[key] = val.getvalue()

    ## Work through all of the input lines.
    for line in f:
      lno += 1

      ## A comment or a blank line.  Nothing doing.  (This means that we
      ## leave out blank lines which look like they might be continuation
      ## lines.)
      if RX_COMMENT.match(line):
        continue

      ## A section header.  Flush out any previous value and set up the new
      ## group.
      m = RX_GRPHDR.match(line)
      if m is not None:
        flush()
        name = m.group(1)
        try: sect = me._sectmap[name]
        except KeyError: sect = me._sectmap[name] = ConfigSection(name, me)
        key = None
        continue

      ## A new assignment.  Flush out the old one, and set up to store this
      ## one.
      m = RX_ASSGN.match(line)
      if m is not None:
        if sect is None:
          raise ConfigSyntaxError(f.name, lno, 'no active section to update')
        flush()
        key = m.group(1)
        val = StringIO(); val.write(m.group(2))
        continue

      ## A continuation line.  Accumulate the value.
      m = RX_CONT.match(line)
      if m is not None:
        if key is None:
          raise ConfigSyntaxError(f.name, lno, 'no config value to continue')
        val.write('\n'); val.write(m.group(1))
        continue

      ## Something else.
      raise ConfigSyntaxError(f.name, lno, 'incomprehensible line')

    ## Don't forget to commit any final value material.
    flush()

  def section(me, name):
    """
    Return a ConfigSection with the given NAME.

    Raises MissingSectionException if there is no such section.
    """
    try: return me._sectmap[name]
    except KeyError: raise MissingSectionException(name)

  def sections(me):
    """Return an iterator over the known sections."""
    return me._sectmap.itervalues()

  def resolve(me):
    """
    Works out all of the hostnames which need resolving and resolves them.

    Until you call this, attempts to fetch configuration items which need to
    resolve hostnames will fail!
    """

    ## Expand every item without troubling the resolver, and hand each
    ## hostname mentioned in the results over to it.
    for sec in me.sections():
      for key in sec.items():
        value = sec.get(key, resolvep = False)
        for match in RX_RESOLVE.finditer(value):
          me._resolver.prepare(match.group(2))

    ## Now do all of the lookups at once.
    me._resolver.run()

###--------------------------------------------------------------------------
### Command-line handling.

def inputiter(things):
  """
  Iterate over command-line arguments, returning corresponding open files.

  If none were given, or one is `-', assume standard input; if one is a
  directory, scan it for files other than backups; otherwise return the
  opened files.

  The entries of a directory are visited in sorted order, so that the result
  doesn't depend on the order in which the filesystem happens to list them.
  Backup files are those whose names end in `~' or `#'; anything in a
  directory which isn't a regular file is skipped.

  The caller is responsible for closing the files which are yielded.
  """

  ## With no arguments at all, read standard input -- but complain if that
  ## would mean waiting for somebody to type the configuration at us.
  if not things:
    if OS.isatty(stdin.fileno()):
      M.die('no input given, and stdin is a terminal')
    yield stdin
    return

  for thing in things:
    if thing == '-':
      yield stdin
    elif OS.path.isdir(thing):
      for item in sorted(OS.listdir(thing)):
        if item.endswith('~') or item.endswith('#'):
          continue
        name = OS.path.join(thing, item)
        if not OS.path.isfile(name):
          continue
        yield open(name)
    else:
      yield open(thing)

def parse_options(argv = argv):
  """
  Parse the command line ARGV, returning a pair (OPTS, ARGS).

  ARGV is the complete argument list, starting with the program name.  The
  whole list is given to the option parser, so the program name comes back
  as the first element of ARGS.
  """
  progname = argv[0]
  M.ego(progname)

  parser = OptionParser(
    usage = '%prog [-c CDB] INPUT...',
    version = '%%prog (tripe, version %s)' % VERSION)
  parser.add_option(
    '-c', '--cdb',
    metavar = 'CDB', dest = 'cdbfile', default = None,
    help = 'Compile output into a CDB file.')

  options, positional = parser.parse_args(argv)
  return options, positional

###--------------------------------------------------------------------------
### Main code.

def getconf(args):
  """
  Read the configuration files and return the accumulated result.

  ARGS is the list of input names, as understood by `inputiter'.  Each input
  file is closed again once it has been parsed (or has failed to parse),
  except for standard input, which we leave alone.

  We make sure that all hostnames have been properly resolved.
  """
  conf = MyConfigParser()
  for f in inputiter(args):
    try:
      conf.parse(f)
    finally:
      ## Don't hold a descriptor open for every file in a big directory.
      if f is not stdin: f.close()
  conf.resolve()
  return conf

def output(conf, cdb):
  """
  Output the configuration information CONF to the database CDB.

  Sections are written in order of their names.  Sections whose names begin
  with `@' are left out entirely.  A section whose name begins with `$' is
  written under its own name; any other section is written under its name
  prefixed by `P', and additionally contributes to the special entries: a
  `U' record for its `user' item, if any, and a place in the final `%AUTO'
  record if its `auto' item is set to an affirmative value.

  Each section's record holds its items, other than those whose keys begin
  with `@', in sorted order and URL-encoded.
  """
  affirmative = ('y', 'yes', 't', 'true', '1', 'on')
  auto = []

  ordered = sorted(conf.sections(), key = lambda sec: sec.name)
  for sec in ordered:

    ## Work out the record label, and handle the special items.
    if sec.name.startswith('@'): continue
    if sec.name.startswith('$'):
      label = sec.name
    else:
      label = 'P%s' % sec.name

      try: a = sec.get('auto')
      except MissingKeyException: a = None
      if a in affirmative: auto.append(sec.name)

      try: u = sec.get('user')
      except MissingKeyException: pass
      else: cdb.add('U%s' % u, sec.name)

    ## Encode the section's items and write the record.
    url = M.URLEncode(semip = True)
    for key in sorted(sec.items()):
      if key.startswith('@'): continue
      url.encode(key, sec.get(key))
    cdb.add(label, url.result)

  ## Finally, write the list of automatic peers and commit.
  cdb.add('%AUTO', ' '.join(auto))
  cdb.finish()

def main():
  """Main program."""
  opts, args = parse_options()
  if opts.cdbfile:
    cdb = CDB.cdbmake(opts.cdbfile, opts.cdbfile + '.new')
  else:
    cdb = CDBFake()
  try:
    conf = getconf(args[1:])
    output(conf, cdb)
  except ExpectedError, e:
    M.moan(str(e))
    exit(2)

## Run the main program, but only when this file is executed as a script
## rather than being imported as a module.
if __name__ == '__main__':
  main()

###----- That's all, folks --------------------------------------------------