defformat('bmp', BMPFormat)
+###--------------------------------------------------------------------------
+### Remaining parsing machinery.
+
+Type = K('type') - Name - D('{') - R(Policy) - D('}')
+def build_type(s, l, t):
+  """
+  Parse action for `Type': build the policy for one `type' clause.
+
+  S is the string being parsed, L is the location of the match within it,
+  and T holds the parsed tokens: T[0] is the category name and T[1] is the
+  sequence of policies found between the braces.
+
+  A single policy is used as it stands; several are combined into an
+  `AndPolicy'.  Either way, the result has its category set and is
+  returned.  Raises `ParseException' if the category name isn't a key of
+  `CATEGORYMAP'.
+  """
+  try:
+    cat = CATEGORYMAP[t[0]]
+  except KeyError:
+    ## Report the problem at the match location we were given.  The location
+    ## argument is called `l' here; referring to it by any other name would
+    ## turn the intended parse error into a `NameError'.
+    raise P.ParseException(s, l, "Unknown category `%s'" % t[0])
+  pols = t[1]
+  if len(pols) == 1: pol = pols[0]
+  else: pol = AndPolicy(pols)
+  pol.setcategory(cat)
+  return pol
+Type.setParseAction(build_type)
+
+TARGETS = []
+class TargetJob (object):
+  """
+  A job describing one target: a directory name and the policies to apply.
+
+  Performing the job appends it to the global `TARGETS' list.
+  """
+  def __init__(me, targetdir, policies):
+    me.targetdir, me.policies = targetdir, policies
+  def perform(me):
+    TARGETS.append(me)
+
+Target = K('target') - String - D('{') - R(Type) - D('}')
+def build_target(s, l, t):
+  ## Parse action for `Target': the first token is the target directory and
+  ## the second is whatever `R(Type)' collected between the braces.  Wrap
+  ## them up as a `TargetJob'.
+  targetdir, policies = t[0], t[1]
+  return TargetJob(targetdir, policies)
+Target.setParseAction(build_target)
+
+VARS = { 'master': None }
+class VarsJob (object):
+  """
+  A job which installs variable settings in the global `VARS' table.
+
+  VARS is iterated as a sequence of (NAME, VALUE) pairs; performing the job
+  stores each VALUE under its NAME, replacing any earlier setting.
+  """
+  def __init__(me, vars):
+    me.vars = vars
+  def perform(me):
+    VARS.update((name, value) for name, value in me.vars)
+
+Var = prop('master', String)
+Vars = K('vars') - D('{') - R(Var) - D('}')
+def build_vars(s, l, t):
+  ## Parse action for `Vars': the first token holds the settings collected
+  ## between the braces; wrap them up as a `VarsJob'.
+  settings = t[0]
+  return VarsJob(settings)
+Vars.setParseAction(build_vars)
+
+## A top-level clause is either a `vars' block or a `target' block, and the
+## configuration as a whole is built from those by `R' (presumably
+## repetition -- TODO confirm against the definition of `R').  The parser is
+## told to ignore `P.pythonStyleComment' (presumably pyparsing's `#'-style
+## comment expression -- TODO confirm).
+TopLevel = Vars | Target
+Config = R(TopLevel)
+Config.ignore(P.pythonStyleComment)
+
###--------------------------------------------------------------------------
### The directory grobbler.
-class Grobbler (object):
+def grobble(master, targets, noact = False):
"""
- The directory grobbler copies a directory tree, converting files.
+ Work through the MASTER directory, writing converted files to TARGETS.
+
+ The TARGETS are a list of `TargetJob' objects, each describing a target
+ directory and a policy to apply to it.
+
+ If NOACT is true, then don't actually do anything permanent to the
+ filesystem.
"""
- def __init__(me, policies, noact = False):
- """
- Create a new Grobbler, working with the given POLICIES.
- """
- me._pmap = {}
- me._noact = noact
- for p in policies:
- me._pmap.setdefault(p.cat, []).append(p)
- me._dirs = []
+ ## Transform the targets into a more convenient data structure.
+ tpolmap = []
+ for t in targets:
+ pmap = {}
+ tpolmap.append(pmap)
+ for p in t.policies: pmap.setdefault(p.cat, []).append(p)
- def _grobble_file(me, master, targetdir, cohorts):
- """
- Convert MASTER, writing the result to TARGETDIR.
+ ## Keep track of the current position in the master tree.
+ dirs = []
- The COHORTS are actually (CAT, ID, COHORT) triples, where a COHORT is a
- list of (FILENAME, ID) pairs.
+ ## And the files which haven't worked.
+ broken = []
- Since this function might convert the MASTER file, the caller doesn't
- know the name of the output files, so we return then as a list.
- """
+ def grobble_file(master, pmap, targetdir, cohorts):
+ ## Convert MASTER, writing the result to TARGETDIR.
+ ##
+ ## The COHORTS are actually (CAT, ID, COHORT) triples, where a COHORT is
+ ## a list of (FILENAME, ID) pairs.
+ ##
+ ## Since this function might convert the MASTER file, the caller doesn't
+ ## know the name of the output files, so we return them as a list.
done = set()
st_m = OS.stat(master)
## Go through the category's policies and see if any match. If we fail
## here, see if there are more categories to try.
- for pol in me._pmap[cat]:
+ for pol in pmap[cat]:
acts = pol.actions(master, targetdir, id, cohort)
if acts: break
else:
## Remove the target. (A hardlink will fail if the target already
## exists.)
- if not me._noact:
+ if not noact:
try:
OS.unlink(a.target)
except OSError, err:
raise
## Do whatever it is we decided to do.
- if me._noact:
+ if noact:
STATUS.commit(filestatus(master, a))
else:
a.perform()
return list(done)
@contextmanager
- def _wrap(me, masterfile):
- """
- Handle exceptions found while trying to convert a particular file or
- directory.
- """
+ def wrap(masterfile):
+ ## Handle exceptions found while trying to convert a particular file or
+ ## directory.
try:
yield masterfile
except (IOError, OSError), exc:
STATUS.clear()
STATUS.commit(filestatus(masterfile, 'failed (%s)' % exc))
- me._broken.append((masterfile, exc))
-
- def _grobble_dir(me, master, target):
- """
- Recursively convert files in MASTER, writing them to TARGET.
- """
-
- ## Make sure the TARGET exists and is a directory. It's a fundamental
- ## assumption of this program that the entire TARGET tree is disposable,
- ## so if something exists but isn't a directory, we should kill it.
- if OS.path.isdir(target):
- pass
- else:
- if OS.path.exists(target):
- STATUS.commit(filestatus(target, 'clear nondirectory'))
- if not me._noact:
- OS.unlink(target)
- STATUS.commit(filestatus(target, 'create directory'))
- if not me._noact:
- OS.mkdir(target)
-
- ## Keep a list of things in the target. As we convert files, we'll check
- ## them off. Anything left over is rubbish and needs to be deleted.
- checklist = {}
- try:
- for i in OS.listdir(target):
- checklist[i] = False
- except OSError, err:
- if err.errno not in (E.ENOENT, E.ENOTDIR):
- raise
-
- ## Keep track of the files in each category.
- catmap = {}
- todo = []
- done = []
-
- ## Work through the master files.
- for f in sorted(OS.listdir(master)):
-
- ## If the killswitch has been pulled then stop. The whole idea is that
- ## we want to cause a clean shutdown if possible, so we don't want to
- ## do it in the middle of encoding because the encoding effort will
- ## have been wasted. This is the only place we need to check. If
- ## we've exited the loop, then clearing old files will probably be
- ## fast, and we'll either end up here when the recursive call returns
- ## or we'll be in the same boat as before, clearing old files, only up
- ## a level. If worst comes to worst, we'll be killed forcibly
- ## somewhere inside `SH.rmtree', and that can continue where it left
- ## off.
- if KILLSWITCH.is_set():
- return
-
- ## Do something with the file.
- with me._wrap(OS.path.join(master, f)) as masterfile:
-
- ## If it's a directory then grobble it recursively. Keep the user
- ## amused by telling him where we are in the tree.
- if OS.path.isdir(masterfile):
- me._dirs.append(f)
- STATUS.set('/'.join(me._dirs))
- try:
- done += me._grobble_dir(masterfile, OS.path.join(target, f))
- finally:
- me._dirs.pop()
- STATUS.set('/'.join(me._dirs))
-
- ## Otherwise it's a file. Work out what kind, and stash it under
- ## the appropriate categories. Later, we'll apply policy to the
- ## files, by category, and work out what to do with them all.
- else:
- gf = GIO.File(masterfile)
- mime = gf.query_info('standard::content-type').get_content_type()
- cats = []
- for cat in me._pmap.iterkeys():
- id = cat.identify(masterfile, mime)
- if id is None: continue
- catmap.setdefault(cat, []).append((masterfile, id))
- cats.append((cat, id))
- if not cats:
- catmap.setdefault(None, []).append((masterfile, id))
- todo.append((masterfile, cats))
-
- ## Work through the categorized files to see what actions to do for
- ## them.
- for masterfile, cats in todo:
- with me._wrap(masterfile):
- done += me._grobble_file(masterfile, target,
- [(cat, id, catmap[cat])
- for cat, id in cats])
-
- ## Check the results off the list so that we don't clear it later.
- for f in done:
- checklist[OS.path.basename(f)] = True
-
- ## Maybe there's stuff in the target which isn't accounted for. Delete
- ## it: either the master has changed, or the policy for this target has
- ## changed. Either way, the old files aren't wanted.
- for f in checklist:
- if not checklist[f]:
- STATUS.commit(filestatus(f, 'clear bogus file'))
- if not me._noact:
- bogus = OS.path.join(target, f)
- try:
- if OS.path.isdir(bogus):
- SH.rmtree(bogus)
- else:
- OS.unlink(bogus)
- except OSError, err:
- if err.errno != E.ENOENT:
- raise
-
- ## Return the target name, so that it can be checked off.
- return [target]
-
- def grobble(me, master, target):
- """
- Convert MASTER, writing a directory tree TARGET.
+ broken.append((masterfile, exc))
- Returns a list of files which couldn't be converted.
- """
- try:
- me._broken = []
- me._grobble_dir(master, target)
- return me._broken
- finally:
- del me._broken
+ def grobble_dir(master, targets):
+ ## Recursively convert files in MASTER, writing them to the TARGETS.
-###--------------------------------------------------------------------------
-### Remaining parsing machinery.
+ ## Keep track of the subdirectories we encounter, because we'll need to
+ ## do all of those in one go at the end.
+ subdirs = set()
-Type = K('type') - Name - D('{') - R(Policy) - D('}')
-def build_type(s, l, t):
- try:
- cat = CATEGORYMAP[t[0]]
- except KeyError:
- raise P.ParseException(s, loc, "Unknown category `%s'" % t[0])
- pols = t[1]
- if len(pols) == 1: pol = pols[0]
- else: pol = AndPolicy(pols)
- pol.setcategory(cat)
- return pol
-Type.setParseAction(build_type)
+ ## Work through each target directory in turn.
+ for target, pmap in zip(targets, tpolmap):
-TARGETS = []
-class TargetJob (object):
- def __init__(me, targetdir, policies):
- me.targetdir = targetdir
- me.policies = policies
- def perform(me):
- TARGETS.append(me)
-
-Target = K('target') - String - D('{') - R(Type) - D('}')
-def build_target(s, l, t):
- return TargetJob(t[0], t[1])
-Target.setParseAction(build_target)
-
-VARS = { 'master': None }
-class VarsJob (object):
- def __init__(me, vars):
- me.vars = vars
- def perform(me):
- for k, v in me.vars:
- VARS[k] = v
-
-Var = prop('master', String)
-Vars = K('vars') - D('{') - R(Var) - D('}')
-def build_vars(s, l, t):
- return VarsJob(t[0])
-Vars.setParseAction(build_vars)
+ ## Make sure the TARGET exists and is a directory. It's a fundamental
+ ## assumption of this program that the entire TARGET tree is
+ ## disposable, so if something exists but isn't a directory, we should
+ ## kill it.
+ if OS.path.isdir(target):
+ pass
+ else:
+ if OS.path.exists(target):
+ STATUS.commit(filestatus(target, 'clear nondirectory'))
+ if not noact:
+ OS.unlink(target)
+ STATUS.commit(filestatus(target, 'create directory'))
+ if not noact:
+ OS.mkdir(target)
+
+ ## Keep a list of things in the target. As we convert files, we'll
+ ## check them off. Anything left over is rubbish and needs to be
+ ## deleted.
+ checklist = {}
+ try:
+ for i in OS.listdir(target):
+ checklist[i] = False
+ except OSError, err:
+ if err.errno not in (E.ENOENT, E.ENOTDIR):
+ raise
+
+ ## Keep track of the files in each category.
+ catmap = {}
+ todo = []
+ done = []
+
+ ## Work through the master files.
+ for f in sorted(OS.listdir(master)):
+
+ ## If the killswitch has been pulled then stop. The whole idea is
+ ## that we want to cause a clean shutdown if possible, so we don't
+ ## want to do it in the middle of encoding because the encoding
+ ## effort will have been wasted. This is the only place we need to
+ ## check. If we've exited the loop, then clearing old files will
+ ## probably be fast, and we'll either end up here when the recursive
+ ## call returns or we'll be in the same boat as before, clearing old
+ ## files, only up a level. If worst comes to worst, we'll be killed
+ ## forcibly somewhere inside `SH.rmtree', and that can continue where
+ ## it left off.
+ if KILLSWITCH.is_set():
+ return
+
+ ## Do something with the file.
+ with wrap(OS.path.join(master, f)) as masterfile:
+
+ ## If it's a directory then prepare to grobble it recursively, but
+ ## don't do that yet.
+ if OS.path.isdir(masterfile):
+ subdirs.add(f)
+ done.append(OS.path.join(target, f))
+
+ ## Otherwise it's a file. Work out what kind, and stash it under
+ ## the appropriate categories. Later, we'll apply policy to the
+ ## files, by category, and work out what to do with them all.
+ else:
+ gf = GIO.File(masterfile)
+ mime = gf.query_info('standard::content-type').get_content_type()
+ cats = []
+ for cat in pmap.iterkeys():
+ id = cat.identify(masterfile, mime)
+ if id is None: continue
+ catmap.setdefault(cat, []).append((masterfile, id))
+ cats.append((cat, id))
+ if not cats:
+ catmap.setdefault(None, []).append((masterfile, id))
+ todo.append((masterfile, cats))
+
+ ## Work through the categorized files to see what actions to do for
+ ## them.
+ for masterfile, cats in todo:
+ with wrap(masterfile):
+ done += grobble_file(masterfile, pmap, target,
+ [(cat, id, catmap[cat]) for cat, id in cats])
+
+ ## Check the results off the list so that we don't clear it later.
+ for f in done:
+ checklist[OS.path.basename(f)] = True
+
+ ## Maybe there's stuff in the target which isn't accounted for. Delete
+ ## it: either the master has changed, or the policy for this target has
+ ## changed. Either way, the old files aren't wanted.
+ for f in checklist:
+ if not checklist[f]:
+ STATUS.commit(filestatus(f, 'clear bogus file'))
+ if not noact:
+ bogus = OS.path.join(target, f)
+ try:
+ if OS.path.isdir(bogus):
+ SH.rmtree(bogus)
+ else:
+ OS.unlink(bogus)
+ except OSError, err:
+ if err.errno != E.ENOENT:
+ raise
+
+ ## If there are subdirectories which want processing then do those.
+ ## Keep the user amused by telling him where we are in the tree.
+ for d in sorted(subdirs):
+ dirs.append(d)
+ STATUS.set('/'.join(dirs))
+ with wrap(OS.path.join(master, d)) as masterdir:
+ try:
+ grobble_dir(masterdir,
+ [OS.path.join(target, d) for target in targets])
+ finally:
+ dirs.pop()
+ STATUS.set('/'.join(dirs))
-TopLevel = Vars | Target
-Config = R(TopLevel)
-Config.ignore(P.pythonStyleComment)
+ ## Right. We're ready to go.
+ grobble_dir(master, [t.targetdir for t in targets])
+ return broken
###--------------------------------------------------------------------------
### Command-line interface.
opts = parse_opts(SYS.argv[1:])
if 'master' not in VARS:
die("no master directory set")
- broken = []
- for t in TARGETS:
- g = Grobbler(t.policies, opts.noact)
- b = g.grobble(VARS['master'], t.targetdir)
- broken += b
+ broken = grobble(VARS['master'], TARGETS, opts.noact)
if broken:
moan('failed to convert some files:')
for file, exc in broken: