chiark - git - mdw - autoys/blob - gremlin/gremlin.in

   1 #! @PYTHON@
   2 ###
   3 ### Convert a directory tree of audio files
   4 ###
   5 ### (c) 2010 Mark Wooding
   6 ###
   7
   8 ###----- Licensing notice ---------------------------------------------------
   9 ###
  10 ### This program is free software; you can redistribute it and/or modify
  11 ### it under the terms of the GNU General Public License as published by
  12 ### the Free Software Foundation; either version 2 of the License, or
  13 ### (at your option) any later version.
  14 ###
  15 ### This program is distributed in the hope that it will be useful,
  16 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 ### GNU General Public License for more details.
  19 ###
  20 ### You should have received a copy of the GNU General Public License
  21 ### along with this program; if not, write to the Free Software Foundation,
  22 ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  23
  24 ###--------------------------------------------------------------------------
  25 ### External dependencies.
  26
  27 ## Language features.
  28 from __future__ import with_statement
  29
  30 ## Standard Python libraries.
  31 import sys as SYS
  32 import os as OS
  33 import errno as E
  34 import time as T
  35 import unicodedata as UD
  36 import fnmatch as FN
  37 import re as RX
  38 import shutil as SH
  39 import optparse as OP
  40 import threading as TH
  41 import shlex as L
  42 from math import sqrt
  43 from contextlib import contextmanager
  44
  45 ## eyeD3 tag fettling.
  46 import eyeD3 as E3
  47
  48 ## Gstreamer.  It picks up command-line arguments -- most notably `--help' --
  49 ## and processes them itself.  Of course, its help is completely wrong.  This
  50 ## kludge is due to Jonas Wagner.
  51 _argv, SYS.argv = SYS.argv, []
  52 import gobject as G
  53 import gio as GIO
  54 import gst as GS
  55 SYS.argv = _argv
  56
  57 ## Python Imaging.
  58 from PIL import Image as I
  59
  60 ## Python parsing.
  61 import pyparsing as P
  62
  63 ###--------------------------------------------------------------------------
  64 ### Special initialization.
  65
## The program's version string.  The `@VERSION@' token looks like a
## placeholder to be substituted when this template is processed -- TODO
## confirm against the build machinery.
VERSION = '@VERSION@'

## GLib.  Call GLib's thread initialization once, at import time, before any
## main loops or pipelines are created further down.
G.threads_init()
  70
  71 ###--------------------------------------------------------------------------
  72 ### Eyecandy progress reports.
  73
  74 def charwidth(s):
  75   """
  76   Return the width of S, in characters.
  77
  78   Specifically, this is the number of backspace characters required to
  79   overprint the string S.  If the current encoding for `stdout' appears to be
  80   Unicode then do a complicated Unicode thing; otherwise assume that
  81   characters take up one cell each.
  82
  83   None of this handles tab characters in any kind of useful way.  Sorry.
  84   """
  85
  86   ## If there's no encoding for stdout then we're doing something stupid.
  87   if SYS.stdout.encoding is None: return len(s)
  88
  89   ## Turn the string into Unicode so we can hack on it properly.  Maybe that
  90   ## won't work out, in which case fall back to being stupid.
  91   try: u = s.decode(SYS.stdout.encoding)
  92   except UnicodeError: return len(s)
  93
  94   ## Our main problem is combining characters, but we should also try to
  95   ## handle wide (mostly Asian) characters, and zero-width ones.  This hack
  96   ## is taken mostly from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
  97   w = 0
  98   for ch in u:
  99     cd = ord(ch)
 100     if UD.category(ch) in ['Cf', 'Me', 'Mn'] or \
 101           0x1160 <= cd <= 0x11ff: pass
 102     elif UD.east_asian_width(ch) in ['F', 'W']: w += 2
 103     else: w += 1
 104
 105   ## Done.
 106   return w
 107
 108 class StatusLine (object):
 109   """
 110   Maintains a status line containing ephemeral progress information.
 111
 112   The status line isn't especially important, but it keeps interactive users
 113   amused.
 114
 115   There should be only one status line object in your program; otherwise
 116   they'll interfere with each other and get confused.
 117
 118   The update algorithm (in `set') is fairly careful to do the right thing
 119   with long status `lines', and to work properly in an Emacs `shell' buffer.
 120   """
 121
 122   def __init__(me):
 123     "Initialize the status line."
 124     me._last = ''
 125     me._lastlen = 0
 126     me.eyecandyp = OS.isatty(SYS.stdout.fileno())
 127
 128   def set(me, line):
 129     """
 130     Set the status line contents to LINE, replacing what was there before.
 131
 132     This only produces actual output if stdout is interactive.
 133     """
 134     n = len(line)
 135
 136     ## Eyecandy update.
 137     if me.eyecandyp:
 138
 139       ## If the old line was longer, we need to clobber its tail, so work out
 140       ## what that involves.
 141       if n < me._lastlen:
 142         b = charwidth(me._last[n:])
 143         pre = '\b'*b + ' '*b
 144       else:
 145         pre = ''
 146
 147       ## Now figure out the length of the common prefix between what we had
 148       ## before and what we have now.  This reduces the amount of I/O done,
 149       ## which keeps network traffic down on SSH links, and keeps down the
 150       ## amount of work slow terminal emulators like Emacs have to do.
 151       i = 0
 152       m = min(n, me._lastlen)
 153       while i < m and line[i] == me._last[i]:
 154         i += 1
 155
 156       ## Actually do the output, all in one syscall.
 157       b = charwidth(me._last[i:])
 158       SYS.stdout.write(pre + '\b'*b + line[i:])
 159       SYS.stdout.flush()
 160
 161     ## Update our idea of what's gone on.
 162     me._lastlen = n
 163     me._last = line
 164
 165   def clear(me):
 166     "Clear the status line.  Just like set('')."
 167     me.set('')
 168
 169   def commit(me, line = None):
 170     """
 171     Commit the current status line, and maybe the string LINE.
 172
 173     If the current status line is nonempty, then commit it to the transcript.
 174     If LINE is not None, then commit that to the transcript too.
 175
 176     After all of this, we clear the status line to get back to a clean state.
 177     """
 178     if me._last:
 179       if me.eyecandyp:
 180         SYS.stdout.write('\n')
 181       else:
 182         SYS.stdout.write(me._last + '\n')
 183     if line is not None:
 184       SYS.stdout.write(line + '\n')
 185     me._lastlen = 0
 186     me._last = ''
 187
## The program's one status line.  Everything which reports progress goes
## through this object rather than making a StatusLine of its own.
STATUS = StatusLine()
 189
 190 def filestatus(file, status):
 191   return '%s%s: %s' % (' '*8, OS.path.basename(file), status)
 192
 193 class ProgressEyecandy (object):
 194   """
 195   Provide amusement while something big and complicated is happening.
 196
 197   This is an abstract class.  Subclasses must provide a method `progress'
 198   returning a pair (CURRENT, MAX) indicating the current progress through the
 199   operation.
 200   """
 201
 202   def __init__(me, what, silentp = False):
 203     """
 204     Initialize a progress meter.
 205
 206     WHAT is a prefix string to be written before the progress eyecandy
 207     itself.
 208     """
 209     me._what = what
 210     me._silentp = silentp
 211     me._spinner = 0
 212     me._start = T.time()
 213
 214   def _fmt_time(me, t):
 215     "Format T as a time, in (maybe hours) minutes and seconds."
 216     s, t = t % 60, int(t/60)
 217     m, h = t % 60, int(t/60)
 218     if h > 0:
 219       return '%d:%02d:%02d' % (h, m, s)
 220     else:
 221       return '%02d:%02d' % (m, s)
 222
 223   def show(me):
 224     "Show the current level of progress."
 225
 226     ## If we're not showing pointless frippery, don't bother at all.
 227     if not STATUS.eyecandyp:
 228       return
 229
 230     ## Update the spinner index.
 231     me._spinner = (me._spinner + 1)%4
 232
 233     ## Fetch the current progress information.  Note that we always fetch
 234     ## both the current and maximum levels, because both might change if an
 235     ## operation revises its idea of how much work needs doing.
 236     cur, max = me.progress()
 237
 238     ## If we couldn't get progress information, display something vaguely
 239     ## amusing anyway.
 240     if cur is None or max is None:
 241       STATUS.set('%s %c [unknown progress]' %
 242                  (me._what, r'/-\|'[me._spinner]))
 243       return
 244
 245     ## Work out -- well, guess -- the time remaining.
 246     if cur:
 247       t = T.time()
 248       eta = me._fmt_time((t - me._start)*(max - cur)/cur)
 249     else:
 250       eta = '???'
 251
 252     ## Set the status bar.
 253     n = 40*cur/max
 254     STATUS.set('%s %c [%s%s] %3d%% (%s)' % \
 255                (me._what,
 256                 r'/-\|'[me._spinner],
 257                 '='*n, ' '*(40 - n),
 258                 100*cur/max,
 259                 eta))
 260
 261   def done(me, win = True):
 262     "Show a completion notice, or a failure if WIN is false."
 263     if not win:
 264       STATUS.set('%s FAILED!' % me._what)
 265     elif not me._silentp:
 266       STATUS.set('%s done (%s)' %
 267                  (me._what,
 268                   me._fmt_time(T.time() - me._start)))
 269     else:
 270       return
 271     STATUS.commit()
 272
 273 ###--------------------------------------------------------------------------
 274 ### Timeout handling.
 275
## Set by `timeout' when the first stage of the time limit expires.
## Presumably long-running work elsewhere checks this and winds down early --
## TODO confirm against the code which reads it.
KILLSWITCH = TH.Event()

def timeout(t0, t1):
  """
  Enforce a two-stage time limit on the program.

  Sleep for T0 seconds and then set `KILLSWITCH'.  Sleep for a further T1
  seconds, and then complain (via `moan') and terminate the process
  immediately, with exit status 3, using `OS._exit'.

  This blocks for the whole of the two intervals and ends by killing the
  process, so it presumably runs in a thread of its own -- TODO confirm
  against the caller.
  """
  T.sleep(t0)
  KILLSWITCH.set()
  T.sleep(t1)
  moan('dying messily due to timeout')
  OS._exit(3)
 284
 285 ###--------------------------------------------------------------------------
 286 ### Parsing utilities.
 287
## Allow hyphens in identifiers.  Note that this changes the default keyword
## characters for every `P.Keyword' made afterwards, not just ours.
IDCHARS = P.alphanums + '-_'
P.Keyword.setDefaultKeywordChars(IDCHARS)

## Some common kinds of tokens.  `Name' is a word made of identifier
## characters; `Num' is a run of digits, converted to an integer by its parse
## action; `String' is a double-quoted string with backslash as the escape
## character.
Name = P.Word(IDCHARS)
Num = P.Word(P.nums).setParseAction(lambda toks: map(int, toks))
String = P.QuotedString('"', '\\')

## Handy abbreviations for constructed parser elements.
##
## K(k)   Match the keyword K, leaving nothing in the results.
## D(d)   Match the literal (delimiter) D, leaving nothing in the results.
## R(p)   Match zero or more repetitions of P; the parse action wraps the
##        matched tokens up as a single item in the results.
## O      Short for `P.Optional'.
def K(k): return P.Keyword(k).suppress()
def D(d): return P.Literal(d).suppress()
def R(p): return P.ZeroOrMore(p).setParseAction(lambda s, l, t: [t])
O = P.Optional
 302
 303 ###--------------------------------------------------------------------------
 304 ### Format identification and conversion.
 305
 306 class IdentificationFailure (Exception):
 307   pass
 308
 309 class FileCategory (object):
 310   """
 311   A FileCategory represents a class of files.
 312
 313   For example, it's sensible to consider audio, or image files as a
 314   category.  A file category knows how to recognize member files from
 315   MIME content types.
 316   """
 317
 318   def __init__(me, name, mime_pats, ident):
 319     """
 320     Construct a new category.
 321
 322     The PATS are a list of `fnmatch' patterns to be compared with a MIME
 323     type.  The IDENT is a function which produces an identification object
 324     given a file's name and first-guess MIME type.  The object is passed to a
 325     Format's `check' method to see whether a file needs re-encoding, and to
 326     `convert' to assist with the conversion.
 327
 328     An identification object must have an attribute `mime' which is a set of
 329     possible MIME types accumulated for the object.
 330     """
 331     me.name = name
 332     me._mime_pats = mime_pats
 333     me._ident = ident
 334     CATEGORYMAP[name] = me
 335
 336   def identify(me, file, mime):
 337     """
 338     Attempt to identify FILE, given its apparent MIME type.
 339
 340     If identification succeeds, return an identification object which can be
 341     used by associated file formats; otherwise return None.
 342     """
 343     for p in me._mime_pats:
 344       if not FN.fnmatchcase(mime, p):
 345         continue
 346       try:
 347         return me._ident(file, mime)
 348       except IdentificationFailure:
 349         pass
 350     return None
 351
 352 class BaseFormat (object):
 353   """
 354   A BaseFormat object represents a particular encoding and parameters.
 355
 356   The object can verify (the `check' method) whether a particular file
 357   matches its requirements, and if necessary (`encode') re-encode a file.
 358
 359   Subclasses should define the following methods.
 360
 361   check(ID)
 362           Answer whether the file identified by ID is acceptable according to
 363           the receiver's parameters.
 364
 365   convert(MASTER, ID, TARGET)
 366           Convert the file MASTER, which has been identified as ID, according
 367           to the receiver's parameters, writing the output to TARGET.
 368
 369   Subclasses should also provide these attributes.
 370
 371   CATEGORY
 372           A FileCategory object for the category of files that this format
 373           lives within.
 374
 375   EXT     A file extension to be applied to encoded output files.
 376
 377   NAME    A user-facing name for the format.
 378
 379   PROPS   A parser element to parse a property definition.  It should produce
 380           a pair NAME, VALUE to be stored in a dictionary.
 381
 382   Subclasses for different kinds of file may introduce more subclass
 383   protocol.
 384   """
 385
 386   def fixup(me, path):
 387     """Post-encoding fixups."""
 388     pass
 389
## Registries, both keyed by name.  FORMATMAP holds the format classes
## registered by `defformat'; CATEGORYMAP holds the FileCategory objects, each
## of which registers itself when it is constructed.
FORMATMAP = {}
CATEGORYMAP = {}
 392
 393 def defformat(name, cls):
 394   "Define a format NAME using class CLS."
 395   if not hasattr(cls, 'NAME'):
 396     raise ValueError, 'abstract class'
 397   if not hasattr(cls, 'CATEGORY'):
 398     raise ValueError, 'no category'
 399   FORMATMAP[name] = cls
 400
class FormatParser (P.ParserElement):
  """
  Parse a format specifier:

  format-spec ::= string [format-properties]
  format-properties ::= `{' format-property (`,' format-property)* `}'

  The syntax of a format-property is determined by the PROPS attribute on the
  named format and its superclasses.

  Note that the format name is actually matched using the `Name' token.  The
  result of a successful parse is an instance of the named format's class,
  constructed with the parsed properties as keyword arguments.
  """

  ## We cache the parser elements we generate to avoid enormous consing.
  ## The keys are format names; a value is either a property-list parser or
  ## None if the format has no properties at all.
  CACHE = {}

  def parseImpl(me, s, loc, actp = True):
    """
    Parse a format specifier from the string S, starting at LOC.

    ACTP is passed on to the subsidiary parsers' `_parse' methods.  Return a
    pair (LOC, FORMAT): the location following the specifier, and the new
    format object.  Raise `P.ParseException' if the format name isn't
    registered in `FORMATMAP'.
    """

    ## Firstly, determine the format name.
    loc, r = Name._parse(s, loc, actp)
    fmt = r[0]

    ## Look up the format class.
    try: fcls = FORMATMAP[fmt]
    except KeyError:
      raise P.ParseException(s, loc, "Unknown format `%s'" % fmt)

    ## Fetch the property-list parser from the cache, if possible; else
    ## construct it.
    try:
      pp = me.CACHE[fmt]
    except KeyError:

      ## Walk the class's ancestry, collecting the distinct PROPS parsers as
      ## alternatives.  A class which doesn't define PROPS of its own yields
      ## the one it inherits, which is why we keep track of those already
      ## seen.
      seen = set()
      prop = None
      for c in fcls.mro():
        try: p = c.PROPS
        except AttributeError: continue
        if p in seen: continue
        if prop is None: prop = p
        else: prop |= p
        seen.add(p)

      ## No properties at all: remember that.  Otherwise build an optional,
      ## brace-enclosed, comma-separated list of properties whose result is a
      ## dictionary made from the (NAME, VALUE) pairs.
      if prop is None:
        pp = me.CACHE[fmt] = None
      else:
        props = P.delimitedList(prop)
        props.setParseAction(lambda s, l, t: dict(t.asList()))
        pp = me.CACHE[fmt] = O(D('{') - props - D('}'))

    ## Parse the properties.  The property list is optional, so there might
    ## be no results even if the format has properties.
    if pp is None:
      pd = {}
    else:
      loc, r = pp._parse(s, loc, actp)
      if r: pd = r[0]
      else: pd = {}

    ## Construct the format object and return it.
    return loc, fcls(**pd)

## The parser element for format specifiers, as used by the policy grammar.
Format = FormatParser()
 457
 458 Format = FormatParser()
 459
 460 def prop(kw, pval, tag = None):
 461   if tag is None: tag = kw
 462   if pval is None:
 463     p = K(kw)
 464     p.setParseAction(lambda s, l, t: (tag, True))
 465   else:
 466     p = K(kw) + D('=') + pval
 467     p.setParseAction(lambda s, l, t: (tag, t[0]))
 468   return p
 469
 470 ###--------------------------------------------------------------------------
 471 ### Policies and actions.
 472
 473 class Action (object):
 474   """
 475   An Action object represents a conversion action to be performed.
 476
 477   This class isn't intended to be instantiated directly.  It exists to define
 478   some protocol common to all Action objects.
 479
 480   Action objects have the following attributes.
 481
 482   master        The name of the master (source) file.
 483
 484   target        The name of the target (destination) file.
 485
 486   PRIORITY      The priority of the action, for deciding which of two actions
 487                 to perform.  Higher priorities are more likely to win.
 488
 489   Converting an Action to a string describes the action in a simple
 490   user-readable manner.  The `perform' method actually carries the action
 491   out.
 492   """
 493
 494   PRIORITY = 0
 495
 496   def __init__(me, master):
 497     "Stash the MASTER file name for later."
 498     me.master = master
 499
 500   def choose(me, him):
 501     "Choose either ME or HIM and return one."
 502     if him is None or me.PRIORITY > him.PRIORITY:
 503       return me
 504     else:
 505       return him
 506
 507 class CopyAction (Action):
 508   """
 509   An Action object for simply copying a file.
 510
 511   Actually we try to hardlink it first, falling back to a copy later.  This
 512   is both faster and more efficient with regard to disk space.
 513   """
 514
 515   ## Copying is good.  Linking is really good, but we can't tell the
 516   ## difference at this stage.
 517   PRIORITY = 10
 518
 519   def __init__(me, master, targetdir):
 520     "Initialize a CopyAction, from MASTER to the TARGETDIR directory."
 521     Action.__init__(me, master)
 522     me.target = OS.path.join(targetdir, OS.path.basename(master))
 523
 524   def __str__(me):
 525     return 'copy/link'
 526
 527   def perform(me):
 528     "Actually perform a CopyAction."
 529     try:
 530       STATUS.set(filestatus(me.master, 'link'))
 531       OS.link(me.master, me.target)
 532     except OSError, err:
 533       if err.errno != E.EXDEV:
 534         raise
 535       STATUS.set(filestatus(me.master, 'copy'))
 536       new = me.target + '.new'
 537       SH.copyfile(me.master, new)
 538       OS.rename(new, me.target)
 539     STATUS.commit()
 540
 541 class ConvertAction (Action):
 542   """
 543   An Action object for converting a file to a given format.
 544
 545   Additional attributes:
 546
 547   id            The identification object for the master file.
 548
 549   format        The format to which we're meant to conver the master.
 550   """
 551
 552   def __init__(me, master, targetdir, id, format):
 553     "Initialize a ConvertAction."
 554     Action.__init__(me, master)
 555     stem, ext = OS.path.splitext(OS.path.basename(master))
 556     me.target = OS.path.join(targetdir, stem + '.' + format.EXT)
 557     me.id = id
 558     me.format = format
 559
 560   def __str__(me):
 561     return 'convert to %s' % me.format.NAME
 562
 563   def perform(me):
 564     "Acually perform a ConvertAction."
 565     STATUS.set(filestatus(me.master, me))
 566     me.format.convert(me.master, me.id, me.target)
 567
## The grammar for a policy.  The `and' and `or' productions contain nested
## policies, so this has to be declared in advance of them; the alternatives
## are filled in with `<<' once they have all been defined.
Policy = P.Forward()
 569
class FormatPolicy (object):
  """
  A FormatPolicy object represents a set of rules for how to convert files.

  Given a master file which has already been identified, a FormatPolicy
  returns a list of actions to be performed.  This class defines no
  behaviour of its own: it documents the protocol which policy classes
  implement.  The methods required of a FormatPolicy are:

  setcategory(CAT)
          Store CAT as the policy's category.  Check that this is consistent
          with the policy as stored; the leaf policies in this file raise
          `ValueError' if it isn't.

  actions(MASTER, TARGETDIR, ID, COHORT)
          Given a MASTER file, identified as ID, a target directory
          TARGETDIR, and a list COHORT of (FILE, ID) pairs for other files
          of the same category in the same directory, return a list of
          actions to be performed to get the target directory into the right
          form.  The list might be empty if the policy object /rejects/ the
          file.
  """
 589
 590 class AndPolicy (FormatPolicy):
 591   """
 592   A FormatPolicy which does the union of a bunch of other policies.
 593
 594   Each subsidiary policy is invoked in turn.  The highest-priority action for
 595   each target file is returned.
 596   """
 597
 598   def __init__(me, policies):
 599     me._policies = policies
 600
 601   def setcategory(me, cat):
 602     me.cat = cat
 603     for p in me._policies:
 604       p.setcategory(cat)
 605
 606   def actions(me, master, targetdir, id, cohort):
 607     tmap = {}
 608     for p in me._policies:
 609       for a in p.actions(master, targetdir, id, cohort):
 610         if a.target in tmap:
 611           tmap[a.target] = a.choose(tmap.get(a.target))
 612         else:
 613           tmap[a.target] = a
 614     return tmap.values()
 615
 616 And = K('and') - D('{') - R(Policy) - D('}')
 617 And.setParseAction(lambda s, l, t: AndPolicy(t[0]))
 618
 619 class OrPolicy (FormatPolicy):
 620   """
 621   A FormatPolicy which tries other policies and uses the first that accepts.
 622
 623   Each subsidiary policy is invoked in turn.  If any accepts, the actions it
 624   proposes are turned and no further policies are invoked.  If none accepts
 625   then the file is rejected.
 626   """
 627
 628   def __init__(me, policies):
 629     me._policies = policies
 630
 631   def setcategory(me, cat):
 632     me.cat = cat
 633     for p in me._policies:
 634       p.setcategory(cat)
 635
 636   def actions(me, master, targetdir, id, cohort):
 637     for p in me._policies:
 638       aa = p.actions(master, targetdir, id, cohort)
 639       if aa:
 640         return aa
 641     else:
 642       return []
 643
 644 Or = K('or') - D('{') - R(Policy) - D('}')
 645 Or.setParseAction(lambda s, l, t: OrPolicy(t[0]))
 646
 647 class AcceptPolicy (FormatPolicy):
 648   """
 649   A FormatPolicy which copies files in a particular format.
 650
 651   If all of the files in a cohort are recognized as being in a particular
 652   format (including this one), then accept it with a CopyAction; otherwise
 653   reject.
 654   """
 655
 656   def __init__(me, format):
 657     me._format = format
 658
 659   def setcategory(me, cat):
 660     if me._format.CATEGORY is not cat:
 661       raise ValueError, \
 662             "Accept format `%s' has category `%s', not `%s'" % \
 663             (me._format.__class__.__name__,
 664              me._format.CATEGORY.name, cat.name)
 665     me.cat = cat
 666
 667   def actions(me, master, targetdir, id, cohort):
 668     if me._format.check(id) and \
 669        all(me._format.check(cid) for f, cid in cohort):
 670       return [CopyAction(master, targetdir)]
 671     else:
 672       return []
 673
 674 Accept = K('accept') - Format
 675 Accept.setParseAction(lambda s, l, t: AcceptPolicy(t[0]))
 676
 677 class ConvertPolicy (FormatPolicy):
 678   """
 679   A FormatPolicy which copies files in a particular format or converts if
 680   necessary.
 681   """
 682   def __init__(me, format):
 683     me._format = format
 684
 685   def setcategory(me, cat):
 686     if me._format.CATEGORY is not cat:
 687       raise ValueError, \
 688             "Accept format `%s' has category `%s', not `%s'" % \
 689             (me._format.__class__.__name__,
 690              me._format.CATEGORY.name, cat.name)
 691     me.cat = cat
 692
 693   def actions(me, master, targetdir, id, cohort):
 694     if me._format.check(id):
 695       return [CopyAction(master, targetdir)]
 696     else:
 697       return [ConvertAction(master, targetdir, id, me._format)]
 698
 699 Convert = K('convert') - Format
 700 Convert.setParseAction(lambda s, l, t: ConvertPolicy(t[0]))
 701
## Now that all of the alternatives exist, fill in the forward declaration:
## a policy is any one of the four kinds defined above.
Policy << (And | Or | Accept | Convert)
 703
 704 ###--------------------------------------------------------------------------
 705 ### Audio handling, based on GStreamer.
 706
 707 def make_element(factory, name = None, **props):
 708   "Return a new element from the FACTORY with the given NAME and PROPS."
 709   elt = GS.element_factory_make(factory, name)
 710   elt.set_properties(**props)
 711   return elt
 712
 713 class GStreamerProgressEyecandy (ProgressEyecandy):
 714   """
 715   Provide amusement while GStreamer is busy doing something.
 716
 717   The GStreamerProgressEyecandy object is a context manager.  Wrap it round
 718   your GStreamer loop to provide progress information for an operation.
 719   """
 720
 721   def __init__(me, what, elt, **kw):
 722     """
 723     Initialize a progress meter.
 724
 725     WHAT is a prefix string to be written before the progress eyecandy
 726     itself.  ELT is a GStreamer element to interrogate to find the progress
 727     information.
 728     """
 729     me._elt = elt
 730     ProgressEyecandy.__init__(me, what, **kw)
 731
 732   def _update(me):
 733     "Called by GLib main event loop to update the eyecandy."
 734     me.show()
 735     return True
 736
 737   def _timer(me):
 738     """
 739     Update the progress meter.
 740
 741     This is called periodically by the GLib main event-processing loop.
 742     """
 743     me.show()
 744     return True
 745
 746   def progress(me):
 747     "Return the current progress as a pair (CURRENT, MAX)."
 748
 749     ## Fetch the current progress information.  We get the duration each
 750     ## time, because (particularly with VBR-encoded MP3 inputs) the estimated
 751     ## duration can change as we progress.  Hopefully it settles down fairly
 752     ## soon.
 753     try:
 754       t, hunoz = me._elt.query_position(GS.FORMAT_TIME)
 755       end, hukairz = me._elt.query_duration(GS.FORMAT_TIME)
 756       return t, end
 757     except GS.QueryError:
 758       return None, None
 759
 760   def __enter__(me):
 761     "Enter context: attach progress meter display."
 762
 763     ## If we're not showing pointless frippery, don't bother at all.
 764     if not STATUS.eyecandyp:
 765       return
 766
 767     ## Update regularly.  The pipeline runs asynchronously.
 768     me._id = G.timeout_add(200, me._update)
 769
 770   def __exit__(me, ty, val, tb):
 771     "Leave context: remove display and report completion or failure."
 772
 773     ## If we're not showing pointless frippery, there's nothing to remove.
 774     if STATUS.eyecandyp:
 775       G.source_remove(me._id)
 776
 777     ## Report completion anyway.
 778     me.done(ty is None)
 779
 780     ## As you were.
 781     return False
 782
 783 class AudioIdentifier (object):
 784   """
 785   Analyses and identifies an audio file.
 786
 787   Important properties are:
 788
 789   cap     A capabilities structure describing the audio file data.  The most
 790           interesting thing in here is probably its name, which is a MIME
 791           type describing the data.
 792
 793   dcap    A capabilities structure describing the decoded audio data.  This
 794           is of interest during conversion.
 795
 796   tags    A dictionary containing metadata tags from the file.  These are in
 797           GStreamer's encoding-independent format.
 798
 799   bitrate An approximation to the stream's bitrate, in kilobits per second.
 800           This might be slow to work out for some files so it's computed on
 801           demand.
 802   """
 803
 804   def __init__(me, file, mime):
 805     "Initialize the object suitably for identifying FILE."
 806
 807     ## Make some initial GStreamer objects.  We'll want the pipeline later if
 808     ## we need to analyse a poorly tagged MP3 stream, so save it away.
 809     me._pipe = GS.Pipeline()
 810     me._file = file
 811     bus = me._pipe.get_bus()
 812     bus.add_signal_watch()
 813     loop = G.MainLoop()
 814
 815     ## The basic recognition kit is based around `decodebin'.  We must keep
 816     ## it happy by giving it sinks for the streams it's found, which it
 817     ## announces asynchronously.
 818     source = make_element('filesrc', 'file', location = file)
 819     decoder = make_element('decodebin', 'decode')
 820     sink = make_element('fakesink')
 821     def decoder_pad_arrived(elt, pad):
 822       if pad.get_caps()[0].get_name().startswith('audio/'):
 823         elt.link_pads(pad.get_name(), sink, 'sink')
 824     dpaid = decoder.connect('pad-added', decoder_pad_arrived)
 825     me._pipe.add(source, decoder, sink)
 826     GS.element_link_many(source, decoder)
 827
 828     ## Arrange to collect tags from the pipeline's bus as they're reported.
 829     ## If we reuse the pipeline later, we'll want different bus-message
 830     ## handling, so make sure we can take the signal handler away.
 831     tags = {}
 832     fail = []
 833     def bus_message(bus, msg):
 834       if msg.type == GS.MESSAGE_ERROR:
 835         fail[:] = (ValueError, msg.structure['debug'], None)
 836         loop.quit()
 837       elif msg.type == GS.MESSAGE_STATE_CHANGED:
 838         if msg.structure['new-state'] == GS.STATE_PAUSED and \
 839                msg.src == me._pipe:
 840           loop.quit()
 841       elif msg.type == GS.MESSAGE_TAG:
 842         tags.update(msg.structure)
 843     bmid = bus.connect('message', bus_message)
 844
 845     ## We want to identify the kind of stream this is.  (Hmm.  The MIME type
 846     ## recognizer has already done this work, but GStreamer is probably more
 847     ## reliable.)  The `decodebin' has a `typefind' element inside which will
 848     ## announce the identified media type.  All we need to do is find it and
 849     ## attach a signal handler.  (Note that the handler might be run in the
 850     ## thread context of the pipeline element, but Python's GIL will keep
 851     ## things from being too awful.)
 852     me.cap = None
 853     me.dcap = None
 854     for e in decoder.elements():
 855       if e.get_factory().get_name() == 'typefind':
 856         tfelt = e
 857         break
 858     else:
 859       assert False, 'failed to find typefind element'
 860
 861     ## Crank up most of the heavy machinery.  The message handler will stop
 862     ## the loop when things seem to be sufficiently well underway.
 863     me._pipe.set_state(GS.STATE_PAUSED)
 864     loop.run()
 865     bus.disconnect(bmid)
 866     decoder.disconnect(dpaid)
 867     if fail:
 868       me._pipe.set_state(GS.STATE_NULL)
 869       raise fail[0], fail[1], fail[2]
 870
 871     ## Store the collected tags.
 872     me.tags = tags
 873
 874     ## Gather the capabilities.  The `typefind' element knows the input data
 875     ## type.  The 'decodebin' knows the raw data type.
 876     me.cap = tfelt.get_pad('src').get_negotiated_caps()[0]
 877     me.mime = set([mime, me.cap.get_name()])
 878     me.dcap = sink.get_pad('sink').get_negotiated_caps()[0]
 879
 880     ## If we found a plausible bitrate then stash it.  Otherwise note that we
 881     ## failed.  If anybody asks then we'll work it out then.
 882     if 'nominal-bitrate' in tags:
 883       me._bitrate = tags['nominal-bitrate']/1000
 884     elif 'bitrate' in tags and tags['bitrate'] >= 80000:
 885       me._bitrate = tags['bitrate']/1000
 886     else:
 887       me._bitrate = None
 888
 889     ## The bitrate computation wants the file size.  Ideally we'd want the
 890     ## total size of the frames' contents, but that seems hard to dredge
 891     ## out.  If the framing overhead is small, this should be close enough
 892     ## for our purposes.
 893     me._bytes = OS.stat(file).st_size
 894
 895   def __del__(me):
 896     "Close the pipeline down so we don't leak file descriptors."
 897     me._pipe.set_state(GS.STATE_NULL)
 898
 899   @property
 900   def bitrate(me):
 901     """
 902     Return the approximate bit-rate of the input file.
 903
 904     This might take a while if we have to work it out the hard way.
 905     """
 906
 907     ## If we already know the answer then just return it.
 908     if me._bitrate is not None:
 909       return me._bitrate
 910
 911     ## Make up a new main loop.
 912     loop = G.MainLoop()
 913
 914     ## Watch for bus messages.  We'll stop when we reach the end of the
 915     ## stream: then we'll have a clear idea of how long the track was.
 916     fail = []
 917     def bus_message(bus, msg):
 918       if msg.type == GS.MESSAGE_ERROR:
 919         fail[:] = (ValueError, msg.structure['debug'], None)
 920         loop.quit()
 921       elif msg.type == GS.MESSAGE_EOS:
 922         loop.quit()
 923     bus = me._pipe.get_bus()
 924     bmid = bus.connect('message', bus_message)
 925
 926     ## Get everything moving, and keep the user amused while we work.
 927     me._pipe.set_state(GS.STATE_PLAYING)
 928     with GStreamerProgressEyecandy(filestatus(file, 'measure bitrate') %
 929                                    me._pipe,
 930                                    silentp = True):
 931       loop.run()
 932     bus.disconnect(bmid)
 933     if fail:
 934       me._pipe.set_state(GS.STATE_NULL)
 935       raise fail[0], fail[1], fail[2]
 936
 937     ## Now we should be able to find out our position accurately and work out
 938     ## a bitrate.  Cache it in case anybody asks again.
 939     t, hukairz = me._pipe.query_position(GS.FORMAT_TIME)
 940     me._bitrate = int(8*me._bytes*1e6/t)
 941
 942     ## Done.
 943     return me._bitrate
 944
 945 class AudioFormat (BaseFormat):
 946   """
 947   An AudioFormat is a kind of Format specialized for audio files.
 948
 949   Format checks are done on an AudioIdentifier object.
 950   """
 951
 952   PROPS = prop('bitrate', Num)
 953
 954   ## libmagic reports `application/ogg' for Ogg Vorbis files.  We've switched
 955   ## to GIO now, which reports either `audio/ogg' or `audio/x-vorbis+ogg'
 956   ## depending on how thorough it's trying to be.  Still, it doesn't do any
 957   ## harm here; the main risk is picking up Ogg Theora files by accident, and
 958   ## we'll probably be able to extract the audio from them anyway.
 959   CATEGORY = FileCategory('audio', ['audio/*', 'application/ogg'],
 960                           AudioIdentifier)
 961
 962   def __init__(me, bitrate = None):
 963     "Construct an object, requiring an approximate bitrate."
 964     me.bitrate = bitrate
 965
 966   def check(me, id):
 967     """
 968     Return whether the AudioIdentifier ID is suitable for our purposes.
 969
 970     Subclasses can either override this method or provide a property
 971     `MIMETYPES', which is a list (other thing that implements `__contains__')
 972     of GStreamer MIME types matching this format.
 973     """
 974     return id.mime & me.MIMETYPES and \
 975            (me.bitrate is None or id.bitrate <= me.bitrate * sqrt(2))
 976
 977   def encoder(me):
 978     """
 979     Constructs a GStreamer element to encode audio input.
 980
 981     Subclasses can either override this method (or replace `encode'
 982     entirely), or provide a method `encoder_chain' which returns a list of
 983     elements to be linked together in sequence.  The first element in the
 984     chain must have a pad named `sink' and the last must have a pad named
 985     `src'.
 986     """
 987     elts = me.encoder_chain()
 988     bin = GS.Bin()
 989     bin.add(*elts)
 990     GS.element_link_many(*elts)
 991     bin.add_pad(GS.GhostPad('sink', elts[0].get_pad('sink')))
 992     bin.add_pad(GS.GhostPad('src', elts[-1].get_pad('src')))
 993     return bin
 994
 995   def convert(me, master, id, target):
 996     """
 997     Encode audio from MASTER, already identified as ID, writing it to TARGET.
 998
 999     See `encoder' for subclasses' responsibilities.
1000     """
1001
1002     ## Construct the necessary equipment.
1003     pipe = GS.Pipeline()
1004     bus = pipe.get_bus()
1005     bus.add_signal_watch()
1006     loop = G.MainLoop()
1007
1008     ## Make sure that there isn't anything in the way of our output.  We're
1009     ## going to write to a scratch file so that we don't get confused by
1010     ## half-written rubbish left by a crashed program.
1011     new = target + '.new'
1012     try:
1013       OS.unlink(new)
1014     except OSError, err:
1015       if err.errno != E.ENOENT:
1016         raise
1017
1018     ## Piece together our pipeline.  The annoying part is that the
1019     ## `decodebin' doesn't have any source pads yet, so our chain is in two
1020     ## halves for now.
1021     source = make_element('filesrc', 'source', location = master)
1022     decoder = make_element('decodebin', 'decode')
1023     convert = make_element('audioconvert', 'convert')
1024     encoder = me.encoder()
1025     sink = make_element('filesink', 'sink', location = new)
1026     pipe.add(source, decoder, convert, encoder, sink)
1027     GS.element_link_many(source, decoder)
1028     GS.element_link_many(convert, encoder, sink)
1029
1030     ## Some decoders (e.g., the AC3 decoder) include channel-position
1031     ## indicators in their output caps.  The Vorbis encoder interferes with
1032     ## this, and you end up with a beautifully encoded mono signal from a
1033     ## stereo source.  From a quick butchers at the `vorbisenc' source, I
1034     ## /think/ that this is only a problem with stereo signals: mono signals
1035     ## are mono already, and `vorbisenc' accepts channel positions if there
1036     ## are more than two channels.
1037     ##
1038     ## So we have this bodge.  We already collected the decoded audio caps
1039     ## during identification.  So if we see 2-channel audio with channel
1040     ## positions, we strip the positions off forcibly by adding a filter.
1041     if id.dcap.get_name().startswith('audio/x-raw-') and \
1042        id.dcap.has_field('channels') and \
1043        id.dcap['channels'] == 2 and \
1044        id.dcap.has_field('channel-positions'):
1045       dcap = GS.Caps()
1046       c = id.dcap.copy()
1047       c.remove_field('channel-positions')
1048       dcap.append(c)
1049     else:
1050       dcap = None
1051
1052     ## Hook onto the `decodebin' so we can link together the two halves of
1053     ## our encoding chain.  For now, we'll hope that there's only one audio
1054     ## stream in there, and just throw everything else away.
1055     def decoder_pad_arrived(elt, pad):
1056       if pad.get_caps()[0].get_name().startswith('audio/'):
1057         if dcap:
1058           elt.link_pads_filtered(pad.get_name(), convert, 'sink', dcap)
1059         else:
1060           elt.link_pads(pad.get_name(), convert, 'sink')
1061     decoder.connect('pad-added', decoder_pad_arrived)
1062
1063     ## Watch the bus for completion messages.
1064     fail = []
1065     def bus_message(bus, msg):
1066       if msg.type == GS.MESSAGE_ERROR:
1067         fail[:] = (ValueError, msg.structure['debug'], None)
1068         loop.quit()
1069       elif msg.type == GS.MESSAGE_EOS:
1070         loop.quit()
1071     bmid = bus.connect('message', bus_message)
1072
1073     ## Get everything ready and let it go.
1074     pipe.set_state(GS.STATE_PLAYING)
1075     with GStreamerProgressEyecandy(filestatus(master,
1076                                               'convert to %s' % me.NAME),
1077                                    pipe):
1078       loop.run()
1079     pipe.set_state(GS.STATE_NULL)
1080     if fail:
1081       raise fail[0], fail[1], fail[2]
1082
1083     ## Fix up the output file if we have to.
1084     me.fixup(new)
1085
1086     ## We're done.
1087     OS.rename(new, target)
1088
class OggVorbisFormat (AudioFormat):
  "AudioFormat object for Ogg Vorbis."

  NAME = 'Ogg Vorbis'
  EXT = 'ogg'
  MIMETYPES = set(['application/ogg', 'audio/x-vorbis', 'audio/ogg',
                   'audio/x-vorbis+ogg'])

  ## Table of (QUALITY, BITRATE) pairs, in increasing order of both.
  ## From http://en.wikipedia.org/wiki/Vorbis
  QMAP = [(-1,  45), ( 0,  64), ( 1,  80), ( 2,  96),
          ( 3, 112), ( 4, 128), ( 5, 160), ( 6, 192),
          ( 7, 224), ( 8, 256), ( 9, 320), (10, 500)]

  def encoder_chain(me):
    """
    Return the list of elements which encode and package Ogg Vorbis audio.

    The encoder quality is the first entry in `QMAP' whose bitrate is at
    least our target bitrate, scaled down by a factor of ten for
    `vorbisenc'.  Raises `ValueError' if the table has no such entry.
    """
    wanted = me.bitrate
    quality = None
    for q, rate in me.QMAP:
      if rate >= wanted:
        quality = q
        break
    if quality is None:
      raise ValueError('no suitable quality setting found')
    vorbis = make_element('vorbisenc',
                          quality = quality/10.0)
    mux = make_element('oggmux')
    return [vorbis, mux]

defformat('ogg-vorbis', OggVorbisFormat)
1113
class MP3Format (AudioFormat):
  "AudioFormat object for MP3."

  NAME = 'MP3'
  EXT = 'mp3'
  MIMETYPES = set(['audio/mpeg'])

  def encoder_chain(me):
    """
    Return the list of elements which encode and package MP3 audio.

    The chain is a `lame' encoder, given our target bitrate as its mean VBR
    bitrate, followed by `xingmux' and `id3v2mux'.
    """
    lame = make_element('lame',
                        vbr_mean_bitrate = me.bitrate,
                        vbr = 4)
    xing = make_element('xingmux')
    id3 = make_element('id3v2mux')
    return [lame, xing, id3]

  def fixup(me, path):
    """
    Fix up the freshly written MP3 file at PATH.

    GStreamer produces ID3v2 tags, but not ID3v1.  This seems unnecessarily
    unkind to stupid players, so ask eyeD3 to write an ID3v1.1 tag too.
    """
    id3 = E3.Tag()
    id3.link(path)
    id3.setTextEncoding(E3.UTF_8_ENCODING)
    try:
      id3.update(E3.ID3_V1_1)
    except (UnicodeEncodeError, E3.tag.GenreException):
      ## This is a best-effort nicety: if the tag can't be written because
      ## of one of these errors then quietly go without.
      pass

defformat('mp3', MP3Format)
1144
1145 ###--------------------------------------------------------------------------
1146 ### Image handling, based on the Python Imaging Library.
1147
class ImageIdentifier (object):
  """
  Analyses and identifies an image file.

  The opened Image object is left in the `img' property for inspection, and
  `mime' is a set holding the MIME type we were given.
  """

  def __init__(me, file, mime):
    """
    Open FILE as an image, remembering the MIME type too.

    Raises `IdentificationFailure' if PIL can't make sense of the file;
    genuine I/O errors are passed on unchanged.
    """

    ## Get PIL to open the file.  It will magically work out what kind of
    ## file it is.
    try:
      picture = I.open(file)
    except IOError as exc:

      ## PIL rather unhelpfully raises `IOError' when it fails to identify
      ## the file.  A real I/O error carries an `errno'; an identification
      ## failure doesn't.
      if exc.errno is not None:
        raise
      raise IdentificationFailure

    me.img = picture
    me.mime = set([mime])
1171
class ImageFormat (BaseFormat):
  """
  An ImageFormat is a kind of Format specialized for image files.

  Subclasses needn't supply anything beyond the properties which every
  concrete Format subclass must have.  The one extra requirement is that
  `NAME' must be the name by which PIL knows the format, since it's compared
  against the image's `format' and handed to PIL when saving.
  """

  PROPS = prop('size', Num)
  CATEGORY = FileCategory('image', ['image/*'], ImageIdentifier)

  def __init__(me, size = None, **kw):
    """
    Initialize an ImageFormat object.

    SIZE, if given, is the largest acceptable width or height.  Additional
    keywords are used when encoding, and may be recognized by enhanced
    `check' methods in subclasses.
    """
    me._size, me._props = size, kw

  def check(me, id):
    """
    Check whether the ImageIdentifier ID matches our requirements.

    The image must already be in our format and, if we have a size limit,
    neither of its dimensions may exceed that limit.
    """
    if id.img.format != me.NAME:
      return False
    limit = me._size
    if limit is None:
      return True
    return id.img.size[0] <= limit and id.img.size[1] <= limit

  def convert(me, master, id, target):
    "Encode the file MASTER, identified as ID, writing the result to TARGET."

    ## The output goes to a scratch file first, and is renamed at the end.
    scratch = target + '.new'

    ## The identifier has already opened the image for us, so reuse it
    ## rather than reading the file again.
    picture = id.img
    STATUS.set(filestatus(master, 'convert to %s' % me.NAME))

    ## Shrink the image if it's bigger than our limit.  Thumbnailing works
    ## in place, so operate on a copy and leave the identifier's image
    ## alone.
    limit = me._size
    if limit is not None:
      if picture.size[0] > limit or picture.size[1] > limit:
        picture = picture.copy()
        picture.thumbnail((limit, limit), I.ANTIALIAS)

    ## Write the image out, and give the subclass a chance to fix it up.
    picture.save(scratch, me.NAME, **me._props)
    me.fixup(scratch)

    ## Move the result into place and tell the user.
    OS.rename(scratch, target)
    STATUS.commit()
1229
class JPEGFormat (ImageFormat):
  """
  Image format for JPEG (actually JFIF) files.

  Properties which may be set:

  optimize
          If present, take a second pass to select optimal encoder settings.

  progression
          If present, make a progressive file.

  quality Integer from 1--100 (worst to best); default is 75.
  """
  NAME = 'JPEG'
  EXT = 'jpg'
  PROPS = (prop('optimize', None) |
           prop('progressive', None, 'progression') |
           prop('quality', Num))

defformat('jpeg', JPEGFormat)
1251
class PNGFormat (ImageFormat):
  """
  Image format for PNG files.

  Properties which may be set:

  optimize
          If present, make a special effort to minimize the output file.
  """
  NAME = 'PNG'
  EXT = 'png'
  PROPS = prop('optimize', None)

defformat('png', PNGFormat)
1266
class BMPFormat (ImageFormat):
  """
  Image format for Windows BMP files, as used by RockBox.

  There are no additional properties to set.
  """
  EXT = 'bmp'
  NAME = 'BMP'

defformat('bmp', BMPFormat)
1277
1278 ###--------------------------------------------------------------------------
1279 ### Remaining parsing machinery.
1280
Type = K('type') - Name - D('{') - R(Policy) - D('}')
def build_type(s, l, t):
  """
  Parse action for `Type': build the policy for one category.

  S is the string being parsed, L the location of the match within it, and
  T the matched tokens: T[0] is the category name and T[1] the policies
  found between the braces.

  Returns the lone policy if there's only one, or an `AndPolicy' combining
  them otherwise, having told it which category it applies to.  Raises a
  parse exception at L if the category name isn't in `CATEGORYMAP'.
  """
  try:
    cat = CATEGORYMAP[t[0]]
  except KeyError:
    ## Report the problem at the match location.  (This used to refer to
    ## an undefined `loc', so an unknown category produced a `NameError'
    ## rather than a useful diagnostic.)
    raise P.ParseException(s, l, "Unknown category `%s'" % t[0])
  pols = t[1]
  if len(pols) == 1: pol = pols[0]
  else: pol = AndPolicy(pols)
  pol.setcategory(cat)
  return pol
Type.setParseAction(build_type)
1293
## The list of `TargetJob' objects which have been performed so far.
TARGETS = []

class TargetJob (object):
  """
  A job describing one target directory and the policies to apply to it.

  The directory name is kept in `targetdir' and the policies in `policies'.
  """

  def __init__(me, targetdir, policies):
    "Remember the TARGETDIR and its POLICIES."
    me.targetdir, me.policies = targetdir, policies

  def perform(me):
    "Register this job by adding it to the end of `TARGETS'."
    TARGETS.append(me)
1301
Target = K('target') - String - D('{') - R(Type) - D('}')
def build_target(s, l, t):
  """
  Parse action for `Target': make a `TargetJob' from the matched tokens.

  T[0] is the target directory name and T[1] the policies found between the
  braces.
  """
  targetdir = t[0]
  policies = t[1]
  return TargetJob(targetdir, policies)
Target.setParseAction(build_target)
1306
## Global settings from the configuration file.  The `master' entry starts
## out as `None', and is replaced when a `VarsJob' which sets it is
## performed.
VARS = { 'master': None }

class VarsJob (object):
  """
  A job which records variable settings in `VARS'.

  The settings are kept in `vars', as a sequence of (NAME, VALUE) pairs.
  """

  def __init__(me, vars):
    "Remember the VARS, a sequence of (NAME, VALUE) pairs."
    me.vars = vars

  def perform(me):
    "Store each of our settings in `VARS', replacing any earlier value."
    for setting in me.vars:
      name, value = setting
      VARS[name] = value
1314
Var = prop('master', String)
Vars = K('vars') - D('{') - R(Var) - D('}')
def build_vars(s, l, t):
  """
  Parse action for `Vars': make a `VarsJob' from the matched tokens.

  T[0] holds the settings found between the braces.
  """
  settings = t[0]
  return VarsJob(settings)
Vars.setParseAction(build_vars)
1320
## The top level of the configuration grammar.  Each top-level item is either
## a `vars' block or a `target' block, and `Config' is built from `TopLevel'
## using `R'.  NOTE(review): `R' is defined elsewhere; presumably it matches
## a repeated sequence and groups the results, since `parse_opts' unpacks the
## parse result as a single list of jobs -- confirm.  Python-style comments
## are ignored when parsing.
TopLevel = Vars | Target
Config = R(TopLevel)
Config.ignore(P.pythonStyleComment)
1324
1325 ###--------------------------------------------------------------------------
1326 ### The directory grobbler.
1327
def grobble(master, targets, noact = False):
  """
  Work through the MASTER directory, writing converted files to TARGETS.

  The TARGETS are a list of `TargetJob' objects, each describing a target
  directory and a policy to apply to it.

  If NOACT is true, then don't actually do anything permanent to the
  filesystem.

  Returns a list of (MASTERFILE, EXCEPTION) pairs, one for each file or
  directory which failed with an `IOError' or `OSError'; the list is empty
  if nothing went wrong.
  """

  ## Transform the targets into a more convenient data structure: for each
  ## target, in order, a dictionary mapping each category to the list of
  ## policies for it.
  tpolmap = []
  for t in targets:
    pmap = {}
    tpolmap.append(pmap)
    for p in t.policies: pmap.setdefault(p.cat, []).append(p)

  ## Keep track of the current position in the master tree.
  dirs = []

  ## And the files which haven't worked.
  broken = []

  def grobble_file(master, pmap, targetdir, cohorts):
    ## Convert MASTER, writing the result to TARGETDIR.
    ##
    ## The PMAP maps categories to lists of policies.  The COHORTS are
    ## actually (CAT, ID, COHORT) triples, where a COHORT is a list of
    ## (FILENAME, ID) pairs.
    ##
    ## Since this function might convert the MASTER file, the caller doesn't
    ## know the name of the output files, so we return them as a list.

    done = set()
    st_m = OS.stat(master)

    ## Work through each category listed and apply its policy.
    for cat, id, cohort in cohorts:

      ## Go through the category's policies and see if any match.  If we fail
      ## here, see if there are more categories to try.
      for pol in pmap[cat]:
        acts = pol.actions(master, targetdir, id, cohort)
        if acts: break
      else:
        continue

      ## Work through the targets one by one.
      for a in acts:
        done.add(a.target)

        ## Find out whether the target file already exists and is up-to-date
        ## with respect to the master.  (Caution here with low-resolution
        ## timestamps.)  If it's OK, then just move on.  The target also
        ## counts as OK if it's the very same file as the master.
        try:
          st_t = OS.stat(a.target)
          if st_m.st_mtime < st_t.st_mtime or \
                 (st_m.st_ino, st_m.st_dev) == (st_t.st_ino, st_t.st_dev):
            continue
        except OSError as err:
          if err.errno not in (E.ENOENT, E.ENOTDIR):
            raise

        ## We have real work to do.  If there's a current status message,
        ## it's the containing directory so flush it so that people know
        ## where we are.
        STATUS.commit()

        ## Remove the target.  (A hardlink will fail if the target already
        ## exists.)
        if not noact:
          try:
            OS.unlink(a.target)
          except OSError as err:
            if err.errno not in (E.ENOENT, E.ENOTDIR):
              raise

        ## Do whatever it is we decided to do.
        if noact:
          STATUS.commit(filestatus(master, a))
        else:
          a.perform()

    ## We're done.  Return the names of the targets.
    return list(done)

  @contextmanager
  def wrap(masterfile):
    ## Handle exceptions found while trying to convert a particular file or
    ## directory.  The MASTERFILE is handed back to the `with' statement.

    try:
      yield masterfile

    ## Something bad happened.  Report the error, but continue.  (This list
    ## of exceptions needs a lot of work.)
    except (IOError, OSError) as exc:
      STATUS.clear()
      STATUS.commit(filestatus(masterfile, 'failed (%s)' % exc))
      broken.append((masterfile, exc))

  def grobble_dir(master, targets):
    ## Recursively convert files in MASTER, writing them to the TARGETS.
    ##
    ## Here, the TARGETS are directory names, in the same order as the
    ## entries of `tpolmap'.

    ## Keep track of the subdirectories we encounter, because we'll need to
    ## do all of those in one go at the end.
    subdirs = set()

    ## Work through each target directory in turn.
    for target, pmap in zip(targets, tpolmap):

      ## Make sure the TARGET exists and is a directory.  It's a fundamental
      ## assumption of this program that the entire TARGET tree is
      ## disposable, so if something exists but isn't a directory, we should
      ## kill it.
      if OS.path.isdir(target):
        pass
      else:
        if OS.path.exists(target):
          STATUS.commit(filestatus(target, 'clear nondirectory'))
          if not noact:
            OS.unlink(target)
        STATUS.commit(filestatus(target, 'create directory'))
        if not noact:
          OS.mkdir(target)

      ## Keep a list of things in the target.  As we convert files, we'll
      ## check them off.  Anything left over is rubbish and needs to be
      ## deleted.  (The directory might not exist if we're not acting.)
      checklist = {}
      try:
        for i in OS.listdir(target):
          checklist[i] = False
      except OSError as err:
        if err.errno not in (E.ENOENT, E.ENOTDIR):
          raise

      ## Keep track of the files in each category.
      catmap = {}
      todo = []
      done = []

      ## Work through the master files.
      for f in sorted(OS.listdir(master)):

        ## If the killswitch has been pulled then stop.  The whole idea is
        ## that we want to cause a clean shutdown if possible, so we don't
        ## want to do it in the middle of encoding because the encoding
        ## effort will have been wasted.  This is the only place we need to
        ## check.  If we've exited the loop, then clearing old files will
        ## probably be fast, and we'll either end up here when the recursive
        ## call returns or we'll be in the same boat as before, clearing old
        ## files, only up a level.  If worst comes to worst, we'll be killed
        ## forcibly somewhere inside `SH.rmtree', and that can continue where
        ## it left off.
        if KILLSWITCH.is_set():
          return

        ## Do something with the file.
        with wrap(OS.path.join(master, f)) as masterfile:

          ## If it's a directory then prepare to grobble it recursively, but
          ## don't do that yet.
          if OS.path.isdir(masterfile):
            subdirs.add(f)
            done.append(OS.path.join(target, f))

          ## Otherwise it's a file.  Work out what kind, and stash it under
          ## the appropriate categories.  Later, we'll apply policy to the
          ## files, by category, and work out what to do with them all.
          else:
            gf = GIO.File(masterfile)
            mime = gf.query_info('standard::content-type').get_content_type()
            cats = []
            for cat in pmap:
              id = cat.identify(masterfile, mime)
              if id is None: continue
              catmap.setdefault(cat, []).append((masterfile, id))
              cats.append((cat, id))

            ## If no category claimed the file then record it under `None',
            ## with no identifier.  (This used to store whatever was left
            ## in the loop variable `id', which was either `None' anyway or,
            ## if this target has no policies at all, not even bound.)
            if not cats:
              catmap.setdefault(None, []).append((masterfile, None))
            todo.append((masterfile, cats))

      ## Work through the categorized files to see what actions to do for
      ## them.
      for masterfile, cats in todo:
        with wrap(masterfile):
          done += grobble_file(masterfile, pmap, target,
                               [(cat, id, catmap[cat]) for cat, id in cats])

      ## Check the results off the list so that we don't clear it later.
      for f in done:
        checklist[OS.path.basename(f)] = True

      ## Maybe there's stuff in the target which isn't accounted for.  Delete
      ## it: either the master has changed, or the policy for this target has
      ## changed.  Either way, the old files aren't wanted.
      for f in checklist:
        if not checklist[f]:
          STATUS.commit(filestatus(f, 'clear bogus file'))
          if not noact:
            bogus = OS.path.join(target, f)
            try:
              if OS.path.isdir(bogus):
                SH.rmtree(bogus)
              else:
                OS.unlink(bogus)
            except OSError as err:
              if err.errno != E.ENOENT:
                raise

    ## If there are subdirectories which want processing then do those.
    ## Keep the user amused by telling him where we are in the tree.
    for d in sorted(subdirs):
      dirs.append(d)
      STATUS.set('/'.join(dirs))
      with wrap(OS.path.join(master, d)) as masterdir:
        try:
          grobble_dir(masterdir,
                      [OS.path.join(target, d) for target in targets])
        finally:
          dirs.pop()
          STATUS.set('/'.join(dirs))

  ## Right.  We're ready to go.
  grobble_dir(master, [t.targetdir for t in targets])
  return broken
1555
1556 ###--------------------------------------------------------------------------
1557 ### Command-line interface.
1558
## The name we were invoked by, for use in messages.
QUIS = OS.path.basename(SYS.argv[0])

def moan(msg):
  """
  Report a warning message to the user.

  The MSG is written to standard error on a line of its own, prefixed by the
  program name.
  """
  line = '%s: %s\n' % (QUIS, msg)
  SYS.stderr.write(line)
1564
def die(msg):
  """
  Report a fatal error message to the user, and exit.

  The MSG is reported using `moan', and then the program exits with
  status 1.
  """
  moan(msg)
  raise SystemExit(1)
1569
def parse_opts(args):
  """
  Parse command-line arguments in ARGS.

  Exactly one non-option argument is wanted: the name of the configuration
  file.  As side effects, this function turns on progress eye-candy if
  requested, starts the timeout thread if a timeout was given, and parses
  the configuration file, performing each of the jobs found in it.

  Returns the parsed options object.  Its interesting attributes are
  `noact', `tty', `timeout' and `timeout_nasty'.
  """

  ## Build the option parser object.
  op = OP.OptionParser(prog = QUIS, version = VERSION,
                       usage = '%prog [-in] [-t TIMEOUT] [-T TIMEOUT] '
                               'CONFIG',
                       description = """\
Convert a directory tree of files according to the configuration file
CONFIG.
""")

  ## Timeout handling.  A time is a decimal number optionally followed by a
  ## unit: `s', `m', `h' or `d'; the default is seconds.  The pattern is
  ## anchored at the end so that trailing rubbish (e.g., `5x') is rejected
  ## rather than silently ignored.
  def cb_time(opt, ostr, arg, op):
    m = RX.match(r'\s*(\d+)\s*([dhms]?)\s*$', arg)
    if not m:
      raise OP.OptionValueError('bad time value `%s\'' % arg)
    t, u = m.groups()
    t = int(t) * { '': 1, 's': 1, 'm': 60, 'h': 3600, 'd': 86400 }[u]
    setattr(op.values, opt.dest, t)
  op.add_option('-t', '--timeout', type = 'string', metavar = 'SECS',
                dest = 'timeout',
                help = 'stop processing nicely after SECS',
                action = 'callback', callback = cb_time)
  op.add_option('-T', '--timeout-nasty', type = 'string', metavar = 'SECS',
                dest = 'timeout_nasty',
                help = 'stop processing unpleasantly after further SECS',
                action = 'callback', callback = cb_time)

  ## Other options.
  op.add_option('-i', '--interactive', action = 'store_true', dest = 'tty',
                help = 'provide progress information')
  op.add_option('-n', '--no-act', action = 'store_true', dest = 'noact',
                help = 'don\'t actually modify the filesystem')

  ## Ready to rock.
  op.set_defaults(formats = [], noact = False,
                  timeout = None, timeout_nasty = 300)
  opts, args = op.parse_args(args)

  ## Check that we got the non-option arguments that we want.
  if len(args) != 1:
    op.error('wrong number of arguments')

  ## Act on the options.  The timeout thread is a daemon so that it doesn't
  ## keep the program alive once the real work is finished.
  if opts.tty:
    STATUS.eyecandyp = True
  if opts.timeout is not None:
    to = TH.Thread(target = timeout,
                   args = (opts.timeout, opts.timeout_nasty))
    to.daemon = True
    to.start()

  ## Parse the configuration file.
  with open(args[0]) as conf:
    jobs, = Config.parseFile(conf, True)
  for j in jobs:
    j.perform()

  return opts
1635
if __name__ == '__main__':
  opts = parse_opts(SYS.argv[1:])

  ## The `master' entry is always present in `VARS': it starts out as
  ## `None'.  So the thing to check is whether the configuration file
  ## actually gave it a value.
  if VARS.get('master') is None:
    die("no master directory set")

  ## Do the work, and report anything which couldn't be converted.
  broken = grobble(VARS['master'], TARGETS, opts.noact)
  if broken:
    moan('failed to convert some files:')
    for file, exc in broken:
      moan('%s: %s' % (file, exc))
    SYS.exit(1)

  ## This is basically a successful completion: we did what we were asked to
  ## do.  It seems polite to report a message, though.
  ##
  ## Why don't we have a nonzero exit status?  The idea would be that a
  ## calling script would be interested that we used up all of our time, and
  ## not attempt to convert some other directory as well.  But that doesn't
  ## quite work.  Such a script would need to account correctly for time we
  ## had spent even if we complete successfully.  And if the script is having
  ## to watch the clock itself, it can do that without our help here.
  if KILLSWITCH.is_set():
    moan('killed by timeout')
1658
1659 ###----- That's all, folks --------------------------------------------------