3 ### Convert a directory tree of audio files
5 ### (c) 2010 Mark Wooding
8 ###----- Licensing notice ---------------------------------------------------
10 ### This program is free software; you can redistribute it and/or modify
11 ### it under the terms of the GNU General Public License as published by
12 ### the Free Software Foundation; either version 2 of the License, or
13 ### (at your option) any later version.
15 ### This program is distributed in the hope that it will be useful,
16 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ### GNU General Public License for more details.
20 ### You should have received a copy of the GNU General Public License
21 ### along with this program; if not, write to the Free Software Foundation,
22 ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
24 ###--------------------------------------------------------------------------
25 ### External dependencies.
28 from __future__ import with_statement
30 ## Standard Python libraries.
35 import unicodedata as UD
40 import threading as TH
43 from contextlib import contextmanager
45 ## eyeD3 tag fettling.
48 ## Gstreamer. It picks up command-line arguments -- most notably `--help' --
49 ## and processes them itself. Of course, its help is completely wrong. This
50 ## kludge is due to Jonas Wagner.
51 _argv, SYS.argv = SYS.argv, []
58 from PIL import Image as I
63 ###--------------------------------------------------------------------------
64 ### Special initialization.
71 ###--------------------------------------------------------------------------
72 ### Eyecandy progress reports.
76 Return the width of S, in characters.
78 Specifically, this is the number of backspace characters required to
79 overprint the string S. If the current encoding for `stdout' appears to be
80 Unicode then do a complicated Unicode thing; otherwise assume that
81 characters take up one cell each.
83 None of this handles tab characters in any kind of useful way. Sorry.
86 ## If there's no encoding for stdout then we're doing something stupid.
87 if SYS.stdout.encoding is None: return len(s)
89 ## Turn the string into Unicode so we can hack on it properly. Maybe that
90 ## won't work out, in which case fall back to being stupid.
91 try: u = s.decode(SYS.stdout.encoding)
92 except UnicodeError: return len(s)
94 ## Our main problem is combining characters, but we should also try to
95 ## handle wide (mostly Asian) characters, and zero-width ones. This hack
96 ## is taken mostly from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
100 if UD.category(ch) in ['Cf', 'Me', 'Mn'] or \
101 0x1160 <= cd <= 0x11ff: pass
102 elif UD.east_asian_width(ch) in ['F', 'W']: w += 2
106 #print ';; %r -> %d' % (s, w)
109 class StatusLine (object):
111 Maintains a status line containing ephemeral progress information.
113 The status line isn't especially important, but it keeps interactive users
116 There should be only one status line object in your program; otherwise
117 they'll interfere with each other and get confused.
119 The update algorithm (in `set') is fairly careful to do the right thing
120 with long status `lines', and to work properly in an Emacs `shell' buffer.
124 "Initialize the status line."
127 me.eyecandyp = OS.isatty(SYS.stdout.fileno())
131 Set the status line contents to LINE, replacing what was there before.
133 This only produces actual output if stdout is interactive.
140 #print ';; new status %r' % line
142 ## If the old line was longer, we need to clobber its tail, so work out
143 ## what that involves.
145 b = charwidth(me._last[n:])
150 ## Now figure out the length of the common prefix between what we had
151 ## before and what we have now. This reduces the amount of I/O done,
152 ## which keeps network traffic down on SSH links, and keeps down the
153 ## amount of work slow terminal emulators like Emacs have to do.
155 m = min(n, me._lastlen)
156 while i < m and line[i] == me._last[i]:
159 ## Actually do the output, all in one syscall.
160 b = charwidth(me._last[i:])
161 SYS.stdout.write(pre + '\b'*b + line[i:])
162 #print ';; => %r' % (pre + '\b'*b + line[i:])
165 ## Update our idea of what's gone on.
170 "Clear the status line. Just like set('')."
173 def commit(me, line = None):
175 Commit the current status line, and maybe the string LINE.
177 If the current status line is nonempty, then commit it to the transcript.
178 If LINE is not None, then commit that to the transcript too.
180 After all of this, we clear the status line to get back to a clean state.
184 SYS.stdout.write('\n')
186 SYS.stdout.write(me._last + '\n')
188 SYS.stdout.write(line + '\n')
192 STATUS = StatusLine()
def filestatus(file, status):
    """
    Format a status-line message about FILE.

    The result is eight spaces of indentation, the base name of FILE (any
    directory part is discarded), a colon and a space, and finally STATUS
    rendered with `%s' formatting -- so STATUS may be any object with a
    string conversion, not just a string.
    """
    indent = ' ' * 8
    leaf = OS.path.basename(file)
    return '%s%s: %s' % (indent, leaf, status)
197 class ProgressEyecandy (object):
199 Provide amusement while something big and complicated is happening.
201 This is an abstract class. Subclasses must provide a method `progress'
202 returning a pair (CURRENT, MAX) indicating the current progress through the
206 def __init__(me, what, silentp = False):
208 Initialize a progress meter.
210 WHAT is a prefix string to be written before the progress eyecandy
214 me._silentp = silentp
218 def _fmt_time(me, t):
219 "Format T as a time, in (maybe hours) minutes and seconds."
220 s, t = t % 60, int(t/60)
221 m, h = t % 60, int(t/60)
223 return '%d:%02d:%02d' % (h, m, s)
225 return '%02d:%02d' % (m, s)
228 "Show the current level of progress."
230 ## If we're not showing pointless frippery, don't bother at all.
231 if not STATUS.eyecandyp:
234 ## Update the spinner index.
235 me._spinner = (me._spinner + 1)%4
237 ## Fetch the current progress information. Note that we always fetch
238 ## both the current and maximum levels, because both might change if an
239 ## operation revises its idea of how much work needs doing.
240 cur, max = me.progress()
242 ## If we couldn't get progress information, display something vaguely
244 if cur is None or max is None:
245 STATUS.set('%s %c [unknown progress]' %
246 (me._what, r'/-\|'[me._spinner]))
249 ## Work out -- well, guess -- the time remaining.
252 eta = me._fmt_time((t - me._start)*(max - cur)/cur)
256 ## Set the status bar.
258 STATUS.set('%s %c [%s%s] %3d%% (%s)' % \
260 r'/-\|'[me._spinner],
265 def done(me, win = True):
266 "Show a completion notice, or a failure if WIN is false."
268 STATUS.set('%s FAILED!' % me._what)
269 elif not me._silentp:
270 STATUS.set('%s done (%s)' %
272 me._fmt_time(T.time() - me._start)))
277 ###--------------------------------------------------------------------------
278 ### Timeout handling.
280 KILLSWITCH = TH.Event()
286 moan('dying messily due to timeout')
289 ###--------------------------------------------------------------------------
290 ### Parsing utilities.
## Allow hyphens in identifiers.
IDCHARS = P.alphanums + '-_'
P.Keyword.setDefaultKeywordChars(IDCHARS)

## Some common kinds of tokens.
Name = P.Word(IDCHARS)
## Build a real list of integers here.  `map' only returns a list on
## Python 2; elsewhere it is a lazy iterator, and we don't want the shape of
## the parse action's result to depend on the interpreter version.
Num = P.Word(P.nums).setParseAction(lambda toks: [int(tok) for tok in toks])
String = P.QuotedString('"', '\\')
301 ## Handy abbreviations for constructed parser elements.
def K(k):
    "Return a suppressed parser element matching the keyword K."
    keyword = P.Keyword(k)
    return keyword.suppress()
def D(d):
    "Return a suppressed parser element matching the literal delimiter D."
    delimiter = P.Literal(d)
    return delimiter.suppress()
def R(p):
    """
    Return a parser element matching zero or more repetitions of P.

    The attached parse action returns the matched tokens wrapped in a
    one-element list.
    """
    def wrap(s, l, t):
        return [t]
    repeated = P.ZeroOrMore(p)
    return repeated.setParseAction(wrap)
308 ###--------------------------------------------------------------------------
309 ### Format identification and conversion.
311 class IdentificationFailure (Exception):
314 class FileCategory (object):
316 A FileCategory represents a class of files.
318 For example, it's sensible to consider audio, or image files as a
319 category. A file category knows how to recognize member files from
323 def __init__(me, name, mime_pats, ident):
325 Construct a new category.
327 The MIME_PATS are a list of `fnmatch' patterns to be compared with a MIME
328 type. The IDENT is a function which produces an identification object
329 given a file's name and first-guess MIME type. The object is passed to a
330 Format's `check' method to see whether a file needs re-encoding, and to
331 `convert' to assist with the conversion.
333 An identification object must have an attribute `mime' which is a set of
334 possible MIME types accumulated for the object.
337 me._mime_pats = mime_pats
339 CATEGORYMAP[name] = me
341 def identify(me, file, mime):
343 Attempt to identify FILE, given its apparent MIME type.
345 If identification succeeds, return an identification object which can be
346 used by associated file formats; otherwise return None.
348 for p in me._mime_pats:
349 if not FN.fnmatchcase(mime, p):
352 return me._ident(file, mime)
353 except IdentificationFailure:
357 class BaseFormat (object):
359 A BaseFormat object represents a particular encoding and parameters.
361 The object can verify (the `check' method) whether a particular file
362 matches its requirements, and if necessary (`convert') re-encode a file.
364 Subclasses should define the following methods.
367 Answer whether the file identified by ID is acceptable according to
368 the receiver's parameters.
370 convert(MASTER, ID, TARGET)
371 Convert the file MASTER, which has been identified as ID, according
372 to the receiver's parameters, writing the output to TARGET.
374 Subclasses should also provide these attributes.
377 A FileCategory object for the category of files that this format
380 EXT A file extension to be applied to encoded output files.
382 NAME A user-facing name for the format.
384 PROPS A parser element to parse a property definition. It should produce
385 a pair NAME, VALUE to be stored in a dictionary.
387 Subclasses for different kinds of file may introduce more subclass
392 """Post-encoding fixups."""
def defformat(name, cls):
    """
    Define a format NAME using class CLS.

    CLS must carry both a `NAME' and a `CATEGORY' attribute; a class lacking
    `NAME' is considered abstract.  On success, CLS is recorded in FORMATMAP
    under the key NAME.

    Raises ValueError (`abstract class' or `no category') if CLS is missing
    the corresponding attribute; nothing is registered in that case.
    """
    ## Use the call form of `raise': the `raise CLASS, ARG' statement is a
    ## syntax error on Python 3, while this spelling means the same thing on
    ## both major versions.
    if not hasattr(cls, 'NAME'):
        raise ValueError('abstract class')
    if not hasattr(cls, 'CATEGORY'):
        raise ValueError('no category')
    FORMATMAP[name] = cls
406 class FormatParser (P.ParserElement):
408 Parse a format specifier:
410 format-spec ::= string [format-properties]
411 format-properties ::= `{' format-property (`,' format-property)* `}'
413 The syntax of a format-property is determined by the PROPS attribute on the
414 named format and its superclasses.
417 ## We cache the parser elements we generate to avoid enormous consing.
420 def parseImpl(me, s, loc, actp = True):
422 ## Firstly, determine the format name.
423 loc, r = Name._parse(s, loc, actp)
426 ## Look up the format class.
427 try: fcls = FORMATMAP[fmt]
429 raise P.ParseException(s, loc, "Unknown format `%s'" % fmt)
431 ## Fetch the property-list parser from the cache, if possible; else
440 except AttributeError: continue
441 if p in seen: continue
442 if prop is None: prop = p
446 pp = me.CACHE[fmt] = None
448 props = P.delimitedList(prop)
449 props.setParseAction(lambda s, l, t: dict(t.asList()))
450 pp = me.CACHE[fmt] = O(D('{') - props - D('}'))
452 ## Parse the properties.
456 loc, r = pp._parse(s, loc, actp)
460 ## Construct the format object and return it.
461 return loc, fcls(**pd)
463 Format = FormatParser()
465 def prop(kw, pval, tag = None):
466 if tag is None: tag = kw
469 p.setParseAction(lambda s, l, t: (tag, True))
471 p = K(kw) + D('=') + pval
472 p.setParseAction(lambda s, l, t: (tag, t[0]))
475 ###--------------------------------------------------------------------------
476 ### Policies and actions.
478 class Action (object):
480 An Action object represents a conversion action to be performed.
482 This class isn't intended to be instantiated directly. It exists to define
483 some protocol common to all Action objects.
485 Action objects have the following attributes.
487 master The name of the master (source) file.
489 target The name of the target (destination) file.
491 PRIORITY The priority of the action, for deciding which of two actions
492 to perform. Higher priorities are more likely to win.
494 Converting an Action to a string describes the action in a simple
495 user-readable manner. The `perform' method actually carries the action
501 def __init__(me, master):
502 "Stash the MASTER file name for later."
506 "Choose either ME or HIM and return one."
507 if him is None or me.PRIORITY > him.PRIORITY:
512 class CopyAction (Action):
514 An Action object for simply copying a file.
516 Actually we try to hardlink it first, falling back to a copy later. This
517 is both faster and more efficient with regard to disk space.
520 ## Copying is good. Linking is really good, but we can't tell the
521 ## difference at this stage.
524 def __init__(me, master, targetdir):
525 "Initialize a CopyAction, from MASTER to the TARGETDIR directory."
526 Action.__init__(me, master)
527 me.target = OS.path.join(targetdir, OS.path.basename(master))
533 "Actually perform a CopyAction."
535 STATUS.set(filestatus(me.master, 'link'))
536 OS.link(me.master, me.target)
538 if err.errno != E.EXDEV:
540 STATUS.set(filestatus(me.master, 'copy'))
541 new = me.target + '.new'
542 SH.copyfile(me.master, new)
543 OS.rename(new, me.target)
546 class ConvertAction (Action):
548 An Action object for converting a file to a given format.
550 Additional attributes:
552 id The identification object for the master file.
554 format The format to which we're meant to convert the master.
557 def __init__(me, master, targetdir, id, format):
558 "Initialize a ConvertAction."
559 Action.__init__(me, master)
560 stem, ext = OS.path.splitext(OS.path.basename(master))
561 me.target = OS.path.join(targetdir, stem + '.' + format.EXT)
566 return 'convert to %s' % me.format.NAME
569 "Actually perform a ConvertAction."
570 STATUS.set(filestatus(me.master, me))
571 me.format.convert(me.master, me.id, me.target)
575 class FormatPolicy (object):
577 A FormatPolicy object represents a set of rules for how to convert files.
579 Given a master file, the FormatPolicy will identify it and return a list of
580 actions to be performed. The methods required of a FormatPolicy are:
583 Store CAT as the policy's category. Check that this is consistent
584 with the policy as stored.
586 actions(MASTER, TARGETDIR, ID, COHORT)
587 Given a MASTER file, identified as ID, a target directory
588 TARGETDIR, and a list COHORT of (FILE, ID) pairs for other files
589 of the same category in the same directory, return a list of
590 actions to be performed to get the target directory into the right
591 form. The list might be empty if the policy object /rejects/ the
595 class AndPolicy (FormatPolicy):
597 A FormatPolicy which does the union of a bunch of other policies.
599 Each subsidiary policy is invoked in turn. The highest-priority action for
600 each target file is returned.
603 def __init__(me, policies):
604 me._policies = policies
606 def setcategory(me, cat):
608 for p in me._policies:
611 def actions(me, master, targetdir, id, cohort):
613 for p in me._policies:
614 for a in p.actions(master, targetdir, id, cohort):
616 tmap[a.target] = a.choose(tmap.get(a.target))
621 And = K('and') - D('{') - R(Policy) - D('}')
622 And.setParseAction(lambda s, l, t: AndPolicy(t[0]))
624 class OrPolicy (FormatPolicy):
626 A FormatPolicy which tries other policies and uses the first that accepts.
628 Each subsidiary policy is invoked in turn. If any accepts, the actions it
629 proposes are returned and no further policies are invoked. If none accepts
630 then the file is rejected.
633 def __init__(me, policies):
634 me._policies = policies
636 def setcategory(me, cat):
638 for p in me._policies:
641 def actions(me, master, targetdir, id, cohort):
642 for p in me._policies:
643 aa = p.actions(master, targetdir, id, cohort)
649 Or = K('or') - D('{') - R(Policy) - D('}')
650 Or.setParseAction(lambda s, l, t: OrPolicy(t[0]))
652 class AcceptPolicy (FormatPolicy):
654 A FormatPolicy which copies files in a particular format.
656 If all of the files in a cohort are recognized as being in a particular
657 format (including this one), then accept it with a CopyAction; otherwise
661 def __init__(me, format):
664 def setcategory(me, cat):
665 if me._format.CATEGORY is not cat:
667 "Accept format `%s' has category `%s', not `%s'" % \
668 (me._format.__class__.__name__,
669 me._format.CATEGORY.name, cat.name)
672 def actions(me, master, targetdir, id, cohort):
673 if me._format.check(id) and \
674 all(me._format.check(cid) for f, cid in cohort):
675 return [CopyAction(master, targetdir)]
679 Accept = K('accept') - Format
680 Accept.setParseAction(lambda s, l, t: AcceptPolicy(t[0]))
682 class ConvertPolicy (FormatPolicy):
684 A FormatPolicy which copies files in a particular format or converts if
687 def __init__(me, format):
690 def setcategory(me, cat):
691 if me._format.CATEGORY is not cat:
693 "Accept format `%s' has category `%s', not `%s'" % \
694 (me._format.__class__.__name__,
695 me._format.CATEGORY.name, cat.name)
698 def actions(me, master, targetdir, id, cohort):
699 if me._format.check(id):
700 return [CopyAction(master, targetdir)]
702 return [ConvertAction(master, targetdir, id, me._format)]
704 Convert = K('convert') - Format
705 Convert.setParseAction(lambda s, l, t: ConvertPolicy(t[0]))
707 Policy << (And | Or | Accept | Convert)
709 ###--------------------------------------------------------------------------
710 ### Audio handling, based on GStreamer.
712 def make_element(factory, name = None, **props):
713 "Return a new element from the FACTORY with the given NAME and PROPS."
714 elt = GS.element_factory_make(factory, name)
715 elt.set_properties(**props)
718 class GStreamerProgressEyecandy (ProgressEyecandy):
720 Provide amusement while GStreamer is busy doing something.
722 The GStreamerProgressEyecandy object is a context manager. Wrap it round
723 your GStreamer loop to provide progress information for an operation.
726 def __init__(me, what, elt, **kw):
728 Initialize a progress meter.
730 WHAT is a prefix string to be written before the progress eyecandy
731 itself. ELT is a GStreamer element to interrogate to find the progress
735 ProgressEyecandy.__init__(me, what, **kw)
738 "Called by GLib main event loop to update the eyecandy."
744 Update the progress meter.
746 This is called periodically by the GLib main event-processing loop.
752 "Return the current progress as a pair (CURRENT, MAX)."
754 ## Fetch the current progress information. We get the duration each
755 ## time, because (particularly with VBR-encoded MP3 inputs) the estimated
756 ## duration can change as we progress. Hopefully it settles down fairly
759 t, hunoz = me._elt.query_position(GS.FORMAT_TIME)
760 end, hukairz = me._elt.query_duration(GS.FORMAT_TIME)
762 except GS.QueryError:
766 "Enter context: attach progress meter display."
768 ## If we're not showing pointless frippery, don't bother at all.
769 if not STATUS.eyecandyp:
772 ## Update regularly. The pipeline runs asynchronously.
773 me._id = G.timeout_add(200, me._update)
775 def __exit__(me, ty, val, tb):
776 "Leave context: remove display and report completion or failure."
778 ## If we're not showing pointless frippery, there's nothing to remove.
780 G.source_remove(me._id)
782 ## Report completion anyway.
788 class AudioIdentifier (object):
790 Analyses and identifies an audio file.
792 Important properties are:
794 cap A capabilities structure describing the audio file data. The most
795 interesting thing in here is probably its name, which is a MIME
796 type describing the data.
798 dcap A capabilities structure describing the decoded audio data. This
799 is of interest during conversion.
801 tags A dictionary containing metadata tags from the file. These are in
802 GStreamer's encoding-independent format.
804 bitrate An approximation to the stream's bitrate, in kilobits per second.
805 This might be slow to work out for some files so it's computed on
809 def __init__(me, file, mime):
810 "Initialize the object suitably for identifying FILE."
812 ## Make some initial GStreamer objects. We'll want the pipeline later if
813 ## we need to analyse a poorly tagged MP3 stream, so save it away.
814 me._pipe = GS.Pipeline()
816 bus = me._pipe.get_bus()
817 bus.add_signal_watch()
820 ## The basic recognition kit is based around `decodebin'. We must keep
821 ## it happy by giving it sinks for the streams it's found, which it
822 ## announces asynchronously.
823 source = make_element('filesrc', 'file', location = file)
824 decoder = make_element('decodebin', 'decode')
825 sink = make_element('fakesink')
826 def decoder_pad_arrived(elt, pad):
827 if pad.get_caps()[0].get_name().startswith('audio/'):
828 elt.link_pads(pad.get_name(), sink, 'sink')
829 dpaid = decoder.connect('pad-added', decoder_pad_arrived)
830 me._pipe.add(source, decoder, sink)
831 GS.element_link_many(source, decoder)
833 ## Arrange to collect tags from the pipeline's bus as they're reported.
834 ## If we reuse the pipeline later, we'll want different bus-message
835 ## handling, so make sure we can take the signal handler away.
838 def bus_message(bus, msg):
839 if msg.type == GS.MESSAGE_ERROR:
840 fail[:] = (ValueError, msg.structure['debug'], None)
842 elif msg.type == GS.MESSAGE_STATE_CHANGED:
843 if msg.structure['new-state'] == GS.STATE_PAUSED and \
846 elif msg.type == GS.MESSAGE_TAG:
847 tags.update(msg.structure)
848 bmid = bus.connect('message', bus_message)
850 ## We want to identify the kind of stream this is. (Hmm. The MIME type
851 ## recognizer has already done this work, but GStreamer is probably more
852 ## reliable.) The `decodebin' has a `typefind' element inside which will
853 ## announce the identified media type. All we need to do is find it and
854 ## attach a signal handler. (Note that the handler might be run in the
855 ## thread context of the pipeline element, but Python's GIL will keep
856 ## things from being too awful.)
859 for e in decoder.elements():
860 if e.get_factory().get_name() == 'typefind':
864 assert False, 'failed to find typefind element'
866 ## Crank up most of the heavy machinery. The message handler will stop
867 ## the loop when things seem to be sufficiently well underway.
868 me._pipe.set_state(GS.STATE_PAUSED)
871 decoder.disconnect(dpaid)
873 me._pipe.set_state(GS.STATE_NULL)
874 raise fail[0], fail[1], fail[2]
876 ## Store the collected tags.
879 ## Gather the capabilities. The `typefind' element knows the input data
880 ## type. The 'decodebin' knows the raw data type.
881 me.cap = tfelt.get_pad('src').get_negotiated_caps()[0]
882 me.mime = set([mime, me.cap.get_name()])
883 me.dcap = sink.get_pad('sink').get_negotiated_caps()[0]
885 ## If we found a plausible bitrate then stash it. Otherwise note that we
886 ## failed. If anybody asks then we'll work it out then.
887 if 'nominal-bitrate' in tags:
888 me._bitrate = tags['nominal-bitrate']/1000
889 elif 'bitrate' in tags and tags['bitrate'] >= 80000:
890 me._bitrate = tags['bitrate']/1000
894 ## The bitrate computation wants the file size. Ideally we'd want the
895 ## total size of the frames' contents, but that seems hard to dredge
896 ## out. If the framing overhead is small, this should be close enough
898 me._bytes = OS.stat(file).st_size
901 "Close the pipeline down so we don't leak file descriptors."
902 me._pipe.set_state(GS.STATE_NULL)
907 Return the approximate bit-rate of the input file.
909 This might take a while if we have to work it out the hard way.
912 ## If we already know the answer then just return it.
913 if me._bitrate is not None:
916 ## Make up a new main loop.
919 ## Watch for bus messages. We'll stop when we reach the end of the
920 ## stream: then we'll have a clear idea of how long the track was.
922 def bus_message(bus, msg):
923 if msg.type == GS.MESSAGE_ERROR:
924 fail[:] = (ValueError, msg.structure['debug'], None)
926 elif msg.type == GS.MESSAGE_EOS:
928 bus = me._pipe.get_bus()
929 bmid = bus.connect('message', bus_message)
931 ## Get everything moving, and keep the user amused while we work.
932 me._pipe.set_state(GS.STATE_PLAYING)
933 with GStreamerProgressEyecandy(filestatus(file, 'measure bitrate') %
939 me._pipe.set_state(GS.STATE_NULL)
940 raise fail[0], fail[1], fail[2]
942 ## Now we should be able to find out our position accurately and work out
943 ## a bitrate. Cache it in case anybody asks again.
944 t, hukairz = me._pipe.query_position(GS.FORMAT_TIME)
945 me._bitrate = int(8*me._bytes*1e6/t)
950 class AudioFormat (BaseFormat):
952 An AudioFormat is a kind of Format specialized for audio files.
954 Format checks are done on an AudioIdentifier object.
957 PROPS = prop('bitrate', Num)
959 ## libmagic reports `application/ogg' for Ogg Vorbis files. We've switched
960 ## to GIO now, which reports either `audio/ogg' or `audio/x-vorbis+ogg'
961 ## depending on how thorough it's trying to be. Still, it doesn't do any
962 ## harm here; the main risk is picking up Ogg Theora files by accident, and
963 ## we'll probably be able to extract the audio from them anyway.
964 CATEGORY = FileCategory('audio', ['audio/*', 'application/ogg'],
967 def __init__(me, bitrate = None):
968 "Construct an object, requiring an approximate bitrate."
973 Return whether the AudioIdentifier ID is suitable for our purposes.
975 Subclasses can either override this method or provide a property
976 `MIMETYPES', which is a set (it is intersected with the identifier's `mime' set)
977 of GStreamer MIME types matching this format.
979 return id.mime & me.MIMETYPES and \
980 (me.bitrate is None or id.bitrate <= me.bitrate * sqrt(2))
984 Constructs a GStreamer element to encode audio input.
986 Subclasses can either override this method (or replace `encode'
987 entirely), or provide a method `encoder_chain' which returns a list of
988 elements to be linked together in sequence. The first element in the
989 chain must have a pad named `sink' and the last must have a pad named
992 elts = me.encoder_chain()
995 GS.element_link_many(*elts)
996 bin.add_pad(GS.GhostPad('sink', elts[0].get_pad('sink')))
997 bin.add_pad(GS.GhostPad('src', elts[-1].get_pad('src')))
1000 def convert(me, master, id, target):
1002 Encode audio from MASTER, already identified as ID, writing it to TARGET.
1004 See `encoder' for subclasses' responsibilities.
1007 ## Construct the necessary equipment.
1008 pipe = GS.Pipeline()
1009 bus = pipe.get_bus()
1010 bus.add_signal_watch()
1013 ## Make sure that there isn't anything in the way of our output. We're
1014 ## going to write to a scratch file so that we don't get confused by
1015 ## half-written rubbish left by a crashed program.
1016 new = target + '.new'
1019 except OSError, err:
1020 if err.errno != E.ENOENT:
1023 ## Piece together our pipeline. The annoying part is that the
1024 ## `decodebin' doesn't have any source pads yet, so our chain is in two
1026 source = make_element('filesrc', 'source', location = master)
1027 decoder = make_element('decodebin', 'decode')
1028 convert = make_element('audioconvert', 'convert')
1029 encoder = me.encoder()
1030 sink = make_element('filesink', 'sink', location = new)
1031 pipe.add(source, decoder, convert, encoder, sink)
1032 GS.element_link_many(source, decoder)
1033 GS.element_link_many(convert, encoder, sink)
1035 ## Some decoders (e.g., the AC3 decoder) include channel-position
1036 ## indicators in their output caps. The Vorbis encoder interferes with
1037 ## this, and you end up with a beautifully encoded mono signal from a
1038 ## stereo source. From a quick butchers at the `vorbisenc' source, I
1039 ## /think/ that this is only a problem with stereo signals: mono signals
1040 ## are mono already, and `vorbisenc' accepts channel positions if there
1041 ## are more than two channels.
1043 ## So we have this bodge. We already collected the decoded audio caps
1044 ## during identification. So if we see 2-channel audio with channel
1045 ## positions, we strip the positions off forcibly by adding a filter.
1046 if id.dcap.get_name().startswith('audio/x-raw-') and \
1047 id.dcap.has_field('channels') and \
1048 id.dcap['channels'] == 2 and \
1049 id.dcap.has_field('channel-positions'):
1052 c.remove_field('channel-positions')
1057 ## Hook onto the `decodebin' so we can link together the two halves of
1058 ## our encoding chain. For now, we'll hope that there's only one audio
1059 ## stream in there, and just throw everything else away.
1060 def decoder_pad_arrived(elt, pad):
1061 if pad.get_caps()[0].get_name().startswith('audio/'):
1063 elt.link_pads_filtered(pad.get_name(), convert, 'sink', dcap)
1065 elt.link_pads(pad.get_name(), convert, 'sink')
1066 decoder.connect('pad-added', decoder_pad_arrived)
1068 ## Watch the bus for completion messages.
1070 def bus_message(bus, msg):
1071 if msg.type == GS.MESSAGE_ERROR:
1072 fail[:] = (ValueError, msg.structure['debug'], None)
1074 elif msg.type == GS.MESSAGE_EOS:
1076 bmid = bus.connect('message', bus_message)
1078 ## Get everything ready and let it go.
1079 pipe.set_state(GS.STATE_PLAYING)
1080 with GStreamerProgressEyecandy(filestatus(master,
1081 'convert to %s' % me.NAME),
1084 pipe.set_state(GS.STATE_NULL)
1086 raise fail[0], fail[1], fail[2]
1088 ## Fix up the output file if we have to.
1092 OS.rename(new, target)
1094 class OggVorbisFormat (AudioFormat):
1095 "AudioFormat object for Ogg Vorbis."
1097 ## From http://en.wikipedia.org/wiki/Vorbis
1098 QMAP = [(-1, 45), ( 0, 64), ( 1, 80), ( 2, 96),
1099 ( 3, 112), ( 4, 128), ( 5, 160), ( 6, 192),
1100 ( 7, 224), ( 8, 256), ( 9, 320), (10, 500)]
1103 MIMETYPES = set(['application/ogg', 'audio/x-vorbis', 'audio/ogg',
1104 'audio/x-vorbis+ogg'])
1107 def encoder_chain(me):
1108 for q, br in me.QMAP:
1109 if br >= me.bitrate:
1112 raise ValueError, 'no suitable quality setting found'
1113 return [make_element('vorbisenc',
1115 make_element('oggmux')]
1117 defformat('ogg-vorbis', OggVorbisFormat)
1119 class MP3Format (AudioFormat):
1120 "AudioFormat object for MP3."
1123 MIMETYPES = set(['audio/mpeg'])
1126 def encoder_chain(me):
1127 return [make_element('lame',
1128 vbr_mean_bitrate = me.bitrate,
1130 make_element('xingmux'),
1131 make_element('id3v2mux')]
1133 def fixup(me, path):
1137 GStreamer produces ID3v2 tags, but not ID3v1. This seems unnecessarily
1138 unkind to stupid players.
1142 tag.setTextEncoding(E3.UTF_8_ENCODING)
1144 tag.update(E3.ID3_V1_1)
1145 except (UnicodeEncodeError, E3.tag.GenreException):
1148 defformat('mp3', MP3Format)
1150 ###--------------------------------------------------------------------------
1151 ### Image handling, based on the Python Imaging Library.
1153 class ImageIdentifier (object):
1155 Analyses and identifies an image file.
1157 Simply leaves an Image object in the `img' property which can be inspected.
1160 def __init__(me, file, mime):
1162 ## Get PIL to open the file. It will magically work out what kind of
1165 me.img = I.open(file)
1166 except IOError, exc:
1168 ## Unhelpful thing to raise on identification failure. We can
1169 ## distinguish this from an actual I/O error because it doesn't have an
1171 if exc.errno is None:
1172 raise IdentificationFailure
1175 me.mime = set([mime])
## ImageFormat: common behaviour for the image formats below -- an optional
## size limit, a test for whether an existing image already satisfies the
## format, and conversion through PIL.
1177 class ImageFormat (BaseFormat):
1179 An ImageFormat is a kind of Format specialized for image files.
1181 Subclasses don't need to provide anything other than the properties
1182 required by all concrete Format subclasses. However, there is a
1183 requirement that the `NAME' property match PIL's `format' name for the
## `size' is declared as a numeric property.
## NOTE(review): prop and Num are defined elsewhere; presumably this is the
## grammar for a `size = N' setting -- confirm.
1187 PROPS = prop('size', Num)
## Files are offered to this category with the MIME pattern `image/*' and
## ImageIdentifier as the identifying class.
1188 CATEGORY = FileCategory('image', ['image/*'], ImageIdentifier)
## SIZE, when given, is the limit later compared against both image
## dimensions; the remaining keywords are collected in KW.
## NOTE(review): the assignments to `me._size' and `me._props' used further
## down are not visible here.
1190 def __init__(me, size = None, **kw):
1192 Initialize an ImageFormat object.
1194 Additional keywords are used when encoding, and may be recognized by
1195 enhanced `check' methods in subclasses.
1201 "Check whether the ImageIdentifier ID matches our requirements."
## A match needs PIL's format name to equal NAME and, if a size limit is
## set, both the width and the height to be within it.
1202 return id.img.format == me.NAME and \
1203 (me._size is None or
1204 (id.img.size[0] <= me._size and
1205 id.img.size[1] <= me._size))
## convert: writes the image to `TARGET.new' and renames it over TARGET at
## the end, so TARGET is only replaced by a completely written file.
1207 def convert(me, master, id, target):
1208 "Encode the file MASTER, identified as ID, writing the result to TARGET."
1210 ## Write to a scratch file.
1211 new = target + '.new'
1213 ## The ImageIdentifier already contains a copy of the open file. It
1214 ## would be wasteful not to use it.
1216 STATUS.set(filestatus(master, 'convert to %s' % me.NAME))
1218 ## If there's a stated maximum size then scale the image down to match.
1219 ## But thumbnailing clobbers the original, so take a copy.
1220 if me._size is not None and \
1221 (img.size[0] > me._size or img.size[1] > me._size):
## The bounding box is square (_size by _size).
## NOTE(review): newer Pillow releases dropped the ANTIALIAS name in favour
## of LANCZOS -- relevant if this is ever run against a current Pillow.
1223 img.thumbnail((me._size, me._size), I.ANTIALIAS)
1225 ## Write the output image.
## NAME doubles as PIL's format name; the format's properties are passed
## through as keyword arguments to the encoder.
1226 img.save(new, me.NAME, **me._props)
1228 ## Fix it up if necessary.
1232 OS.rename(new, target)
## JPEGFormat: an ImageFormat that only adds JPEG-specific properties.
1235 class JPEGFormat (ImageFormat):
1237 Image format for JPEG (actually JFIF) files.
1239 Interesting properties to set:
1242 If present, take a second pass to select optimal encoder settings.
1245 If present, make a progressive file.
1247 quality Integer from 1--100 (worst to best); default is 75.
## Three alternatives are combined with `|': `optimize' and `progressive'
## are declared with None in the value position, `quality' with Num.
## NOTE(review): the extra 'progression' argument looks like the keyword name
## under which `progressive' is stored/passed on -- confirm against prop.
1251 PROPS = prop('optimize', None) \
1252 | prop('progressive', None, 'progression') \
1253 | prop('quality', Num)
## Associate the name `jpeg' with this class (see defformat).
1255 defformat('jpeg', JPEGFormat)
## PNGFormat: an ImageFormat that only adds the `optimize' property.
1257 class PNGFormat (ImageFormat):
1259 Image format for PNG files.
1261 Interesting properties:
1264 If present, make a special effort to minimize the output file.
## `optimize' is declared with None in the value position, like the JPEG
## property of the same name.
1268 PROPS = prop('optimize', None)
## Associate the name `png' with this class (see defformat).
1270 defformat('png', PNGFormat)
## BMPFormat: an ImageFormat with nothing added beyond what the visible text
## describes.
1272 class BMPFormat (ImageFormat):
1274 Image format for Windows BMP files, as used by RockBox.
1276 No additional properties.
## Associate the name `bmp' with this class (see defformat).
1281 defformat('bmp', BMPFormat)
1283 ###--------------------------------------------------------------------------
1284 ### Remaining parsing machinery.
1286 Type = K('type') - Name - D('{') - R(Policy) - D('}')
1287 def build_type(s, l, t):
1289 cat = CATEGORYMAP[t[0]]
1291 raise P.ParseException(s, loc, "Unknown category `%s'" % t[0])
1293 if len(pols) == 1: pol = pols[0]
1294 else: pol = AndPolicy(pols)
1295 pol.setcategory(cat)
1297 Type.setParseAction(build_type)
## TargetJob: record of one `target' clause -- the target directory and the
## policies parsed inside its braces.  grobble reads both attributes.
1300 class TargetJob (object):
1301 def __init__(me, targetdir, policies):
1302 me.targetdir = targetdir
1303 me.policies = policies
## Grammar for `target STRING { TYPE ... }'.
1307 Target = K('target') - String - D('{') - R(Type) - D('}')
## Parse action: T[0] is the directory string and T[1] the collected results
## of the `type' clauses; both go straight into a TargetJob.
1308 def build_target(s, l, t):
1309 return TargetJob(t[0], t[1])
1310 Target.setParseAction(build_target)
## Global settings.  `master' is always present as a key and starts out as
## None; the main program reads VARS['master'] as the master directory.
1312 VARS = { 'master': None }
## VarsJob: carries the settings from one `vars' clause.
1313 class VarsJob (object):
1314 def __init__(me, vars):
## The settings are iterated as (key, value) pairs.
## NOTE(review): the loop body is not visible -- presumably it stores each
## pair in VARS; confirm.
1317 for k, v in me.vars:
## The only variable the grammar accepts is `master', with a string value.
1320 Var = prop('master', String)
## Grammar for `vars { VAR ... }'.
1321 Vars = K('vars') - D('{') - R(Var) - D('}')
## Parse action: T[0] is the collected settings, wrapped in a VarsJob.
1322 def build_vars(s, l, t):
1323 return VarsJob(t[0])
1324 Vars.setParseAction(build_vars)
## A configuration file is a sequence of `vars' and `target' clauses, with
## `#'-to-end-of-line comments ignored.
1326 TopLevel = Vars | Target
1327 Config = R(TopLevel)
1328 Config.ignore(P.pythonStyleComment)
1330 ###--------------------------------------------------------------------------
1331 ### The directory grobbler.
## grobble: walk the MASTER tree and bring each target tree into line with
## it, using the nested helpers below.  Failures on individual files are
## collected as (masterfile, exception) pairs in `broken'; the main program
## treats grobble's result as that list.
1333 def grobble(master, targets, noact = False):
1335 Work through the MASTER directory, writing converted files to TARGETS.
1337 The TARGETS are a list of `TargetJob' objects, each describing a target
1338 directory and a policy to apply to it.
1340 If NOACT is true, then don't actually do anything permanent to the
1344 ## Transform the targets into a more convenient data structure.
## tpolmap gets one map per target, in target order; each map groups that
## target's policies by their `cat' attribute.
1348 tpolmap.append(pmap)
1349 for p in t.policies: pmap.setdefault(p.cat, []).append(p)
1351 ## Keep track of the current position in the master tree.
1354 ## And the files which haven't worked.
1357 def grobble_file(master, pmap, targetdir, cohorts):
1358 ## Convert MASTER, writing the result to TARGETDIR.
1360 ## The COHORTS are actually (CAT, ID, COHORT) triples, where a COHORT is
1361 ## a list of (FILENAME, ID) pairs.
1363 ## Since this function might convert the MASTER file, the caller doesn't
1364 ## know the name of the output files, so we return then as a list.
## The master is stat'ed once; the result is compared against every target.
1367 st_m = OS.stat(master)
1369 ## Work through each category listed and apply its policy.
1370 for cat, id, cohort in cohorts:
1372 ## Go through the category's policies and see if any match. If we fail
1373 ## here, see if there are more categories to try.
1374 for pol in pmap[cat]:
1375 acts = pol.actions(master, targetdir, id, cohort)
1380 ## Work through the targets one by one.
1384 ## Find out whether the target file already exists and is up-to-date
1385 ## with respect to the master. (Caution here with low-resolution
1386 ## timestamps.) If it's OK, then just move on.
## `Up to date' means the target's mtime is strictly later than the
## master's, or both names refer to the same inode on the same device.
1388 st_t = OS.stat(a.target)
1389 if st_m.st_mtime < st_t.st_mtime or \
1390 (st_m.st_ino, st_m.st_dev) == (st_t.st_ino, st_t.st_dev):
1392 except OSError, err:
1393 if err.errno not in (E.ENOENT, E.ENOTDIR):
1396 ## We have real work to do. If there's a current status message,
1397 ## it's the containing directory so flush it so that people know
1401 ## Remove the target. (A hardlink will fail if the target already
1406 except OSError, err:
1407 if err.errno not in (E.ENOENT, E.ENOTDIR):
1410 ## Do whatever it is we decided to do.
1412 STATUS.commit(filestatus(master, a))
1416 ## We're done. Return the names of the targets.
## wrap is used in `with' statements around the work on one master file or
## directory; see its uses in grobble_dir.
1420 def wrap(masterfile):
1421 ## Handle exceptions found while trying to convert a particular file or
1427 ## Something bad happened. Report the error, but continue. (This list
1428 ## of exceptions needs a lot of work.)
1429 except (IOError, OSError), exc:
1431 STATUS.commit(filestatus(masterfile, 'failed (%s)' % exc))
1432 broken.append((masterfile, exc))
## In grobble_dir, TARGETS is a list of directory names running parallel to
## tpolmap (the two are zipped together below), not TargetJob objects.
1434 def grobble_dir(master, targets):
1435 ## Recursively convert files in MASTER, writing them to the TARGETS.
1437 ## Keep track of the subdirectories we encounter, because we'll need to
1438 ## do all of those in one go at the end.
1441 ## Work through each target directory in turn.
1442 for target, pmap in zip(targets, tpolmap):
1444 ## Make sure the TARGET exists and is a directory. It's a fundamental
1445 ## assumption of this program that the entire TARGET tree is
1446 ## disposable, so if something exists but isn't a directory, we should
1448 if OS.path.isdir(target):
1451 if OS.path.exists(target):
1452 STATUS.commit(filestatus(target, 'clear nondirectory'))
1455 STATUS.commit(filestatus(target, 'create directory'))
1459 ## Keep a list of things in the target. As we convert files, we'll
1460 ## check them off. Anything left over is rubbish and needs to be
## checklist is keyed by directory entry name; False means `not yet
## accounted for'.
1464 for i in OS.listdir(target):
1465 checklist[i] = False
1466 except OSError, err:
1467 if err.errno not in (E.ENOENT, E.ENOTDIR):
1470 ## Keep track of the files in each category.
1475 ## Work through the master files.
1476 for f in sorted(OS.listdir(master)):
1478 ## If the killswitch has been pulled then stop. The whole idea is
1479 ## that we want to cause a clean shutdown if possible, so we don't
1480 ## want to do it in the middle of encoding because the encoding
1481 ## effort will have been wasted. This is the only place we need to
1482 ## check. If we've exited the loop, then clearing old files will
1483 ## probably be fast, and we'll either end up here when the recursive
1484 ## call returns or we'll be in the same boat as before, clearing old
1485 ## files, only up a level. If worst comes to worst, we'll be killed
1486 ## forcibly somewhere inside `SH.rmtree', and that can continue where
1488 if KILLSWITCH.is_set():
1491 ## Do something with the file.
1492 with wrap(OS.path.join(master, f)) as masterfile:
1494 ## If it's a directory then prepare to grobble it recursively, but
1495 ## don't do that yet.
1496 if OS.path.isdir(masterfile):
## The corresponding target subdirectory is recorded in `done' so that
## the clean-up pass below leaves it alone.
1498 done.append(OS.path.join(target, f))
1500 ## Otherwise it's a file. Work out what kind, and stash it under
1501 ## the appropriate categories. Later, we'll apply policy to the
1502 ## files, by category, and work out what to do with them all.
1504 gf = GIO.File(masterfile)
1505 mime = gf.query_info('standard::content-type').get_content_type()
## Every category with a policy for this target gets a chance to
## identify the file; a None result means the category doesn't want it.
1507 for cat in pmap.iterkeys():
1508 id = cat.identify(masterfile, mime)
1509 if id is None: continue
1510 catmap.setdefault(cat, []).append((masterfile, id))
1511 cats.append((cat, id))
## NOTE(review): the condition guarding the next line is not visible; it
## files the master under the None key -- presumably for files that no
## category claimed.  Confirm.
1513 catmap.setdefault(None, []).append((masterfile, id))
1514 todo.append((masterfile, cats))
1516 ## Work through the categorized files to see what actions to do for
1518 for masterfile, cats in todo:
1519 with wrap(masterfile):
1520 done += grobble_file(masterfile, pmap, target,
1521 [(cat, id, catmap[cat]) for cat, id in cats])
1523 ## Check the results off the list so that we don't clear it later.
## Only the basename is used, matching the entry names checklist was built
## from.
1525 checklist[OS.path.basename(f)] = True
1527 ## Maybe there's stuff in the target which isn't accounted for. Delete
1528 ## it: either the master has changed, or the policy for this target has
1529 ## changed. Either way, the old files aren't wanted.
1531 if not checklist[f]:
1532 STATUS.commit(filestatus(f, 'clear bogus file'))
1534 bogus = OS.path.join(target, f)
1536 if OS.path.isdir(bogus):
1540 except OSError, err:
1541 if err.errno != E.ENOENT:
1544 ## If there are subdirectories which want processing then do those.
1545 ## Keep the user amused by telling him where we are in the tree.
1546 for d in sorted(subdirs):
1548 STATUS.set('/'.join(dirs))
1549 with wrap(OS.path.join(master, d)) as masterdir:
## Recurse with every target path extended by the same subdirectory name.
1551 grobble_dir(masterdir,
1552 [OS.path.join(target, d) for target in targets])
1555 STATUS.set('/'.join(dirs))
1557 ## Right. We're ready to go.
## The top-level call passes the directory name of each TargetJob.
1558 grobble_dir(master, [t.targetdir for t in targets])
1561 ###--------------------------------------------------------------------------
1562 ### Command-line interface.
## QUIS is the program's own name (basename of argv[0]); it prefixes the
## diagnostics below and is the `prog' given to the option parser.
1564 QUIS = OS.path.basename(SYS.argv[0])
## Warnings go to stderr as `PROGRAM: MESSAGE'.
1567 "Report a warning message to the user."
1568 SYS.stderr.write('%s: %s\n' % (QUIS, msg))
## NOTE(review): the body of the fatal-error reporter is not visible here.
1571 "Report a fatal error message to the user."
1575 def parse_opts(args):
1577 Parse command-line arguments in ARGS.
1579 Returns a Grobbler object and the MASTER and TARGET directories to be
1583 ## Build the option parser object.
1584 op = OP.OptionParser(prog = QUIS, version = VERSION,
1585 usage = '%prog [-in] [-t TIMEOUT] [-T TIMEOUT] '
1588 Convert a directory tree of files according to the configuration file
1592 ## Timeout handling.
1593 def cb_time(opt, ostr, arg, op):
1594 m = RX.match(r'\s*(\d+)\s*([dhms]?)\s*', arg)
1596 raise OP.OptionValueerror, 'bad time value `%s\'' % arg
1598 t = int(t) * { '': 1, 's': 1, 'm': 60, 'h': 3600, 'd': 86400 }[u]
1599 setattr(op.values, opt.dest, t)
1600 op.add_option('-t', '--timeout', type = 'string', metavar = 'SECS',
1602 help = 'stop processing nicely after SECS',
1603 action = 'callback', callback = cb_time)
1604 op.add_option('-T', '--timeout-nasty', type = 'string', metavar = 'SECS',
1605 dest = 'timeout_nasty',
1606 help = 'stop processing unpleasantly after further SECS',
1607 action = 'callback', callback = cb_time)
1610 op.add_option('-i', '--interactive', action = 'store_true', dest = 'tty',
1611 help = 'provide progress information')
1612 op.add_option('-n', '--no-act', action = 'store_true', dest = 'noact',
1613 help = 'don\'t actually modify the filesystem')
1616 op.set_defaults(formats = [], noact = False,
1617 timeout = None, timeout_nasty = 300)
1618 opts, args = op.parse_args(args)
1620 ## Check that we got the non-option arguments that we want.
1622 op.error('wrong number of arguments')
1624 ## Act on the options.
1626 STATUS.eyecandyp = True
1627 if opts.timeout is not None:
1628 to = TH.Thread(target = timeout,
1629 args = (opts.timeout, opts.timeout_nasty))
1633 ## Parse the configuration file.
1634 with open(args[0]) as conf:
1635 jobs, = Config.parseFile(conf, True)
1641 if __name__ == '__main__':
1642 opts = parse_opts(SYS.argv[1:])
1643 if 'master' not in VARS:
1644 die("no master directory set")
1645 broken = grobble(VARS['master'], TARGETS, opts.noact)
1647 moan('failed to convert some files:')
1648 for file, exc in broken:
1649 moan('%s: %s' % (file, exc))
1652 ## This is basically a successful completion: we did what we were asked to
1653 ## do. It seems polite to report a message, though.
1655 ## Why don't we have a nonzero exit status? The idea would be that a
1656 ## calling script would be interested that we used up all of our time, and
1657 ## not attempt to convert some other directory as well. But that doesn't
1658 ## quite work. Such a script would need to account correctly for time we
1659 ## had spent even if we complete successfully. And if the script is having
1660 ## to watch the clock itself, it can do that without our help here.
1661 if KILLSWITCH.is_set():
1662 moan('killed by timeout')
1664 ###----- That's all, folks --------------------------------------------------