3 ### Convert a directory tree of audio files
5 ### (c) 2010 Mark Wooding
8 ###----- Licensing notice ---------------------------------------------------
10 ### This program is free software; you can redistribute it and/or modify
11 ### it under the terms of the GNU General Public License as published by
12 ### the Free Software Foundation; either version 2 of the License, or
13 ### (at your option) any later version.
15 ### This program is distributed in the hope that it will be useful,
16 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ### GNU General Public License for more details.
20 ### You should have received a copy of the GNU General Public License
21 ### along with this program; if not, write to the Free Software Foundation,
22 ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
24 ###--------------------------------------------------------------------------
25 ### External dependencies.
28 from __future__ import with_statement
30 ## Standard Python libraries.
35 import unicodedata as UD
40 import threading as TH
43 from contextlib import contextmanager
45 ## eyeD3 tag fettling.
48 ## Gstreamer. It picks up command-line arguments -- most notably `--help' --
49 ## and processes them itself. Of course, its help is completely wrong. This
50 ## kludge is due to Jonas Wagner.
51 _argv, SYS.argv = SYS.argv, []
58 from PIL import Image as I
63 ###--------------------------------------------------------------------------
64 ### Special initialization.
71 ###--------------------------------------------------------------------------
72 ### Eyecandy progress reports.
76 Return the width of S, in characters.
78 Specifically, this is the number of backspace characters required to
79 overprint the string S. If the current encoding for `stdout' appears to be
80 Unicode then do a complicated Unicode thing; otherwise assume that
81 characters take up one cell each.
83 None of this handles tab characters in any kind of useful way. Sorry.
86 ## If there's no encoding for stdout then we're doing something stupid.
87 if SYS.stdout.encoding is None: return len(s)
89 ## Turn the string into Unicode so we can hack on it properly. Maybe that
90 ## won't work out, in which case fall back to being stupid.
91 try: u = s.decode(SYS.stdout.encoding)
92 except UnicodeError: return len(s)
94 ## Our main problem is combining characters, but we should also try to
95 ## handle wide (mostly Asian) characters, and zero-width ones. This hack
96 ## is taken mostly from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
100 if UD.category(ch) in ['Cf', 'Me', 'Mn'] or \
101 0x1160 <= cd <= 0x11ff: pass
102 elif UD.east_asian_width(ch) in ['F', 'W']: w += 2
108 class StatusLine (object):
110 Maintains a status line containing ephemeral progress information.
112 The status line isn't especially important, but it keeps interactive users
115 There should be only one status line object in your program; otherwise
116 they'll interfere with each other and get confused.
118 The update algorithm (in `set') is fairly careful to do the right thing
119 with long status `lines', and to work properly in an Emacs `shell' buffer.
123 "Initialize the status line."
126 me.eyecandyp = OS.isatty(SYS.stdout.fileno())
130 Set the status line contents to LINE, replacing what was there before.
132 This only produces actual output if stdout is interactive.
139 ## If the old line was longer, we need to clobber its tail, so work out
140 ## what that involves.
142 b = charwidth(me._last[n:])
147 ## Now figure out the length of the common prefix between what we had
148 ## before and what we have now. This reduces the amount of I/O done,
149 ## which keeps network traffic down on SSH links, and keeps down the
150 ## amount of work slow terminal emulators like Emacs have to do.
152 m = min(n, me._lastlen)
153 while i < m and line[i] == me._last[i]:
156 ## Actually do the output, all in one syscall.
157 b = charwidth(me._last[i:])
158 SYS.stdout.write(pre + '\b'*b + line[i:])
161 ## Update our idea of what's gone on.
166 "Clear the status line. Just like set('')."
169 def commit(me, line = None):
171 Commit the current status line, and maybe the string LINE.
173 If the current status line is nonempty, then commit it to the transcript.
174 If LINE is not None, then commit that to the transcript too.
176 After all of this, we clear the status line to get back to a clean state.
180 SYS.stdout.write('\n')
182 SYS.stdout.write(me._last + '\n')
184 SYS.stdout.write(line + '\n')
188 STATUS = StatusLine()
def filestatus(file, status):
  """
  Return a status-line message reporting STATUS for FILE.

  Only the final path component of FILE is shown.  The message is indented
  by eight spaces, and STATUS is formatted with `%s', so any object with a
  string conversion will do.
  """
  indent = ' '*8
  leaf = OS.path.basename(file)
  return '%s%s: %s' % (indent, leaf, status)
193 class ProgressEyecandy (object):
195 Provide amusement while something big and complicated is happening.
197 This is an abstract class. Subclasses must provide a method `progress'
198 returning a pair (CURRENT, MAX) indicating the current progress through the
202 def __init__(me, what, silentp = False):
204 Initialize a progress meter.
206 WHAT is a prefix string to be written before the progress eyecandy
210 me._silentp = silentp
214 def _fmt_time(me, t):
215 "Format T as a time, in (maybe hours) minutes and seconds."
216 s, t = t % 60, int(t/60)
217 m, h = t % 60, int(t/60)
219 return '%d:%02d:%02d' % (h, m, s)
221 return '%02d:%02d' % (m, s)
224 "Show the current level of progress."
226 ## If we're not showing pointless frippery, don't bother at all.
227 if not STATUS.eyecandyp:
230 ## Update the spinner index.
231 me._spinner = (me._spinner + 1)%4
233 ## Fetch the current progress information. Note that we always fetch
234 ## both the current and maximum levels, because both might change if an
235 ## operation revises its idea of how much work needs doing.
236 cur, max = me.progress()
238 ## If we couldn't get progress information, display something vaguely
240 if cur is None or max is None:
241 STATUS.set('%s %c [unknown progress]' %
242 (me._what, r'/-\|'[me._spinner]))
245 ## Work out -- well, guess -- the time remaining.
248 eta = me._fmt_time((t - me._start)*(max - cur)/cur)
252 ## Set the status bar.
254 STATUS.set('%s %c [%s%s] %3d%% (%s)' % \
256 r'/-\|'[me._spinner],
261 def done(me, win = True):
262 "Show a completion notice, or a failure if WIN is false."
264 STATUS.set('%s FAILED!' % me._what)
265 elif not me._silentp:
266 STATUS.set('%s done (%s)' %
268 me._fmt_time(T.time() - me._start)))
273 ###--------------------------------------------------------------------------
274 ### Timeout handling.
276 KILLSWITCH = TH.Event()
282 moan('dying messily due to timeout')
285 ###--------------------------------------------------------------------------
286 ### Parsing utilities.
288 ## Allow hyphens in identifiers.
289 IDCHARS = P.alphanums + '-_'
290 P.Keyword.setDefaultKeywordChars(IDCHARS)
292 ## Some common kinds of tokens.
293 Name = P.Word(IDCHARS)
294 Num = P.Word(P.nums).setParseAction(lambda toks: map(int, toks))
295 String = P.QuotedString('"', '\\')
297 ## Handy abbreviations for constructed parser elements.
def K(k):
  "Return a suppressed parser element for the keyword K."
  keyword = P.Keyword(k)
  return keyword.suppress()
def D(d):
  "Return a suppressed parser element for the literal delimiter D."
  delim = P.Literal(d)
  return delim.suppress()
def R(p):
  """
  Return a parser element matching zero or more repetitions of its argument.

  The attached parse action returns the matched tokens wrapped in a
  one-element list.
  """
  def gather(s, l, t):
    return [t]
  rep = P.ZeroOrMore(p)
  return rep.setParseAction(gather)
303 ###--------------------------------------------------------------------------
304 ### Format identification and conversion.
306 class IdentificationFailure (Exception):
309 class FileCategory (object):
311 A FileCategory represents a class of files.
313 For example, it's sensible to consider audio, or image files as a
314 category. A file category knows how to recognize member files from
318 def __init__(me, name, mime_pats, ident):
320 Construct a new category.
322 The MIME_PATS are a list of `fnmatch' patterns to be compared with a MIME
323 type. The IDENT is a function which produces an identification object
324 given a file's name and first-guess MIME type. The object is passed to a
325 Format's `check' method to see whether a file needs re-encoding, and to
326 `convert' to assist with the conversion.
328 An identification object must have an attribute `mime' which is a set of
329 possible MIME types accumulated for the object.
332 me._mime_pats = mime_pats
334 CATEGORYMAP[name] = me
336 def identify(me, file, mime):
338 Attempt to identify FILE, given its apparent MIME type.
340 If identification succeeds, return an identification object which can be
341 used by associated file formats; otherwise return None.
343 for p in me._mime_pats:
344 if not FN.fnmatchcase(mime, p):
347 return me._ident(file, mime)
348 except IdentificationFailure:
352 class BaseFormat (object):
354 A BaseFormat object represents a particular encoding and parameters.
356 The object can verify (the `check' method) whether a particular file
357 matches its requirements, and if necessary (`encode') re-encode a file.
359 Subclasses should define the following methods.
362 Answer whether the file identified by ID is acceptable according to
363 the receiver's parameters.
365 convert(MASTER, ID, TARGET)
366 Convert the file MASTER, which has been identified as ID, according
367 to the receiver's parameters, writing the output to TARGET.
369 Subclasses should also provide these attributes.
372 A FileCategory object for the category of files that this format
375 EXT A file extension to be applied to encoded output files.
377 NAME A user-facing name for the format.
379 PROPS A parser element to parse a property definition. It should produce
380 a pair NAME, VALUE to be stored in a dictionary.
382 Subclasses for different kinds of file may introduce more subclass
387 """Post-encoding fixups."""
def defformat(name, cls):
  """
  Define a format NAME using class CLS.

  CLS must provide both a `NAME' and a `CATEGORY' attribute; a ValueError is
  raised if either is missing.  Otherwise CLS is recorded in FORMATMAP under
  the key NAME.
  """
  ## Raise using the call form rather than the old `raise E, msg' statement
  ## form: the effect is the same, and this spelling is the recommended one.
  if not hasattr(cls, 'NAME'):
    raise ValueError('abstract class')
  if not hasattr(cls, 'CATEGORY'):
    raise ValueError('no category')
  FORMATMAP[name] = cls
401 class FormatParser (P.ParserElement):
403 Parse a format specifier:
405 format-spec ::= string [format-properties]
406 format-properties ::= `{' format-property (`,' format-property)* `}'
408 The syntax of a format-property is determined by the PROPS attribute on the
409 named format and its superclasses.
412 ## We cache the parser elements we generate to avoid enormous consing.
415 def parseImpl(me, s, loc, actp = True):
417 ## Firstly, determine the format name.
418 loc, r = Name._parse(s, loc, actp)
421 ## Look up the format class.
422 try: fcls = FORMATMAP[fmt]
424 raise P.ParseException(s, loc, "Unknown format `%s'" % fmt)
426 ## Fetch the property-list parser from the cache, if possible; else
435 except AttributeError: continue
436 if p in seen: continue
437 if prop is None: prop = p
441 pp = me.CACHE[fmt] = None
443 props = P.delimitedList(prop)
444 props.setParseAction(lambda s, l, t: dict(t.asList()))
445 pp = me.CACHE[fmt] = O(D('{') - props - D('}'))
447 ## Parse the properties.
451 loc, r = pp._parse(s, loc, actp)
455 ## Construct the format object and return it.
456 return loc, fcls(**pd)
458 Format = FormatParser()
460 def prop(kw, pval, tag = None):
461 if tag is None: tag = kw
464 p.setParseAction(lambda s, l, t: (tag, True))
466 p = K(kw) + D('=') + pval
467 p.setParseAction(lambda s, l, t: (tag, t[0]))
470 ###--------------------------------------------------------------------------
471 ### Policies and actions.
473 class Action (object):
475 An Action object represents a conversion action to be performed.
477 This class isn't intended to be instantiated directly. It exists to define
478 some protocol common to all Action objects.
480 Action objects have the following attributes.
482 master The name of the master (source) file.
484 target The name of the target (destination) file.
486 PRIORITY The priority of the action, for deciding which of two actions
487 to perform. Higher priorities are more likely to win.
489 Converting an Action to a string describes the action in a simple
490 user-readable manner. The `perform' method actually carries the action
496 def __init__(me, master):
497 "Stash the MASTER file name for later."
501 "Choose either ME or HIM and return one."
502 if him is None or me.PRIORITY > him.PRIORITY:
507 class CopyAction (Action):
509 An Action object for simply copying a file.
511 Actually we try to hardlink it first, falling back to a copy later. This
512 is both faster and more efficient with regard to disk space.
515 ## Copying is good. Linking is really good, but we can't tell the
516 ## difference at this stage.
def __init__(me, master, targetdir):
  """
  Initialize a CopyAction, from MASTER to the TARGETDIR directory.

  The target file keeps the base name of MASTER.
  """
  Action.__init__(me, master)
  leaf = OS.path.basename(master)
  me.target = OS.path.join(targetdir, leaf)
528 "Actually perform a CopyAction."
530 STATUS.set(filestatus(me.master, 'link'))
531 OS.link(me.master, me.target)
533 if err.errno != E.EXDEV:
535 STATUS.set(filestatus(me.master, 'copy'))
536 new = me.target + '.new'
537 SH.copyfile(me.master, new)
538 OS.rename(new, me.target)
541 class ConvertAction (Action):
543 An Action object for converting a file to a given format.
545 Additional attributes:
547 id The identification object for the master file.
549 format The format to which we're meant to convert the master.
552 def __init__(me, master, targetdir, id, format):
553 "Initialize a ConvertAction."
554 Action.__init__(me, master)
555 stem, ext = OS.path.splitext(OS.path.basename(master))
556 me.target = OS.path.join(targetdir, stem + '.' + format.EXT)
561 return 'convert to %s' % me.format.NAME
564 "Actually perform a ConvertAction."
565 STATUS.set(filestatus(me.master, me))
566 me.format.convert(me.master, me.id, me.target)
570 class FormatPolicy (object):
572 A FormatPolicy object represents a set of rules for how to convert files.
574 Given a master file, the FormatPolicy will identify it and return a list of
575 actions to be performed. The methods required of a FormatPolicy are:
578 Store CAT as the policy's category. Check that this is consistent
579 with the policy as stored.
581 actions(MASTER, TARGETDIR, ID, COHORT)
582 Given a MASTER file, identified as ID, a target directory
583 TARGETDIR, and a list COHORT of (FILE, ID) pairs for other files
584 of the same category in the same directory, return a list of
585 actions to be performed to get the target directory into the right
586 form. The list might be empty if the policy object /rejects/ the
590 class AndPolicy (FormatPolicy):
592 A FormatPolicy which does the union of a bunch of other policies.
594 Each subsidiary policy is invoked in turn. The highest-priority action for
595 each target file is returned.
def __init__(me, policies):
  "Initialize the policy, remembering the subsidiary POLICIES."
  me._policies = policies
601 def setcategory(me, cat):
603 for p in me._policies:
606 def actions(me, master, targetdir, id, cohort):
608 for p in me._policies:
609 for a in p.actions(master, targetdir, id, cohort):
611 tmap[a.target] = a.choose(tmap.get(a.target))
616 And = K('and') - D('{') - R(Policy) - D('}')
617 And.setParseAction(lambda s, l, t: AndPolicy(t[0]))
619 class OrPolicy (FormatPolicy):
621 A FormatPolicy which tries other policies and uses the first that accepts.
623 Each subsidiary policy is invoked in turn. If any accepts, the actions it
624 proposes are returned and no further policies are invoked. If none accepts
625 then the file is rejected.
def __init__(me, policies):
  "Initialize the policy, remembering the subsidiary POLICIES."
  me._policies = policies
631 def setcategory(me, cat):
633 for p in me._policies:
636 def actions(me, master, targetdir, id, cohort):
637 for p in me._policies:
638 aa = p.actions(master, targetdir, id, cohort)
644 Or = K('or') - D('{') - R(Policy) - D('}')
645 Or.setParseAction(lambda s, l, t: OrPolicy(t[0]))
647 class AcceptPolicy (FormatPolicy):
649 A FormatPolicy which copies files in a particular format.
651 If all of the files in a cohort are recognized as being in a particular
652 format (including this one), then accept it with a CopyAction; otherwise
656 def __init__(me, format):
659 def setcategory(me, cat):
660 if me._format.CATEGORY is not cat:
662 "Accept format `%s' has category `%s', not `%s'" % \
663 (me._format.__class__.__name__,
664 me._format.CATEGORY.name, cat.name)
667 def actions(me, master, targetdir, id, cohort):
668 if me._format.check(id) and \
669 all(me._format.check(cid) for f, cid in cohort):
670 return [CopyAction(master, targetdir)]
674 Accept = K('accept') - Format
675 Accept.setParseAction(lambda s, l, t: AcceptPolicy(t[0]))
677 class ConvertPolicy (FormatPolicy):
679 A FormatPolicy which copies files in a particular format or converts if
682 def __init__(me, format):
685 def setcategory(me, cat):
686 if me._format.CATEGORY is not cat:
688 "Accept format `%s' has category `%s', not `%s'" % \
689 (me._format.__class__.__name__,
690 me._format.CATEGORY.name, cat.name)
693 def actions(me, master, targetdir, id, cohort):
694 if me._format.check(id):
695 return [CopyAction(master, targetdir)]
697 return [ConvertAction(master, targetdir, id, me._format)]
699 Convert = K('convert') - Format
700 Convert.setParseAction(lambda s, l, t: ConvertPolicy(t[0]))
702 Policy << (And | Or | Accept | Convert)
704 ###--------------------------------------------------------------------------
705 ### Audio handling, based on GStreamer.
707 def make_element(factory, name = None, **props):
708 "Return a new element from the FACTORY with the given NAME and PROPS."
709 elt = GS.element_factory_make(factory, name)
710 elt.set_properties(**props)
713 class GStreamerProgressEyecandy (ProgressEyecandy):
715 Provide amusement while GStreamer is busy doing something.
717 The GStreamerProgressEyecandy object is a context manager. Wrap it round
718 your GStreamer loop to provide progress information for an operation.
721 def __init__(me, what, elt, **kw):
723 Initialize a progress meter.
725 WHAT is a prefix string to be written before the progress eyecandy
726 itself. ELT is a GStreamer element to interrogate to find the progress
730 ProgressEyecandy.__init__(me, what, **kw)
733 "Called by GLib main event loop to update the eyecandy."
739 Update the progress meter.
741 This is called periodically by the GLib main event-processing loop.
747 "Return the current progress as a pair (CURRENT, MAX)."
749 ## Fetch the current progress information. We get the duration each
750 ## time, because (particularly with VBR-encoded MP3 inputs) the estimated
751 ## duration can change as we progress. Hopefully it settles down fairly
754 t, hunoz = me._elt.query_position(GS.FORMAT_TIME)
755 end, hukairz = me._elt.query_duration(GS.FORMAT_TIME)
757 except GS.QueryError:
761 "Enter context: attach progress meter display."
763 ## If we're not showing pointless frippery, don't bother at all.
764 if not STATUS.eyecandyp:
767 ## Update regularly. The pipeline runs asynchronously.
768 me._id = G.timeout_add(200, me._update)
770 def __exit__(me, ty, val, tb):
771 "Leave context: remove display and report completion or failure."
773 ## If we're not showing pointless frippery, there's nothing to remove.
775 G.source_remove(me._id)
777 ## Report completion anyway.
783 class AudioIdentifier (object):
785 Analyses and identifies an audio file.
787 Important properties are:
789 cap A capabilities structure describing the audio file data. The most
790 interesting thing in here is probably its name, which is a MIME
791 type describing the data.
793 dcap A capabilities structure describing the decoded audio data. This
794 is of interest during conversion.
796 tags A dictionary containing metadata tags from the file. These are in
797 GStreamer's encoding-independent format.
799 bitrate An approximation to the stream's bitrate, in kilobits per second.
800 This might be slow to work out for some files so it's computed on
804 def __init__(me, file, mime):
805 "Initialize the object suitably for identifying FILE."
807 ## Make some initial GStreamer objects. We'll want the pipeline later if
808 ## we need to analyse a poorly tagged MP3 stream, so save it away.
809 me._pipe = GS.Pipeline()
811 bus = me._pipe.get_bus()
812 bus.add_signal_watch()
815 ## The basic recognition kit is based around `decodebin'. We must keep
816 ## it happy by giving it sinks for the streams it's found, which it
817 ## announces asynchronously.
818 source = make_element('filesrc', 'file', location = file)
819 decoder = make_element('decodebin', 'decode')
820 sink = make_element('fakesink')
821 def decoder_pad_arrived(elt, pad):
822 if pad.get_caps()[0].get_name().startswith('audio/'):
823 elt.link_pads(pad.get_name(), sink, 'sink')
824 dpaid = decoder.connect('pad-added', decoder_pad_arrived)
825 me._pipe.add(source, decoder, sink)
826 GS.element_link_many(source, decoder)
828 ## Arrange to collect tags from the pipeline's bus as they're reported.
829 ## If we reuse the pipeline later, we'll want different bus-message
830 ## handling, so make sure we can take the signal handler away.
833 def bus_message(bus, msg):
834 if msg.type == GS.MESSAGE_ERROR:
835 fail[:] = (ValueError, msg.structure['debug'], None)
837 elif msg.type == GS.MESSAGE_STATE_CHANGED:
838 if msg.structure['new-state'] == GS.STATE_PAUSED and \
841 elif msg.type == GS.MESSAGE_TAG:
842 tags.update(msg.structure)
843 bmid = bus.connect('message', bus_message)
845 ## We want to identify the kind of stream this is. (Hmm. The MIME type
846 ## recognizer has already done this work, but GStreamer is probably more
847 ## reliable.) The `decodebin' has a `typefind' element inside which will
848 ## announce the identified media type. All we need to do is find it and
849 ## attach a signal handler. (Note that the handler might be run in the
850 ## thread context of the pipeline element, but Python's GIL will keep
851 ## things from being too awful.)
854 for e in decoder.elements():
855 if e.get_factory().get_name() == 'typefind':
859 assert False, 'failed to find typefind element'
861 ## Crank up most of the heavy machinery. The message handler will stop
862 ## the loop when things seem to be sufficiently well underway.
863 me._pipe.set_state(GS.STATE_PAUSED)
866 decoder.disconnect(dpaid)
868 me._pipe.set_state(GS.STATE_NULL)
869 raise fail[0], fail[1], fail[2]
871 ## Store the collected tags.
874 ## Gather the capabilities. The `typefind' element knows the input data
875 ## type. The 'decodebin' knows the raw data type.
876 me.cap = tfelt.get_pad('src').get_negotiated_caps()[0]
877 me.mime = set([mime, me.cap.get_name()])
878 me.dcap = sink.get_pad('sink').get_negotiated_caps()[0]
880 ## If we found a plausible bitrate then stash it. Otherwise note that we
881 ## failed. If anybody asks then we'll work it out then.
882 if 'nominal-bitrate' in tags:
883 me._bitrate = tags['nominal-bitrate']/1000
884 elif 'bitrate' in tags and tags['bitrate'] >= 80000:
885 me._bitrate = tags['bitrate']/1000
889 ## The bitrate computation wants the file size. Ideally we'd want the
890 ## total size of the frames' contents, but that seems hard to dredge
891 ## out. If the framing overhead is small, this should be close enough
893 me._bytes = OS.stat(file).st_size
896 "Close the pipeline down so we don't leak file descriptors."
897 me._pipe.set_state(GS.STATE_NULL)
902 Return the approximate bit-rate of the input file.
904 This might take a while if we have to work it out the hard way.
907 ## If we already know the answer then just return it.
908 if me._bitrate is not None:
911 ## Make up a new main loop.
914 ## Watch for bus messages. We'll stop when we reach the end of the
915 ## stream: then we'll have a clear idea of how long the track was.
917 def bus_message(bus, msg):
918 if msg.type == GS.MESSAGE_ERROR:
919 fail[:] = (ValueError, msg.structure['debug'], None)
921 elif msg.type == GS.MESSAGE_EOS:
923 bus = me._pipe.get_bus()
924 bmid = bus.connect('message', bus_message)
926 ## Get everything moving, and keep the user amused while we work.
927 me._pipe.set_state(GS.STATE_PLAYING)
928 with GStreamerProgressEyecandy(filestatus(file, 'measure bitrate') %
934 me._pipe.set_state(GS.STATE_NULL)
935 raise fail[0], fail[1], fail[2]
937 ## Now we should be able to find out our position accurately and work out
938 ## a bitrate. Cache it in case anybody asks again.
939 t, hukairz = me._pipe.query_position(GS.FORMAT_TIME)
940 me._bitrate = int(8*me._bytes*1e6/t)
945 class AudioFormat (BaseFormat):
947 An AudioFormat is a kind of Format specialized for audio files.
949 Format checks are done on an AudioIdentifier object.
952 PROPS = prop('bitrate', Num)
954 ## libmagic reports `application/ogg' for Ogg Vorbis files. We've switched
955 ## to GIO now, which reports either `audio/ogg' or `audio/x-vorbis+ogg'
956 ## depending on how thorough it's trying to be. Still, it doesn't do any
957 ## harm here; the main risk is picking up Ogg Theora files by accident, and
958 ## we'll probably be able to extract the audio from them anyway.
959 CATEGORY = FileCategory('audio', ['audio/*', 'application/ogg'],
962 def __init__(me, bitrate = None):
963 "Construct an object, requiring an approximate bitrate."
968 Return whether the AudioIdentifier ID is suitable for our purposes.
970 Subclasses can either override this method or provide a property
971 `MIMETYPES', which is a list (or other thing that implements `__contains__')
972 of GStreamer MIME types matching this format.
974 return id.mime & me.MIMETYPES and \
975 (me.bitrate is None or id.bitrate <= me.bitrate * sqrt(2))
979 Constructs a GStreamer element to encode audio input.
981 Subclasses can either override this method (or replace `encode'
982 entirely), or provide a method `encoder_chain' which returns a list of
983 elements to be linked together in sequence. The first element in the
984 chain must have a pad named `sink' and the last must have a pad named
987 elts = me.encoder_chain()
990 GS.element_link_many(*elts)
991 bin.add_pad(GS.GhostPad('sink', elts[0].get_pad('sink')))
992 bin.add_pad(GS.GhostPad('src', elts[-1].get_pad('src')))
995 def convert(me, master, id, target):
997 Encode audio from MASTER, already identified as ID, writing it to TARGET.
999 See `encoder' for subclasses' responsibilities.
1002 ## Construct the necessary equipment.
1003 pipe = GS.Pipeline()
1004 bus = pipe.get_bus()
1005 bus.add_signal_watch()
1008 ## Make sure that there isn't anything in the way of our output. We're
1009 ## going to write to a scratch file so that we don't get confused by
1010 ## half-written rubbish left by a crashed program.
1011 new = target + '.new'
1014 except OSError, err:
1015 if err.errno != E.ENOENT:
1018 ## Piece together our pipeline. The annoying part is that the
1019 ## `decodebin' doesn't have any source pads yet, so our chain is in two
1021 source = make_element('filesrc', 'source', location = master)
1022 decoder = make_element('decodebin', 'decode')
1023 convert = make_element('audioconvert', 'convert')
1024 encoder = me.encoder()
1025 sink = make_element('filesink', 'sink', location = new)
1026 pipe.add(source, decoder, convert, encoder, sink)
1027 GS.element_link_many(source, decoder)
1028 GS.element_link_many(convert, encoder, sink)
1030 ## Some decoders (e.g., the AC3 decoder) include channel-position
1031 ## indicators in their output caps. The Vorbis encoder interferes with
1032 ## this, and you end up with a beautifully encoded mono signal from a
1033 ## stereo source. From a quick butchers at the `vorbisenc' source, I
1034 ## /think/ that this is only a problem with stereo signals: mono signals
1035 ## are mono already, and `vorbisenc' accepts channel positions if there
1036 ## are more than two channels.
1038 ## So we have this bodge. We already collected the decoded audio caps
1039 ## during identification. So if we see 2-channel audio with channel
1040 ## positions, we strip the positions off forcibly by adding a filter.
1041 if id.dcap.get_name().startswith('audio/x-raw-') and \
1042 id.dcap.has_field('channels') and \
1043 id.dcap['channels'] == 2 and \
1044 id.dcap.has_field('channel-positions'):
1047 c.remove_field('channel-positions')
1052 ## Hook onto the `decodebin' so we can link together the two halves of
1053 ## our encoding chain. For now, we'll hope that there's only one audio
1054 ## stream in there, and just throw everything else away.
1055 def decoder_pad_arrived(elt, pad):
1056 if pad.get_caps()[0].get_name().startswith('audio/'):
1058 elt.link_pads_filtered(pad.get_name(), convert, 'sink', dcap)
1060 elt.link_pads(pad.get_name(), convert, 'sink')
1061 decoder.connect('pad-added', decoder_pad_arrived)
1063 ## Watch the bus for completion messages.
1065 def bus_message(bus, msg):
1066 if msg.type == GS.MESSAGE_ERROR:
1067 fail[:] = (ValueError, msg.structure['debug'], None)
1069 elif msg.type == GS.MESSAGE_EOS:
1071 bmid = bus.connect('message', bus_message)
1073 ## Get everything ready and let it go.
1074 pipe.set_state(GS.STATE_PLAYING)
1075 with GStreamerProgressEyecandy(filestatus(master,
1076 'convert to %s' % me.NAME),
1079 pipe.set_state(GS.STATE_NULL)
1081 raise fail[0], fail[1], fail[2]
1083 ## Fix up the output file if we have to.
1087 OS.rename(new, target)
1089 class OggVorbisFormat (AudioFormat):
1090 "AudioFormat object for Ogg Vorbis."
1092 ## From http://en.wikipedia.org/wiki/Vorbis
1093 QMAP = [(-1, 45), ( 0, 64), ( 1, 80), ( 2, 96),
1094 ( 3, 112), ( 4, 128), ( 5, 160), ( 6, 192),
1095 ( 7, 224), ( 8, 256), ( 9, 320), (10, 500)]
1098 MIMETYPES = set(['application/ogg', 'audio/x-vorbis', 'audio/ogg',
1099 'audio/x-vorbis+ogg'])
1102 def encoder_chain(me):
1103 for q, br in me.QMAP:
1104 if br >= me.bitrate:
1107 raise ValueError, 'no suitable quality setting found'
1108 return [make_element('vorbisenc',
1110 make_element('oggmux')]
1112 defformat('ogg-vorbis', OggVorbisFormat)
1114 class MP3Format (AudioFormat):
1115 "AudioFormat object for MP3."
1118 MIMETYPES = set(['audio/mpeg'])
1121 def encoder_chain(me):
1122 return [make_element('lame',
1123 vbr_mean_bitrate = me.bitrate,
1125 make_element('xingmux'),
1126 make_element('id3v2mux')]
1128 def fixup(me, path):
1132 GStreamer produces ID3v2 tags, but not ID3v1. This seems unnecessarily
1133 unkind to stupid players.
1137 tag.setTextEncoding(E3.UTF_8_ENCODING)
1139 tag.update(E3.ID3_V1_1)
1140 except (UnicodeEncodeError, E3.tag.GenreException):
1143 defformat('mp3', MP3Format)
1145 ###--------------------------------------------------------------------------
1146 ### Image handling, based on the Python Imaging Library.
1148 class ImageIdentifier (object):
1150 Analyses and identifies an image file.
1152 Simply leaves an Image object in the `img' property which can be inspected.
1155 def __init__(me, file, mime):
1157 ## Get PIL to open the file. It will magically work out what kind of
1160 me.img = I.open(file)
1161 except IOError, exc:
1163 ## Unhelpful thing to raise on identification failure. We can
1164 ## distinguish this from an actual I/O error because it doesn't have an
1166 if exc.errno is None:
1167 raise IdentificationFailure
1170 me.mime = set([mime])
1172 class ImageFormat (BaseFormat):
1174 An ImageFormat is a kind of Format specialized for image files.
1176 Subclasses don't need to provide anything other than the properties
1177 required by all concrete Format subclasses. However, there is a
1178 requirement that the `NAME' property match PIL's `format' name for the
1182 PROPS = prop('size', Num)
1183 CATEGORY = FileCategory('image', ['image/*'], ImageIdentifier)
1185 def __init__(me, size = None, **kw):
1187 Initialize an ImageFormat object.
1189 Additional keywords are used when encoding, and may be recognized by
1190 enhanced `check' methods in subclasses.
1196 "Check whether the ImageIdentifier ID matches our requirements."
1197 return id.img.format == me.NAME and \
1198 (me._size is None or
1199 (id.img.size[0] <= me._size and
1200 id.img.size[1] <= me._size))
1202 def convert(me, master, id, target):
1203 "Encode the file MASTER, identified as ID, writing the result to TARGET."
1205 ## Write to a scratch file.
1206 new = target + '.new'
1208 ## The ImageIdentifier already contains a copy of the open file. It
1209 ## would be wasteful not to use it.
1211 STATUS.set(filestatus(master, 'convert to %s' % me.NAME))
1213 ## If there's a stated maximum size then scale the image down to match.
1214 ## But thumbnailing clobbers the original, so take a copy.
1215 if me._size is not None and \
1216 (img.size[0] > me._size or img.size[1] > me._size):
1218 img.thumbnail((me._size, me._size), I.ANTIALIAS)
1220 ## Write the output image.
1221 img.save(new, me.NAME, **me._props)
1223 ## Fix it up if necessary.
1227 OS.rename(new, target)
1230 class JPEGFormat (ImageFormat):
1232 Image format for JPEG (actually JFIF) files.
1234 Interesting properties to set:
1237 If present, take a second pass to select optimal encoder settings.
1240 If present, make a progressive file.
1242 quality Integer from 1--100 (worst to best); default is 75.
1246 PROPS = prop('optimize', None) \
1247 | prop('progressive', None, 'progression') \
1248 | prop('quality', Num)
1250 defformat('jpeg', JPEGFormat)
1252 class PNGFormat (ImageFormat):
1254 Image format for PNG files.
1256 Interesting properties:
1259 If present, make a special effort to minimize the output file.
1263 PROPS = prop('optimize', None)
1265 defformat('png', PNGFormat)
1267 class BMPFormat (ImageFormat):
1269 Image format for Windows BMP files, as used by RockBox.
1271 No additional properties.
1276 defformat('bmp', BMPFormat)
1278 ###--------------------------------------------------------------------------
1279 ### Remaining parsing machinery.
1281 Type = K('type') - Name - D('{') - R(Policy) - D('}')
def build_type(s, l, t):
  ## Parse action for `Type': turn a parsed category name and its policies
  ## into a single policy object bound to that category.
  ##
  ## S is the input string, L the location of the match and T the parsed
  ## tokens, with the category name in T[0].  An unknown category name is
  ## reported as a `ParseException' at location L.  (This used to pass
  ## `loc', which is not defined in this function, so the intended parse
  ## error could never actually be raised.)
  ##
  ## If there is exactly one policy then it is used directly; otherwise the
  ## policies are combined using `AndPolicy'.
  ##
  ## NOTE(review): the listing this was recovered from elides the `try',
  ## `except', `pols = ...' and `return' lines.  They are reconstructed here
  ## on the assumption that the policy group is T[1] and that the finished
  ## policy is the action's result -- confirm against the real file.
  try:
    cat = CATEGORYMAP[t[0]]
  except KeyError:
    raise P.ParseException(s, l, "Unknown category `%s'" % t[0])
  pols = t[1]
  if len(pols) == 1: pol = pols[0]
  else: pol = AndPolicy(pols)
  pol.setcategory(cat)
  return pol
1292 Type.setParseAction(build_type)
1295 class TargetJob (object):
1296 def __init__(me, targetdir, policies):
1297 me.targetdir = targetdir
1298 me.policies = policies
1302 Target = K('target') - String - D('{') - R(Type) - D('}')
def build_target(s, l, t):
  ## Parse action for `Target': wrap the parsed target directory (T[0]) and
  ## its policies (T[1]) up as a `TargetJob'.
  targetdir, policies = t[0], t[1]
  return TargetJob(targetdir, policies)
1305 Target.setParseAction(build_target)
1307 VARS = { 'master': None }
1308 class VarsJob (object):
1309 def __init__(me, vars):
1312 for k, v in me.vars:
1315 Var = prop('master', String)
1316 Vars = K('vars') - D('{') - R(Var) - D('}')
def build_vars(s, l, t):
  ## Parse action for `Vars': hand the first token, T[0], to a new `VarsJob'.
  settings = t[0]
  return VarsJob(settings)
1319 Vars.setParseAction(build_vars)
1321 TopLevel = Vars | Target
1322 Config = R(TopLevel)
1323 Config.ignore(P.pythonStyleComment)
1325 ###--------------------------------------------------------------------------
1326 ### The directory grobbler.
1328 def grobble(master, targets, noact = False):
1330 Work through the MASTER directory, writing converted files to TARGETS.
1332 The TARGETS are a list of `TargetJob' objects, each describing a target
1333 directory and a policy to apply to it.
1335 If NOACT is true, then don't actually do anything permanent to the
1339 ## Transform the targets into a more convenient data structure.
1343 tpolmap.append(pmap)
1344 for p in t.policies: pmap.setdefault(p.cat, []).append(p)
1346 ## Keep track of the current position in the master tree.
1349 ## And the files which haven't worked.
1352 def grobble_file(master, pmap, targetdir, cohorts):
1353 ## Convert MASTER, writing the result to TARGETDIR.
1355 ## The COHORTS are actually (CAT, ID, COHORT) triples, where a COHORT is
1356 ## a list of (FILENAME, ID) pairs.
1358 ## Since this function might convert the MASTER file, the caller doesn't
1359 ## know the name of the output files, so we return them as a list.
1362 st_m = OS.stat(master)
1364 ## Work through each category listed and apply its policy.
1365 for cat, id, cohort in cohorts:
1367 ## Go through the category's policies and see if any match. If we fail
1368 ## here, see if there are more categories to try.
1369 for pol in pmap[cat]:
1370 acts = pol.actions(master, targetdir, id, cohort)
1375 ## Work through the targets one by one.
1379 ## Find out whether the target file already exists and is up-to-date
1380 ## with respect to the master. (Caution here with low-resolution
1381 ## timestamps.) If it's OK, then just move on.
1383 st_t = OS.stat(a.target)
1384 if st_m.st_mtime < st_t.st_mtime or \
1385 (st_m.st_ino, st_m.st_dev) == (st_t.st_ino, st_t.st_dev):
1387 except OSError, err:
1388 if err.errno not in (E.ENOENT, E.ENOTDIR):
1391 ## We have real work to do. If there's a current status message,
1392 ## it's the containing directory so flush it so that people know
1396 ## Remove the target. (A hardlink will fail if the target already
1401 except OSError, err:
1402 if err.errno not in (E.ENOENT, E.ENOTDIR):
1405 ## Do whatever it is we decided to do.
1407 STATUS.commit(filestatus(master, a))
1411 ## We're done. Return the names of the targets.
1415 def wrap(masterfile):
1416 ## Handle exceptions found while trying to convert a particular file or
1422 ## Something bad happened. Report the error, but continue. (This list
1423 ## of exceptions needs a lot of work.)
1424 except (IOError, OSError), exc:
1426 STATUS.commit(filestatus(masterfile, 'failed (%s)' % exc))
1427 broken.append((masterfile, exc))
1429 def grobble_dir(master, targets):
1430 ## Recursively convert files in MASTER, writing them to the TARGETS.
1432 ## Keep track of the subdirectories we encounter, because we'll need to
1433 ## do all of those in one go at the end.
1436 ## Work through each target directory in turn.
1437 for target, pmap in zip(targets, tpolmap):
1439 ## Make sure the TARGET exists and is a directory. It's a fundamental
1440 ## assumption of this program that the entire TARGET tree is
1441 ## disposable, so if something exists but isn't a directory, we should
1443 if OS.path.isdir(target):
1446 if OS.path.exists(target):
1447 STATUS.commit(filestatus(target, 'clear nondirectory'))
1450 STATUS.commit(filestatus(target, 'create directory'))
1454 ## Keep a list of things in the target. As we convert files, we'll
1455 ## check them off. Anything left over is rubbish and needs to be
1459 for i in OS.listdir(target):
1460 checklist[i] = False
1461 except OSError, err:
1462 if err.errno not in (E.ENOENT, E.ENOTDIR):
1465 ## Keep track of the files in each category.
1470 ## Work through the master files.
1471 for f in sorted(OS.listdir(master)):
1473 ## If the killswitch has been pulled then stop. The whole idea is
1474 ## that we want to cause a clean shutdown if possible, so we don't
1475 ## want to do it in the middle of encoding because the encoding
1476 ## effort will have been wasted. This is the only place we need to
1477 ## check. If we've exited the loop, then clearing old files will
1478 ## probably be fast, and we'll either end up here when the recursive
1479 ## call returns or we'll be in the same boat as before, clearing old
1480 ## files, only up a level. If worst comes to worst, we'll be killed
1481 ## forcibly somewhere inside `SH.rmtree', and that can continue where
1483 if KILLSWITCH.is_set():
1486 ## Do something with the file.
1487 with wrap(OS.path.join(master, f)) as masterfile:
1489 ## If it's a directory then prepare to grobble it recursively, but
1490 ## don't do that yet.
1491 if OS.path.isdir(masterfile):
1493 done.append(OS.path.join(target, f))
1495 ## Otherwise it's a file. Work out what kind, and stash it under
1496 ## the appropriate categories. Later, we'll apply policy to the
1497 ## files, by category, and work out what to do with them all.
1499 gf = GIO.File(masterfile)
1500 mime = gf.query_info('standard::content-type').get_content_type()
1502 for cat in pmap.iterkeys():
1503 id = cat.identify(masterfile, mime)
1504 if id is None: continue
1505 catmap.setdefault(cat, []).append((masterfile, id))
1506 cats.append((cat, id))
1508 catmap.setdefault(None, []).append((masterfile, id))
1509 todo.append((masterfile, cats))
1511 ## Work through the categorized files to see what actions to do for
1513 for masterfile, cats in todo:
1514 with wrap(masterfile):
1515 done += grobble_file(masterfile, pmap, target,
1516 [(cat, id, catmap[cat]) for cat, id in cats])
1518 ## Check the results off the list so that we don't clear it later.
1520 checklist[OS.path.basename(f)] = True
1522 ## Maybe there's stuff in the target which isn't accounted for. Delete
1523 ## it: either the master has changed, or the policy for this target has
1524 ## changed. Either way, the old files aren't wanted.
1526 if not checklist[f]:
1527 STATUS.commit(filestatus(f, 'clear bogus file'))
1529 bogus = OS.path.join(target, f)
1531 if OS.path.isdir(bogus):
1535 except OSError, err:
1536 if err.errno != E.ENOENT:
1539 ## If there are subdirectories which want processing then do those.
1540 ## Keep the user amused by telling him where we are in the tree.
1541 for d in sorted(subdirs):
1543 STATUS.set('/'.join(dirs))
1544 with wrap(OS.path.join(master, d)) as masterdir:
1546 grobble_dir(masterdir,
1547 [OS.path.join(target, d) for target in targets])
1550 STATUS.set('/'.join(dirs))
1552 ## Right. We're ready to go.
1553 grobble_dir(master, [t.targetdir for t in targets])
1556 ###--------------------------------------------------------------------------
1557 ### Command-line interface.
1559 QUIS = OS.path.basename(SYS.argv[0])
1562 "Report a warning message to the user."
1563 SYS.stderr.write('%s: %s\n' % (QUIS, msg))
1566 "Report a fatal error message to the user."
1570 def parse_opts(args):
1572 Parse command-line arguments in ARGS.
1574 Returns a Grobbler object and the MASTER and TARGET directories to be
1578 ## Build the option parser object.
1579 op = OP.OptionParser(prog = QUIS, version = VERSION,
1580 usage = '%prog [-in] [-t TIMEOUT] [-T TIMEOUT] '
1583 Convert a directory tree of files according to the configuration file
1587 ## Timeout handling.
def cb_time(opt, ostr, arg, op):
  ## Option-parser callback: parse the time value ARG and store it, in
  ## seconds, in the parser's values under the option's `dest' name.
  ##
  ## ARG is a decimal integer optionally followed by a single unit letter:
  ## `s' (seconds, also the default), `m' (minutes), `h' (hours) or `d'
  ## (days).  Whitespace around either part is ignored.  Anything else is
  ## rejected by raising `OptionValueError'.
  ##
  ## The pattern is anchored at the end so that trailing junk (`5x',
  ## `10 minutes') is refused rather than silently truncated to whatever
  ## prefix happens to match.
  m = RX.match(r'\s*(\d+)\s*([dhms]?)\s*$', arg)
  if not m:
    raise OP.OptionValueError("bad time value `%s'" % arg)
  t, u = m.groups()
  t = int(t) * { '': 1, 's': 1, 'm': 60, 'h': 3600, 'd': 86400 }[u]
  setattr(op.values, opt.dest, t)
1595 op.add_option('-t', '--timeout', type = 'string', metavar = 'SECS',
1597 help = 'stop processing nicely after SECS',
1598 action = 'callback', callback = cb_time)
1599 op.add_option('-T', '--timeout-nasty', type = 'string', metavar = 'SECS',
1600 dest = 'timeout_nasty',
1601 help = 'stop processing unpleasantly after further SECS',
1602 action = 'callback', callback = cb_time)
1605 op.add_option('-i', '--interactive', action = 'store_true', dest = 'tty',
1606 help = 'provide progress information')
1607 op.add_option('-n', '--no-act', action = 'store_true', dest = 'noact',
1608 help = 'don\'t actually modify the filesystem')
1611 op.set_defaults(formats = [], noact = False,
1612 timeout = None, timeout_nasty = 300)
1613 opts, args = op.parse_args(args)
1615 ## Check that we got the non-option arguments that we want.
1617 op.error('wrong number of arguments')
1619 ## Act on the options.
1621 STATUS.eyecandyp = True
1622 if opts.timeout is not None:
1623 to = TH.Thread(target = timeout,
1624 args = (opts.timeout, opts.timeout_nasty))
1628 ## Parse the configuration file.
1629 with open(args[0]) as conf:
1630 jobs, = Config.parseFile(conf, True)
if __name__ == '__main__':
  opts = parse_opts(SYS.argv[1:])

  ## `VARS' starts out with `master' mapped to None, so a membership test
  ## can never notice that the configuration failed to set it.  Check for a
  ## missing value instead, so that the user gets a proper error message
  ## rather than having None passed on as the master directory.
  if VARS.get('master') is None:
    die("no master directory set")
  broken = grobble(VARS['master'], TARGETS, opts.noact)
1642 moan('failed to convert some files:')
1643 for file, exc in broken:
1644 moan('%s: %s' % (file, exc))
1647 ## This is basically a successful completion: we did what we were asked to
1648 ## do. It seems polite to report a message, though.
1650 ## Why don't we have a nonzero exit status? The idea would be that a
1651 ## calling script would be interested that we used up all of our time, and
1652 ## not attempt to convert some other directory as well. But that doesn't
1653 ## quite work. Such a script would need to account correctly for time we
1654 ## had spent even if we complete successfully. And if the script is having
1655 ## to watch the clock itself, it can do that without our help here.
1656 if KILLSWITCH.is_set():
1657 moan('killed by timeout')
1659 ###----- That's all, folks --------------------------------------------------