chiark / gitweb /
gremlin/gremlin.in: Add function for linking a sequence of elements.
[autoys] / gremlin / gremlin.in
CommitLineData
d4a7d7b5 1#! @PYTHON@
583b7e4a
MW
2###
3### Convert a directory tree of audio files
4###
5### (c) 2010 Mark Wooding
6###
7
8###----- Licensing notice ---------------------------------------------------
9###
9e3a516f
MW
10### This file is part of the `autoys' audio tools collection.
11###
12### `autoys' is free software; you can redistribute it and/or modify
583b7e4a
MW
13### it under the terms of the GNU General Public License as published by
14### the Free Software Foundation; either version 2 of the License, or
15### (at your option) any later version.
16###
9e3a516f 17### `autoys' is distributed in the hope that it will be useful,
583b7e4a
MW
18### but WITHOUT ANY WARRANTY; without even the implied warranty of
19### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20### GNU General Public License for more details.
21###
22### You should have received a copy of the GNU General Public License
9e3a516f 23### along with `autoys'; if not, write to the Free Software Foundation,
583b7e4a
MW
24### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26###--------------------------------------------------------------------------
27### External dependencies.
28
29## Language features.
30from __future__ import with_statement
31
32## Standard Python libraries.
33import sys as SYS
34import os as OS
35import errno as E
36import time as T
37import unicodedata as UD
38import fnmatch as FN
39import re as RX
40import shutil as SH
41import optparse as OP
42import threading as TH
43import shlex as L
00beb9e5 44from math import sqrt, ceil
583b7e4a
MW
45from contextlib import contextmanager
46
47## eyeD3 tag fettling.
608b936e 48import eyed3 as E3
583b7e4a
MW
49
50## Gstreamer. It picks up command-line arguments -- most notably `--help' --
51## and processes them itself. Of course, its help is completely wrong. This
52## kludge is due to Jonas Wagner.
53_argv, SYS.argv = SYS.argv, []
54import gobject as G
55import gio as GIO
56import gst as GS
57SYS.argv = _argv
58
59## Python Imaging.
60from PIL import Image as I
61
62## Python parsing.
63import pyparsing as P
64
65###--------------------------------------------------------------------------
66### Special initialization.
67
## The program's version string.  `@VERSION@' looks like a placeholder which
## is substituted when this template is processed -- TODO confirm against the
## build system.
VERSION = '@VERSION@'

## GLib.  Initialize GLib's thread support at start-up, before any of the
## machinery below is used.
G.threads_init()
72
73###--------------------------------------------------------------------------
74### Eyecandy progress reports.
75
def charwidth(s):
  """
  Return the display width of the string S, in character cells.

  This is the number of backspaces needed to move the cursor back over S
  once it has been printed.  If `stdout' has an encoding, S is decoded using
  it and each character is measured according to its Unicode properties;
  otherwise (or if decoding fails) every item of S is assumed to occupy
  exactly one cell.

  Tab characters are not handled in any useful way.
  """

  ## Without an output encoding there's nothing clever to be done.
  enc = SYS.stdout.encoding
  if enc is None: return len(s)

  ## Decode the string so that we can look at real characters.  If it won't
  ## decode, fall back to the simple-minded answer.
  try: text = s.decode(enc)
  except UnicodeError: return len(s)

  ## Add up the widths.  Format and combining characters, and the Hangul
  ## Jamo medial vowels and final consonants (U+1160--U+11FF), take no space;
  ## full-width and wide East Asian characters take two cells; everything
  ## else takes one.  Based on http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
  width = 0
  for ch in text:
    if UD.category(ch) in ['Cf', 'Me', 'Mn'] or 0x1160 <= ord(ch) <= 0x11ff:
      continue
    if UD.east_asian_width(ch) in ['F', 'W']: width += 2
    else: width += 1

  ## Done.
  return width
109
class StatusLine (object):
  """
  Maintains a status line containing ephemeral progress information.

  The status line only exists to keep interactive users entertained.  There
  should be exactly one StatusLine object in a program: several of them
  would overwrite each other's output.

  The `set' method takes care to cope with long lines, to write as little as
  possible, and to behave sensibly in an Emacs `shell' buffer.
  """

  def __init__(me):
    "Initialize the status line: empty, with eyecandy iff stdout is a tty."
    me._last = ''
    me._lastlen = 0
    me.eyecandyp = OS.isatty(SYS.stdout.fileno())

  def set(me, line):
    """
    Replace the status line's contents with LINE.

    Output is only actually produced if stdout is interactive; the new
    contents are remembered either way.
    """
    n = len(line)

    if me.eyecandyp:
      old = me._last

      ## If the new line is shorter than the old one, the leftover tail must
      ## be overwritten with spaces: step back over it and blank it out.
      if n < me._lastlen:
        tail = charwidth(old[n:])
        pre = '\b'*tail + ' '*tail
      else:
        pre = ''

      ## Find how much of the old line can be kept: only the part after the
      ## common prefix is rewritten, which keeps the amount of output small
      ## for slow links and slow terminal emulators.
      limit = min(n, me._lastlen)
      i = 0
      while i < limit and line[i] == old[i]: i += 1

      ## Step back to the first difference and write the rest in one go.
      back = charwidth(old[i:])
      SYS.stdout.write(pre + '\b'*back + line[i:])
      SYS.stdout.flush()

    ## Remember what is (notionally) on the screen now.
    me._lastlen = n
    me._last = line

  def clear(me):
    "Clear the status line.  Just like set('')."
    me.set('')

  def commit(me, line = None):
    """
    Commit the current status line, and maybe the string LINE.

    A nonempty status line is made permanent: on an interactive terminal it
    is already displayed, so only a newline is written; otherwise the line
    is written out in full.  Then LINE, if not None, is written on a line of
    its own.  Finally the status line is reset to empty.
    """
    if me._last:
      if me.eyecandyp: SYS.stdout.write('\n')
      else: SYS.stdout.write(me._last + '\n')
    if line is not None: SYS.stdout.write(line + '\n')
    me._lastlen = 0
    me._last = ''
189
## The program's one and only status line.
STATUS = StatusLine()
191
def filestatus(file, status):
  """
  Return a status-line string reporting STATUS for FILE.

  Only the base name of FILE is shown, indented by eight spaces.
  """
  indent = ' '*8
  base = OS.path.basename(file)
  return '%s%s: %s' % (indent, base, status)
194
class ProgressEyecandy (object):
  """
  Provide amusement while something big and complicated is happening.

  This is an abstract class.  Subclasses must provide a method `progress'
  returning a pair (CURRENT, MAX) indicating the current progress through the
  operation; either may be None if the progress isn't known.
  """

  def __init__(me, what, silentp = False):
    """
    Initialize a progress meter.

    WHAT is a prefix string to be written before the progress eyecandy
    itself.  If SILENTP is true then `done' says nothing on success.
    """
    me._what = what
    me._silentp = silentp
    me._spinner = 0
    me._start = T.time()

  def _fmt_time(me, t):
    "Format T (in seconds) as a time, in (maybe hours) minutes and seconds."
    s, t = t % 60, int(t/60)
    m, h = t % 60, int(t/60)
    if h > 0:
      return '%d:%02d:%02d' % (h, m, s)
    else:
      return '%02d:%02d' % (m, s)

  def show(me):
    "Show the current level of progress on the status line."

    ## If we're not showing pointless frippery, don't bother at all.
    if not STATUS.eyecandyp:
      return

    ## Update the spinner.
    me._spinner = (me._spinner + 1)%4
    spin = r'/-\|'[me._spinner]

    ## Fetch the current progress information.  Note that we always fetch
    ## both the current and maximum levels, because both might change if an
    ## operation revises its idea of how much work needs doing.
    cur, end = me.progress()

    ## If we couldn't get progress information, display something vaguely
    ## amusing anyway.  A zero (or negative) maximum is no more useful than
    ## none at all, and would make us divide by zero below.
    if cur is None or end is None or end <= 0:
      STATUS.set('%s %c [unknown progress]' % (me._what, spin))
      return

    ## Work out -- well, guess -- the time remaining.  The current position
    ## might overshoot a maximum which is still being revised, so don't let
    ## the estimate go negative.
    if cur > 0:
      elapsed = T.time() - me._start
      eta = me._fmt_time(ceil(elapsed*max(end - cur, 0)/cur))
    else:
      eta = '???'

    ## Set the status bar.  Use explicit floor division so that the bar
    ## width is always an integer, and clamp it to the width of the bar.
    n = min(max(int(40*cur//end), 0), 40)
    STATUS.set('%s %c [%s%s] %3d%% (%s)' % \
                 (me._what,
                  spin,
                  '='*n, ' '*(40 - n),
                  100*cur//end,
                  eta))

  def done(me, win = True):
    """
    Show a completion notice, or a failure if WIN is false.

    A successful completion is only reported if the meter isn't silent.
    Whatever is reported is committed to the transcript.
    """
    if not win:
      STATUS.set('%s FAILED!' % me._what)
    elif not me._silentp:
      STATUS.set('%s done (%s)' %
                 (me._what,
                  me._fmt_time(T.time() - me._start)))
    else:
      return
    STATUS.commit()
274
275###--------------------------------------------------------------------------
276### Timeout handling.
277
## Set by `timeout' when the first time limit expires.  NOTE(review): the code
## which watches this event isn't visible in this part of the file.
KILLSWITCH = TH.Event()

def timeout(t0, t1):
  """
  Enforce a time limit on the program.

  Sleep for T0 seconds and then set KILLSWITCH; sleep for a further T1
  seconds and then complain and exit the process immediately with status 3.

  This blocks its caller for the whole time, so it is presumably meant to be
  run in a thread of its own -- TODO confirm against the caller.
  """
  T.sleep(t0)
  KILLSWITCH.set()
  T.sleep(t1)
  moan('dying messily due to timeout')
  ## Use `_exit' rather than `exit': leave at once, without any cleanup.
  OS._exit(3)
286
287###--------------------------------------------------------------------------
288### Parsing utilities.
289
## Allow hyphens in identifiers.  The same character set is installed as
## pyparsing's default set of keyword characters.
IDCHARS = P.alphanums + '-_'
P.Keyword.setDefaultKeywordChars(IDCHARS)

## Some common kinds of tokens: a name made of identifier characters; a
## string of digits, converted to an integer; and a double-quoted string with
## backslash as the escape character.
Name = P.Word(IDCHARS)
Num = P.Word(P.nums).setParseAction(lambda toks: map(int, toks))
String = P.QuotedString('"', '\\')

## Handy abbreviations for constructed parser elements.
##
##   K(k)       the keyword K, suppressed from the results
##   D(d)       the literal delimiter D, suppressed from the results
##   R(p)       zero or more repetitions of P, with the parse action
##              wrapping the matched tokens up as a single result
##   O          shorthand for `P.Optional'
def K(k): return P.Keyword(k).suppress()
def D(d): return P.Literal(d).suppress()
def R(p): return P.ZeroOrMore(p).setParseAction(lambda s, l, t: [t])
O = P.Optional
304
305###--------------------------------------------------------------------------
306### Format identification and conversion.
307
class IdentificationFailure (Exception):
  """
  Reports that a file couldn't be identified.

  `FileCategory.identify' catches this exception from a category's
  identification function and treats it as a failed match.
  """
  pass
310
class FileCategory (object):
  """
  A FileCategory represents a class of files.

  For example, it's sensible to consider audio, or image files as a
  category.  A file category knows how to recognize member files from
  MIME content types.
  """

  def __init__(me, name, mime_pats, ident):
    """
    Construct a new category called NAME and record it in CATEGORYMAP.

    The MIME_PATS are a list of `fnmatch' patterns to be compared with a
    MIME type.  The IDENT is a function which produces an identification
    object given a file's name and first-guess MIME type.  The object is
    passed to a Format's `check' method to see whether a file needs
    re-encoding, and to `convert' to assist with the conversion.

    An identification object must have an attribute `mime' which is a set of
    possible MIME types accumulated for the object.
    """
    me.name = name
    me._mime_pats = mime_pats
    me._ident = ident
    CATEGORYMAP[name] = me

  def identify(me, file, mime):
    """
    Attempt to identify FILE, given its apparent MIME type.

    The identification function is tried for each pattern which matches
    MIME (case-sensitively).  Return the first identification object
    produced, for use by associated file formats; return None if no pattern
    matches or every attempt fails with IdentificationFailure.
    """
    for pat in me._mime_pats:
      if FN.fnmatchcase(mime, pat):
        try: return me._ident(file, mime)
        except IdentificationFailure: pass
    return None
353
class BaseFormat (object):
  """
  A BaseFormat object represents a particular encoding and parameters.

  The object can verify (the `check' method) whether a particular file
  matches its requirements, and if necessary (`convert') re-encode a file.

  Subclasses should define the following methods.

  check(ID)
        Answer whether the file identified by ID is acceptable according to
        the receiver's parameters.

  convert(MASTER, ID, TARGET)
        Convert the file MASTER, which has been identified as ID, according
        to the receiver's parameters, writing the output to TARGET.

  Subclasses may also override the following method.

  fixup(PATH)
        Apply any post-encoding fixups to PATH.  The default does nothing.

  Subclasses should also provide these attributes.

  CATEGORY
        A FileCategory object for the category of files that this format
        lives within.

  EXT   A file extension to be applied to encoded output files.

  NAME  A user-facing name for the format.

  PROPS A parser element to parse a property definition.  It should produce
        a pair NAME, VALUE to be stored in a dictionary.

  Subclasses for different kinds of file may introduce more subclass
  protocol.
  """

  def fixup(me, path):
    """Post-encoding fixups.  Nothing needs doing by default."""
    pass
391
## Registries: FORMATMAP maps format names to format classes (see
## `defformat'); CATEGORYMAP maps category names to FileCategory objects.
FORMATMAP = {}
CATEGORYMAP = {}

def defformat(name, cls):
  """
  Define a format NAME using class CLS.

  CLS must be a concrete format class, i.e., it must have both `NAME' and
  `CATEGORY' attributes; otherwise ValueError is raised and nothing is
  registered.
  """
  ## Use the call form of `raise': the old `raise CLASS, VALUE' statement
  ## means the same thing but isn't accepted by newer Pythons.
  if not hasattr(cls, 'NAME'):
    raise ValueError('abstract class')
  if not hasattr(cls, 'CATEGORY'):
    raise ValueError('no category')
  FORMATMAP[name] = cls
402
class FormatParser (P.ParserElement):
  """
  Parse a format specifier:

        format-spec ::= name [format-properties]
        format-properties ::= `{' format-property (`,' format-property)* `}'

  The name must be one registered in FORMATMAP.  The syntax of a
  format-property is determined by the PROPS attribute on the named format
  and its superclasses.

  The result of a successful parse is an instance of the format class,
  constructed with the parsed properties as keyword arguments.
  """

  name = 'format-spec'

  ## We cache the parser elements we generate to avoid enormous consing.
  ## The cache maps format names to property-list parsers, or to None for
  ## formats which have no properties at all.
  CACHE = {}

  def parseImpl(me, s, loc, actp = True):
    """
    Parse a format specifier from S starting at LOC.

    Return a pair (LOC, FORMAT) of the position following the specifier and
    the format object constructed.  Raise `P.ParseException' if the format
    name is unknown.
    """

    ## Firstly, determine the format name.
    loc, r = Name._parse(s, loc, actp)
    fmt = r[0]

    ## Look up the format class.
    try: fcls = FORMATMAP[fmt]
    except KeyError:
      raise P.ParseException(s, loc, "Unknown format `%s'" % fmt)

    ## Fetch the property-list parser from the cache, if possible; else
    ## construct it.
    try:
      pp = me.CACHE[fmt]
    except KeyError:

      ## Collect the alternatives from the class and all of its
      ## superclasses.  A class which doesn't define PROPS for itself
      ## inherits its parent's, so keep track of the ones already seen to
      ## avoid including the same element twice.
      seen = set()
      prop = None
      for c in fcls.mro():
        try: p = c.PROPS
        except AttributeError: continue
        if p in seen: continue
        if prop is None: prop = p
        else: prop |= p
        seen.add(p)

      ## Build the parser for the (optional) braced, comma-separated list,
      ## which collects the NAME, VALUE pairs into a dictionary.
      if prop is None:
        pp = me.CACHE[fmt] = None
      else:
        props = P.delimitedList(prop)
        props.setParseAction(lambda s, l, t: dict(t.asList()))
        pp = me.CACHE[fmt] = O(D('{') - props - D('}'))

    ## Parse the properties.  The list is optional, so an empty result just
    ## means that there are no properties.
    if pp is None:
      pd = {}
    else:
      loc, r = pp._parse(s, loc, actp)
      if r: pd = r[0]
      else: pd = {}

    ## Construct the format object and return it.
    return loc, fcls(**pd)
461
## The parser element for format specifiers, for use in grammars.
Format = FormatParser()
463
def prop(kw, pval, tag = None):
  """
  Return a parser element for a format property with keyword KW.

  If PVAL is None then the property is a flag: the bare keyword parses to
  the pair (TAG, True).  Otherwise PVAL is a parser element for the
  property's value, and `KW = VALUE' parses to the pair (TAG, VALUE).  TAG
  defaults to KW.
  """
  if tag is None: tag = kw
  if pval is not None:
    p = K(kw) + D('=') + pval
    p.setParseAction(lambda s, l, t: (tag, t[0]))
  else:
    p = K(kw)
    p.setParseAction(lambda s, l, t: (tag, True))
  return p
473
474###--------------------------------------------------------------------------
475### Policies and actions.
476
class Action (object):
  """
  An Action object represents a conversion action to be performed.

  This class isn't meant to be instantiated directly: it defines the
  protocol common to all kinds of Action.

  Action objects have the following attributes.

  master        The name of the master (source) file.

  target        The name of the target (destination) file.

  PRIORITY      The priority of the action, used to decide between two
                competing actions.  Higher priorities are more likely to
                win.

  Converting an Action to a string gives a simple user-readable description
  of it.  The `perform' method actually carries the action out.
  """

  PRIORITY = 0

  def __init__(me, master):
    "Stash the MASTER file name for later."
    me.master = master

  def choose(me, him):
    """
    Choose either ME or HIM and return one.

    HIM may be None, in which case ME wins.  Otherwise ME wins only with a
    strictly higher priority, so HIM wins ties.
    """
    if him is not None and not me.PRIORITY > him.PRIORITY:
      return him
    return me
510
class CopyAction (Action):
  """
  An Action object for simply copying a file.

  Actually we try to hardlink it first, falling back to a copy later.  This
  is both faster and more efficient with regard to disk space.
  """

  ## Copying is good.  Linking is really good, but we can't tell the
  ## difference at this stage.
  PRIORITY = 10

  def __init__(me, master, targetdir):
    "Initialize a CopyAction, from MASTER to the TARGETDIR directory."
    Action.__init__(me, master)
    me.target = OS.path.join(targetdir, OS.path.basename(master))

  def __str__(me):
    return 'copy/link'

  def perform(me):
    """
    Actually perform a CopyAction.

    Try to make a hard link first.  If that fails because the master and
    target are on different filesystems (EXDEV), copy the file to a scratch
    name and rename it into place instead.  Any other OSError is propagated
    to the caller.
    """
    try:
      STATUS.set(filestatus(me.master, 'link'))
      OS.link(me.master, me.target)
    except OSError as err:
      if err.errno != E.EXDEV:
        raise
      STATUS.set(filestatus(me.master, 'copy'))

      ## Copy to a scratch file and rename it, so that the real target name
      ## only ever refers to a complete file.
      new = me.target + '.new'
      SH.copyfile(me.master, new)
      OS.rename(new, me.target)
    STATUS.commit()
544
class ConvertAction (Action):
  """
  An Action object for converting a file to a given format.

  Additional attributes:

  id            The identification object for the master file.

  format        The format to which we're meant to convert the master.
  """

  def __init__(me, master, targetdir, id, format):
    """
    Initialize a ConvertAction.

    The target is in TARGETDIR, named after MASTER but with its extension
    replaced by the FORMAT's `EXT'.
    """
    Action.__init__(me, master)
    stem = OS.path.splitext(OS.path.basename(master))[0]
    me.target = OS.path.join(targetdir, '%s.%s' % (stem, format.EXT))
    me.id = id
    me.format = format

  def __str__(me):
    return 'convert to %s' % me.format.NAME

  def perform(me):
    "Actually perform a ConvertAction, by asking the format to convert."
    STATUS.set(filestatus(me.master, me))
    me.format.convert(me.master, me.id, me.target)
571
## The parser for policies.  Policies can contain other policies, so the
## grammar is recursive: this is a forward declaration which is filled in
## once the parsers for the individual kinds of policy have been defined.
Policy = P.Forward()
573
class FormatPolicy (object):
  """
  A FormatPolicy object represents a set of rules for how to convert files.

  Given a master file, the FormatPolicy will identify it and return a list of
  actions to be performed.  This class defines no behaviour of its own: it
  only documents the protocol.  The methods required of a FormatPolicy are:

  setcategory(CAT)
        Store CAT as the policy's category.  Check that this is consistent
        with the policy as stored.

  actions(MASTER, TARGETDIR, ID, COHORT)
        Given a MASTER file, identified as ID, a target directory
        TARGETDIR, and a list COHORT of (FILE, ID) pairs for other files
        of the same category in the same directory, return a list of
        actions to be performed to get the target directory into the right
        form.  The list might be empty if the policy object /rejects/ the
        file.
  """
593
class AndPolicy (FormatPolicy):
  """
  A FormatPolicy which does the union of a bunch of other policies.

  Every subsidiary policy is consulted in turn.  Where several of them
  propose actions for the same target file, the actions' `choose' method
  decides which one survives.
  """

  def __init__(me, policies):
    "Initialize the policy to combine the subsidiary POLICIES."
    me._policies = policies

  def setcategory(me, cat):
    "Store the category CAT, and pass it on to the subsidiary policies."
    me.cat = cat
    for p in me._policies:
      p.setcategory(cat)

  def actions(me, master, targetdir, id, cohort):
    "Return the winning action for each target any subsidiary proposes."

    ## Collect the actions by target file name.  The first action proposed
    ## for a target is simply kept; later ones must compete with it.
    tmap = {}
    for p in me._policies:
      for a in p.actions(master, targetdir, id, cohort):
        if a.target not in tmap:
          tmap[a.target] = a
        else:
          tmap[a.target] = a.choose(tmap.get(a.target))
    return tmap.values()
619
## Syntax: `and { POLICY ... }'.  The enclosed policies are collected into a
## single result, from which the AndPolicy is made.
And = K('and') - D('{') - R(Policy) - D('}')
And.setParseAction(lambda s, l, t: AndPolicy(t[0]))
622
class OrPolicy (FormatPolicy):
  """
  A FormatPolicy which tries other policies and uses the first that accepts.

  Each subsidiary policy is invoked in turn.  If any accepts, the actions it
  proposes are returned and no further policies are invoked.  If none
  accepts then the file is rejected.
  """

  def __init__(me, policies):
    "Initialize the policy to choose among the subsidiary POLICIES."
    me._policies = policies

  def setcategory(me, cat):
    "Store the category CAT, and pass it on to the subsidiary policies."
    me.cat = cat
    for p in me._policies:
      p.setcategory(cat)

  def actions(me, master, targetdir, id, cohort):
    "Return the first nonempty action list proposed, or an empty list."
    for p in me._policies:
      aa = p.actions(master, targetdir, id, cohort)
      if aa: return aa
    return []
647
## Syntax: `or { POLICY ... }'.  The enclosed policies are collected into a
## single result, from which the OrPolicy is made.
Or = K('or') - D('{') - R(Policy) - D('}')
Or.setParseAction(lambda s, l, t: OrPolicy(t[0]))
650
class AcceptPolicy (FormatPolicy):
  """
  A FormatPolicy which copies files in a particular format.

  If all of the files in a cohort are recognized as being in a particular
  format (including this one), then accept it with a CopyAction; otherwise
  reject.
  """

  def __init__(me, format):
    "Initialize the policy to accept files matching FORMAT."
    me._format = format

  def setcategory(me, cat):
    """
    Store CAT as the policy's category.

    Raise ValueError if the format doesn't belong to the category CAT.
    """
    ## Use the call form of `raise': the old `raise CLASS, VALUE' statement
    ## means the same thing but isn't accepted by newer Pythons.
    if me._format.CATEGORY is not cat:
      raise ValueError(
        "Accept format `%s' has category `%s', not `%s'" %
        (me._format.__class__.__name__,
         me._format.CATEGORY.name, cat.name))
    me.cat = cat

  def actions(me, master, targetdir, id, cohort):
    "Return a single CopyAction if the whole cohort matches; else nothing."
    if me._format.check(id) and \
          all(me._format.check(cid) for f, cid in cohort):
      return [CopyAction(master, targetdir)]
    else:
      return []
677
## Syntax: `accept FORMAT-SPEC'.
Accept = K('accept') - Format
Accept.setParseAction(lambda s, l, t: AcceptPolicy(t[0]))
680
class ConvertPolicy (FormatPolicy):
  """
  A FormatPolicy which copies files in a particular format or converts if
  necessary.

  This policy never rejects a file: a file which already satisfies the
  format is copied, and anything else is converted.
  """

  def __init__(me, format):
    "Initialize the policy to produce files matching FORMAT."
    me._format = format

  def setcategory(me, cat):
    """
    Store CAT as the policy's category.

    Raise ValueError if the format doesn't belong to the category CAT.
    """
    ## The message used to blame an `Accept' format, having been copied from
    ## AcceptPolicy; name the right kind of policy.
    if me._format.CATEGORY is not cat:
      raise ValueError(
        "Convert format `%s' has category `%s', not `%s'" %
        (me._format.__class__.__name__,
         me._format.CATEGORY.name, cat.name))
    me.cat = cat

  def actions(me, master, targetdir, id, cohort):
    "Return a CopyAction if MASTER is acceptable; else a ConvertAction."
    if me._format.check(id):
      return [CopyAction(master, targetdir)]
    else:
      return [ConvertAction(master, targetdir, id, me._format)]
702
## Syntax: `convert FORMAT-SPEC'.
Convert = K('convert') - Format
Convert.setParseAction(lambda s, l, t: ConvertPolicy(t[0]))

## Now that all of the kinds of policy are defined, fill in the forward
## declaration: a policy is any one of them.
Policy << (And | Or | Accept | Convert)
707
708###--------------------------------------------------------------------------
709### Audio handling, based on GStreamer.
710
def make_element(factory, name = None, **props):
  """
  Return a new element from the FACTORY with the given NAME and PROPS.

  FACTORY is the name of a GStreamer element factory.  The keyword arguments
  PROPS are set as properties on the new element.
  """
  elt = GS.element_factory_make(factory, name)
  elt.set_properties(**props)
  return elt
716
0dbdd41c
MW
def link_elements(elts):
  """
  Link the elements ELTS together, in order.

  ELTS is a sequence of GStreamer elements.
  """
  GS.element_link_many(*elts)
720
583b7e4a
MW
class GStreamerProgressEyecandy (ProgressEyecandy):
  """
  Provide amusement while GStreamer is busy doing something.

  The GStreamerProgressEyecandy object is a context manager.  Wrap it round
  your GStreamer loop to provide progress information for an operation.
  """

  def __init__(me, what, elt, **kw):
    """
    Initialize a progress meter.

    WHAT is a prefix string to be written before the progress eyecandy
    itself.  ELT is a GStreamer element to interrogate to find the progress
    information.  Other keyword arguments are passed on to ProgressEyecandy.
    """
    me._elt = elt
    ProgressEyecandy.__init__(me, what, **kw)

  def _update(me):
    """
    Called by GLib main event loop to update the eyecandy.

    Returns true, so that the timeout source stays installed.
    """
    me.show()
    return True

  def _timer(me):
    """
    Update the progress meter.

    This is called periodically by the GLib main event-processing loop.
    """
    me.show()
    return True

  def progress(me):
    """
    Return the current progress as a pair (CURRENT, MAX).

    Both are None if the element can't answer the queries.
    """

    ## Ask for the duration afresh every time: the estimate can change as we
    ## go (particularly with VBR-encoded MP3 inputs), though hopefully it
    ## settles down fairly soon.
    try:
      pos, _posfmt = me._elt.query_position(GS.FORMAT_TIME)
      dur, _durfmt = me._elt.query_duration(GS.FORMAT_TIME)
    except GS.QueryError:
      return None, None
    else:
      return pos, dur

  def __enter__(me):
    "Enter context: attach progress meter display."

    ## Update regularly -- the pipeline runs asynchronously -- but only if
    ## we're showing pointless frippery at all.
    if STATUS.eyecandyp:
      me._id = G.timeout_add(200, me._update)

  def __exit__(me, ty, val, tb):
    "Leave context: remove display and report completion or failure."

    ## The timer was only installed if we're showing pointless frippery.
    if STATUS.eyecandyp:
      G.source_remove(me._id)

    ## Report completion anyway: we won if no exception is in flight.
    me.done(ty is None)

    ## As you were: don't swallow any exception.
    return False
790
class AudioIdentifier (object):
  """
  Analyses and identifies an audio file.

  Important properties are:

  cap   A capabilities structure describing the audio file data.  The most
        interesting thing in here is probably its name, which is a MIME
        type describing the data.

  dcap  A capabilities structure describing the decoded audio data.  This
        is of interest during conversion.

  mime  A set of MIME types for the file: the caller's first guess, and the
        type found by GStreamer.

  tags  A dictionary containing metadata tags from the file.  These are in
        GStreamer's encoding-independent format.

  bitrate An approximation to the stream's bitrate, in kilobits per second.
        This might be slow to work out for some files so it's computed on
        demand.
  """

  def __init__(me, file, mime):
    """
    Initialize the object suitably for identifying FILE.

    MIME is the caller's first guess at the file's MIME type.  Raise
    ValueError if GStreamer reports an error while examining the file.
    """

    ## Make some initial GStreamer objects.  We'll want the pipeline later if
    ## we need to analyse a poorly tagged MP3 stream, so save it away.
    me._pipe = GS.Pipeline()
    me._file = file
    bus = me._pipe.get_bus()
    bus.add_signal_watch()
    loop = G.MainLoop()

    ## The basic recognition kit is based around `decodebin'.  We must keep
    ## it happy by giving it sinks for the streams it's found, which it
    ## announces asynchronously.
    source = make_element('filesrc', 'file', location = file)
    decoder = make_element('decodebin', 'decode')
    sink = make_element('fakesink')
    def decoder_pad_arrived(elt, pad):
      if pad.get_caps()[0].get_name().startswith('audio/'):
        elt.link_pads(pad.get_name(), sink, 'sink')
    dpaid = decoder.connect('pad-added', decoder_pad_arrived)
    me._pipe.add(source, decoder, sink)
    link_elements([source, decoder])

    ## Arrange to collect tags from the pipeline's bus as they're reported.
    ## If we reuse the pipeline later, we'll want different bus-message
    ## handling, so make sure we can take the signal handler away.
    tags = {}
    fail = []
    def bus_message(bus, msg):
      if msg.type == GS.MESSAGE_ERROR:
        fail[:] = (ValueError, msg.structure['debug'], None)
        loop.quit()
      elif msg.type == GS.MESSAGE_STATE_CHANGED:
        if msg.structure['new-state'] == GS.STATE_PAUSED and \
              msg.src == me._pipe:
          loop.quit()
      elif msg.type == GS.MESSAGE_TAG:
        tags.update(msg.structure)
    bmid = bus.connect('message', bus_message)

    ## We want to identify the kind of stream this is.  (Hmm.  The MIME type
    ## recognizer has already done this work, but GStreamer is probably more
    ## reliable.)  The `decodebin' has a `typefind' element inside which will
    ## announce the identified media type.  All we need to do is find it and
    ## attach a signal handler.  (Note that the handler might be run in the
    ## thread context of the pipeline element, but Python's GIL will keep
    ## things from being too awful.)
    me.cap = None
    me.dcap = None
    for e in decoder.elements():
      if e.get_factory().get_name() == 'typefind':
        tfelt = e
        break
    else:
      assert False, 'failed to find typefind element'

    ## Crank up most of the heavy machinery.  The message handler will stop
    ## the loop when things seem to be sufficiently well underway.
    me._pipe.set_state(GS.STATE_PAUSED)
    loop.run()
    bus.disconnect(bmid)
    decoder.disconnect(dpaid)
    if fail:
      me._pipe.set_state(GS.STATE_NULL)
      raise fail[0](fail[1])

    ## Store the collected tags.
    me.tags = tags

    ## Gather the capabilities.  The `typefind' element knows the input data
    ## type.  The 'decodebin' knows the raw data type.
    me.cap = tfelt.get_pad('src').get_negotiated_caps()[0]
    me.mime = set([mime, me.cap.get_name()])
    me.dcap = sink.get_pad('sink').get_negotiated_caps()[0]

    ## If we found a plausible bitrate then stash it.  Otherwise note that we
    ## failed.  If anybody asks then we'll work it out then.
    if 'nominal-bitrate' in tags:
      me._bitrate = tags['nominal-bitrate']/1000
    elif 'bitrate' in tags and tags['bitrate'] >= 80000:
      me._bitrate = tags['bitrate']/1000
    else:
      me._bitrate = None

    ## The bitrate computation wants the file size.  Ideally we'd want the
    ## total size of the frames' contents, but that seems hard to dredge
    ## out.  If the framing overhead is small, this should be close enough
    ## for our purposes.
    me._bytes = OS.stat(file).st_size

  def __del__(me):
    "Close the pipeline down so we don't leak file descriptors."
    me._pipe.set_state(GS.STATE_NULL)

  @property
  def bitrate(me):
    """
    Return the approximate bit-rate of the input file.

    This might take a while if we have to work it out the hard way, by
    playing the whole file through the pipeline.  Raise ValueError if
    GStreamer reports an error while doing so.
    """

    ## If we already know the answer then just return it.
    if me._bitrate is not None:
      return me._bitrate

    ## Make up a new main loop.
    loop = G.MainLoop()

    ## Watch for bus messages.  We'll stop when we reach the end of the
    ## stream: then we'll have a clear idea of how long the track was.
    fail = []
    def bus_message(bus, msg):
      if msg.type == GS.MESSAGE_ERROR:
        fail[:] = (ValueError, msg.structure['debug'], None)
        loop.quit()
      elif msg.type == GS.MESSAGE_EOS:
        loop.quit()
    bus = me._pipe.get_bus()
    bmid = bus.connect('message', bus_message)

    ## Get everything moving, and keep the user amused while we work.  The
    ## progress meter wants the status prefix and the element to interrogate
    ## as separate arguments; and the file name is the one we saved earlier,
    ## since there's no `file' argument here.
    me._pipe.set_state(GS.STATE_PLAYING)
    with GStreamerProgressEyecandy(filestatus(me._file, 'measure bitrate'),
                                   me._pipe,
                                   silentp = True):
      loop.run()
    bus.disconnect(bmid)
    if fail:
      me._pipe.set_state(GS.STATE_NULL)
      raise fail[0](fail[1])

    ## Now we should be able to find out our position accurately and work out
    ## a bitrate.  Cache it in case anybody asks again.  The position is in
    ## nanoseconds, so bits per nanosecond times 10^6 gives kilobits per
    ## second.
    t, hukairz = me._pipe.query_position(GS.FORMAT_TIME)
    me._bitrate = int(8*me._bytes*1e6/t)

    ## Done.
    return me._bitrate
952
953class AudioFormat (BaseFormat):
954 """
955 An AudioFormat is a kind of Format specialized for audio files.
956
957 Format checks are done on an AudioIdentifier object.
958 """
959
960 PROPS = prop('bitrate', Num)
961
962 ## libmagic reports `application/ogg' for Ogg Vorbis files. We've switched
963 ## to GIO now, which reports either `audio/ogg' or `audio/x-vorbis+ogg'
964 ## depending on how thorough it's trying to be. Still, it doesn't do any
965 ## harm here; the main risk is picking up Ogg Theora files by accident, and
966 ## we'll probably be able to extract the audio from them anyway.
967 CATEGORY = FileCategory('audio', ['audio/*', 'application/ogg'],
968 AudioIdentifier)
969
970 def __init__(me, bitrate = None):
971 "Construct an object, requiring an approximate bitrate."
972 me.bitrate = bitrate
973
974 def check(me, id):
975 """
976 Return whether the AudioIdentifier ID is suitable for our purposes.
977
978 Subclasses can either override this method or provide a property
979 `MIMETYPES', which is a list (other thing that implements `__contains__')
980 of GStreamer MIME types matching this format.
981 """
982 return id.mime & me.MIMETYPES and \
983 (me.bitrate is None or id.bitrate <= me.bitrate * sqrt(2))
984
985 def encoder(me):
986 """
987 Constructs a GStreamer element to encode audio input.
988
989 Subclasses can either override this method (or replace `encode'
990 entirely), or provide a method `encoder_chain' which returns a list of
991 elements to be linked together in sequence. The first element in the
992 chain must have a pad named `sink' and the last must have a pad named
993 `src'.
994 """
995 elts = me.encoder_chain()
996 bin = GS.Bin()
997 bin.add(*elts)
0dbdd41c 998 link_elements(elts)
583b7e4a
MW
999 bin.add_pad(GS.GhostPad('sink', elts[0].get_pad('sink')))
1000 bin.add_pad(GS.GhostPad('src', elts[-1].get_pad('src')))
1001 return bin
1002
def convert(me, master, id, target):
  """
  Encode audio from MASTER, already identified as ID, writing it to TARGET.

  The output is first written to a scratch file `TARGET.new', which is
  handed to `me.fixup' and only then renamed over TARGET.  If the pipeline
  reports an error on its bus, `ValueError' is raised carrying the debug
  text from the error message.

  However we leave, the pipeline is returned to the null state, and the
  bus message handler and signal watch are removed again.

  See `encoder' for subclasses' responsibilities.
  """

  ## Construct the necessary equipment.
  pipe = GS.Pipeline()
  bus = pipe.get_bus()
  bus.add_signal_watch()
  loop = G.MainLoop()
  bmid = None
  fail = []

  try:

    ## Make sure that there isn't anything in the way of our output.  We're
    ## going to write to a scratch file so that we don't get confused by
    ## half-written rubbish left by a crashed program.
    new = target + '.new'
    try:
      OS.unlink(new)
    except OSError as err:
      if err.errno != E.ENOENT:
        raise

    ## Piece together our pipeline.  The annoying part is that the
    ## `decodebin' doesn't have any source pads yet, so our chain is in two
    ## halves for now.
    source = make_element('filesrc', 'source', location = master)
    decoder = make_element('decodebin', 'decode')
    convert = make_element('audioconvert', 'convert')
    encoder = me.encoder()
    sink = make_element('filesink', 'sink', location = new)
    pipe.add(source, decoder, convert, encoder, sink)
    link_elements([source, decoder])
    link_elements([convert, encoder, sink])

    ## Some decoders (e.g., the AC3 decoder) include channel-position
    ## indicators in their output caps.  The Vorbis encoder interferes with
    ## this, and you end up with a beautifully encoded mono signal from a
    ## stereo source.  From a quick butchers at the `vorbisenc' source, I
    ## /think/ that this is only a problem with stereo signals: mono signals
    ## are mono already, and `vorbisenc' accepts channel positions if there
    ## are more than two channels.
    ##
    ## So we have this bodge.  We already collected the decoded audio caps
    ## during identification.  So if we see 2-channel audio with channel
    ## positions, we strip the positions off forcibly by adding a filter.
    if id.dcap.get_name().startswith('audio/x-raw-') and \
       id.dcap.has_field('channels') and \
       id.dcap['channels'] == 2 and \
       id.dcap.has_field('channel-positions'):
      dcap = GS.Caps()
      c = id.dcap.copy()
      c.remove_field('channel-positions')
      dcap.append(c)
    else:
      dcap = None

    ## Hook onto the `decodebin' so we can link together the two halves of
    ## our encoding chain.  For now, we'll hope that there's only one audio
    ## stream in there, and just throw everything else away.
    def decoder_pad_arrived(elt, pad):
      if pad.get_caps()[0].get_name().startswith('audio/'):
        if dcap:
          elt.link_pads_filtered(pad.get_name(), convert, 'sink', dcap)
        else:
          elt.link_pads(pad.get_name(), convert, 'sink')
    decoder.connect('pad-added', decoder_pad_arrived)

    ## Watch the bus for completion messages.  On error, remember the debug
    ## text (the most recent one wins) so that we can complain later.
    def bus_message(bus, msg):
      if msg.type == GS.MESSAGE_ERROR:
        fail[:] = [msg.structure['debug']]
        loop.quit()
      elif msg.type == GS.MESSAGE_EOS:
        loop.quit()
    bmid = bus.connect('message', bus_message)

    ## Get everything ready and let it go.
    pipe.set_state(GS.STATE_PLAYING)
    with GStreamerProgressEyecandy(filestatus(master,
                                              'convert to %s' % me.NAME),
                                   pipe):
      loop.run()

  finally:

    ## Shut the pipeline down and undo our bus registrations.  Each
    ## `add_signal_watch' wants a matching `remove_signal_watch', or the
    ## watch is left behind for every file we convert.
    pipe.set_state(GS.STATE_NULL)
    if bmid is not None:
      bus.disconnect(bmid)
    bus.remove_signal_watch()

  if fail:
    raise ValueError(fail[0])

  ## Fix up the output file if we have to.
  me.fixup(new)

  ## We're done.
  OS.rename(new, target)
1096
class OggVorbisFormat (AudioFormat):
  "AudioFormat object for Ogg Vorbis."

  NAME = 'Ogg Vorbis'
  EXT = 'ogg'
  MIMETYPES = set(['application/ogg', 'audio/x-vorbis', 'audio/ogg',
                   'audio/x-vorbis+ogg'])

  ## Pairs (QUALITY, BITRATE), in ascending order of both.
  ## From https://en.wikipedia.org/wiki/Vorbis
  QMAP = [(-1,  45), ( 0,  64), ( 1,  80), ( 2,  96),
          ( 3, 112), ( 4, 128), ( 5, 160), ( 6, 192),
          ( 7, 224), ( 8, 256), ( 9, 320), (10, 500)]

  def encoder_chain(me):
    """
    Return the elements which encode and package Ogg Vorbis audio.

    If a bitrate was requested, choose the first `QMAP' entry whose bitrate
    is at least the requested one, and pass a tenth of its quality number
    to `vorbisenc' as the `quality' property; raise `ValueError' if no
    entry is large enough.  Otherwise leave the encoder's properties alone.
    """
    props = {}
    if me.bitrate is not None:
      for quality, rate in me.QMAP:
        if rate >= me.bitrate:
          props['quality'] = quality/10.0
          break
      else:
        raise ValueError('no suitable quality setting found')
    vorbis = make_element('vorbisenc', **props)
    mux = make_element('oggmux')
    return [vorbis, mux]

defformat('ogg-vorbis', OggVorbisFormat)
1123
class MP3Format (AudioFormat):
  "AudioFormat object for MP3."

  NAME = 'MP3'
  EXT = 'mp3'
  MIMETYPES = set(['audio/mpeg'])

  def encoder_chain(me):
    """
    Return the elements which encode and package MP3 audio.

    The `lame' encoder is always given `vbr = 4'; if a bitrate was requested
    then it's passed along as `vbr_mean_bitrate' too.
    """
    props = {}
    if me.bitrate is not None:
      props['vbr_mean_bitrate'] = me.bitrate
    lame = make_element('lame', vbr = 4, **props)
    return [lame, make_element('xingmux'), make_element('id3v2mux')]

  def fixup(me, path):
    """
    Fix up MP3 files.

    GStreamer produces ID3v2 tags, but not ID3v1.  This seems unnecessarily
    unkind to stupid players.  So load the file at PATH and save its tag
    again as both ID3v2.3 and ID3v1.  This is best-effort: a file which
    can't be loaded, or has no tag, is left alone, and encoding, genre and
    tag errors while saving are ignored.
    """
    f = E3.load(path)
    if f is None or f.tag is None:
      return
    tag = f.tag
    for version in (E3.id3.ID3_V2_3, E3.id3.ID3_V1):
      try:
        tag.save(version = version)
      except (UnicodeEncodeError,
              E3.id3.GenreException,
              E3.id3.TagException):
        pass

defformat('mp3', MP3Format)
1157
1158###--------------------------------------------------------------------------
1159### Image handling, based on the Python Imaging Library.
1160
class ImageIdentifier (object):
  """
  Analyses and identifies an image file.

  The opened Image object is left in the `img' property for inspection, and
  the MIME type supplied by the caller in `mime', as a one-element set.
  """

  def __init__(me, file, mime):
    """
    Open FILE as an image, remembering the given MIME type.

    Raises `IdentificationFailure' if PIL can't make sense of the file;
    genuine I/O errors are propagated unchanged.
    """

    ## Get PIL to open the file.  It will magically work out what kind of
    ## file it is.
    try:
      me.img = I.open(file)
    except IOError as exc:

      ## PIL unhelpfully raises `IOError' on identification failure too.  We
      ## can tell a real I/O error apart because it has an `errno'.
      if exc.errno is not None:
        raise
      raise IdentificationFailure

    me.mime = set([mime])
1184
class ImageFormat (BaseFormat):
  """
  An ImageFormat is a kind of Format specialized for image files.

  Subclasses needn't provide anything beyond the properties required of all
  concrete Format subclasses.  However, the `NAME' property must match
  PIL's `format' name for the format, because it's compared against the
  opened image's format and passed to PIL when saving.
  """

  PROPS = prop('size', Num)
  CATEGORY = FileCategory('image', ['image/*'], ImageIdentifier)

  def __init__(me, size = None, **kw):
    """
    Initialize an ImageFormat object.

    SIZE, if given, is the largest acceptable width or height.  Additional
    keywords are passed on to PIL when encoding, and may be recognized by
    enhanced `check' methods in subclasses.
    """
    me._size = size
    me._props = kw

  def check(me, id):
    "Check whether the ImageIdentifier ID matches our requirements."

    ## It must be in the right format to begin with.
    if id.img.format != me.NAME:
      return False

    ## And, if there's a size limit, fit within it in both directions.
    limit = me._size
    if limit is None:
      return True
    return id.img.size[0] <= limit and id.img.size[1] <= limit

  def convert(me, master, id, target):
    "Encode the file MASTER, identified as ID, writing the result to TARGET."

    STATUS.set(filestatus(master, 'convert to %s' % me.NAME))

    ## We'll write to a scratch file, and rename it when it's finished.
    scratch = target + '.new'

    ## The ImageIdentifier already holds the opened image, so use that
    ## rather than opening the file again.
    img = id.img

    ## If the image is bigger than the stated maximum size then scale it
    ## down to fit.  Thumbnailing clobbers the image it's applied to, so
    ## work on a copy.
    limit = me._size
    if limit is not None and \
       (img.size[0] > limit or img.size[1] > limit):
      img = img.copy()
      img.thumbnail((limit, limit), I.ANTIALIAS)

    ## Write the output image, and fix it up if necessary.
    img.save(scratch, me.NAME, **me._props)
    me.fixup(scratch)

    ## We're done.
    OS.rename(scratch, target)
    STATUS.commit()
1242
class JPEGFormat (ImageFormat):
  """
  Image format for JPEG (actually JFIF) files.

  Properties which may be set:

  optimize
        If present, take a second pass to select optimal encoder settings.

  progressive
        If present, make a progressive file.

  quality
        Integer from 1--100 (worst to best); default is 75.
  """
  NAME = 'JPEG'
  EXT = 'jpg'
  PROPS = (prop('optimize', None) |
           prop('progressive', None, 'progression') |
           prop('quality', Num))

defformat('jpeg', JPEGFormat)
1264
class PNGFormat (ImageFormat):
  """
  Image format for PNG files.

  Properties which may be set:

  optimize
        If present, make a special effort to minimize the output file.
  """
  NAME = 'PNG'
  EXT = 'png'
  PROPS = prop('optimize', None)

defformat('png', PNGFormat)
1279
class BMPFormat (ImageFormat):
  """
  Image format for Windows BMP files, as used by RockBox.

  There are no additional properties to set.
  """
  EXT = 'bmp'
  NAME = 'BMP'

defformat('bmp', BMPFormat)
1290
e0361afb
MW
1291###--------------------------------------------------------------------------
1292### Remaining parsing machinery.
1293
Type = K('type') - Name - D('{') - R(Policy) - D('}')
def build_type(s, l, t):
  """
  Parse action for `Type': build the policy for one `type' block.

  S is the string being parsed, L the location of the match, and T the
  token list: T[0] is the category name and T[1] the list of policies found
  between the braces.

  A single policy is used as it is; otherwise the policies are combined
  into an `AndPolicy'.  Either way, the category found in `CATEGORYMAP' is
  attached with `setcategory' and the policy returned.

  Raises `P.ParseException' at location L if the category name is unknown.
  """
  try:
    cat = CATEGORYMAP[t[0]]
  except KeyError:
    ## Report the failure at the match location we were given.
    raise P.ParseException(s, l, "Unknown category `%s'" % t[0])
  pols = t[1]
  if len(pols) == 1: pol = pols[0]
  else: pol = AndPolicy(pols)
  pol.setcategory(cat)
  return pol
Type.setParseAction(build_type)
1306
## The target jobs which have been performed so far, in order.
TARGETS = []

class TargetJob (object):
  """
  A job, from the configuration file, describing one target directory.

  The `targetdir' and `policies' properties hold the constructor arguments.
  """

  def __init__(me, targetdir, policies):
    "Remember the TARGETDIR and the POLICIES to apply to it."
    me.targetdir, me.policies = targetdir, policies

  def perform(me):
    "Perform the job: add ourselves to the `TARGETS' list."
    TARGETS.append(me)
1314
Target = K('target') - String - D('{') - R(Type) - D('}')
def build_target(s, l, t):
  """
  Parse action for `Target': make a `TargetJob' from the tokens T.

  T[0] is the target directory and T[1] the policies for it.
  """
  targetdir, policies = t[0], t[1]
  return TargetJob(targetdir, policies)
Target.setParseAction(build_target)
1319
## Variables set by the configuration file.  The `master' entry is always
## present, and stays `None' until some `VarsJob' sets it.
VARS = { 'master': None }

class VarsJob (object):
  "A job, from the configuration file, which sets variables."

  def __init__(me, vars):
    "Remember VARS, a sequence of (NAME, VALUE) pairs to be set."
    me.vars = vars

  def perform(me):
    "Perform the job: store each of our variables in the `VARS' map."
    for name, value in me.vars:
      VARS[name] = value
1327
Var = prop('master', String)
Vars = K('vars') - D('{') - R(Var) - D('}')
def build_vars(s, l, t):
  "Parse action for `Vars': make a `VarsJob' from the settings in T[0]."
  settings = t[0]
  return VarsJob(settings)
Vars.setParseAction(build_vars)
1333
## A configuration file is a sequence of `vars' and `target' blocks, with
## Python-style comments ignored.
TopLevel = Vars | Target
Config = R(TopLevel)
Config.ignore(P.pythonStyleComment)
1337
583b7e4a
MW
1338###--------------------------------------------------------------------------
1339### The directory grobbler.
1340
def grobble(master, targets, noact = False):
  """
  Work through the MASTER directory, writing converted files to TARGETS.

  The TARGETS are a list of `TargetJob' objects, each describing a target
  directory and a policy to apply to it.

  If NOACT is true, then don't actually do anything permanent to the
  filesystem.

  Returns a list of (MASTERFILE, EXCEPTION) pairs, one for each file or
  directory whose processing failed with an `IOError' or `OSError'.
  """

  ## Transform the targets into a more convenient data structure: for each
  ## target, a map from category to the list of policies in that category.
  tpolmap = []
  for t in targets:
    pmap = {}
    tpolmap.append(pmap)
    for p in t.policies: pmap.setdefault(p.cat, []).append(p)

  ## Keep track of the current position in the master tree.
  dirs = []

  ## And the files which haven't worked.
  broken = []

  def grobble_file(master, pmap, targetdir, cohorts):
    ## Convert MASTER, writing the result to TARGETDIR.
    ##
    ## The COHORTS are actually (CAT, ID, COHORT) triples, where a COHORT is
    ## a list of (FILENAME, ID) pairs.
    ##
    ## Since this function might convert the MASTER file, the caller doesn't
    ## know the name of the output files, so we return them as a list.

    done = set()
    st_m = OS.stat(master)

    ## Work through each category listed and apply its policy.
    for cat, id, cohort in cohorts:

      ## Go through the category's policies and see if any match.  If we fail
      ## here, see if there are more categories to try.
      for pol in pmap[cat]:
        acts = pol.actions(master, targetdir, id, cohort)
        if acts: break
      else:
        continue

      ## Work through the targets one by one.
      for a in acts:
        done.add(a.target)

        ## Find out whether the target file already exists and is up-to-date
        ## with respect to the master.  (Caution here with low-resolution
        ## timestamps.)  If it's OK, then just move on.
        try:
          st_t = OS.stat(a.target)
          if st_m.st_mtime < st_t.st_mtime or \
             (st_m.st_ino, st_m.st_dev) == (st_t.st_ino, st_t.st_dev):
            continue
        except OSError as err:
          if err.errno not in (E.ENOENT, E.ENOTDIR):
            raise

        ## We have real work to do.  If there's a current status message,
        ## it's the containing directory so flush it so that people know
        ## where we are.
        STATUS.commit()

        ## Remove the target.  (A hardlink will fail if the target already
        ## exists.)
        if not noact:
          try:
            OS.unlink(a.target)
          except OSError as err:
            if err.errno not in (E.ENOENT, E.ENOTDIR):
              raise

        ## Do whatever it is we decided to do.
        if noact:
          STATUS.commit(filestatus(master, a))
        else:
          a.perform()

    ## We're done.  Return the names of the targets.
    return list(done)

  @contextmanager
  def wrap(masterfile):
    ## Handle exceptions found while trying to convert a particular file or
    ## directory.

    try:
      yield masterfile

    ## Something bad happened.  Report the error, but continue.  (This list
    ## of exceptions needs a lot of work.)
    except (IOError, OSError) as exc:
      STATUS.clear()
      STATUS.commit(filestatus(masterfile, 'failed (%s)' % exc))
      broken.append((masterfile, exc))

  def grobble_dir(master, targets):
    ## Recursively convert files in MASTER, writing them to the TARGETS.

    ## Keep track of the subdirectories we encounter, because we'll need to
    ## do all of those in one go at the end.
    subdirs = set()

    ## Work through each target directory in turn.
    for target, pmap in zip(targets, tpolmap):

      ## Make sure the TARGET exists and is a directory.  It's a fundamental
      ## assumption of this program that the entire TARGET tree is
      ## disposable, so if something exists but isn't a directory, we should
      ## kill it.
      if OS.path.isdir(target):
        pass
      else:
        if OS.path.exists(target):
          STATUS.commit(filestatus(target, 'clear nondirectory'))
          if not noact:
            OS.unlink(target)
        STATUS.commit(filestatus(target, 'create directory'))
        if not noact:
          OS.mkdir(target)

      ## Keep a list of things in the target.  As we convert files, we'll
      ## check them off.  Anything left over is rubbish and needs to be
      ## deleted.
      checklist = {}
      try:
        for i in OS.listdir(target):
          checklist[i] = False
      except OSError as err:
        if err.errno not in (E.ENOENT, E.ENOTDIR):
          raise

      ## Keep track of the files in each category.
      catmap = {}
      todo = []
      done = []

      ## Work through the master files.
      for f in sorted(OS.listdir(master)):

        ## If the killswitch has been pulled then stop.  The whole idea is
        ## that we want to cause a clean shutdown if possible, so we don't
        ## want to do it in the middle of encoding because the encoding
        ## effort will have been wasted.  This is the only place we need to
        ## check.  If we've exited the loop, then clearing old files will
        ## probably be fast, and we'll either end up here when the recursive
        ## call returns or we'll be in the same boat as before, clearing old
        ## files, only up a level.  If worst comes to worst, we'll be killed
        ## forcibly somewhere inside `SH.rmtree', and that can continue where
        ## it left off.
        if KILLSWITCH.is_set():
          return

        ## Do something with the file.
        with wrap(OS.path.join(master, f)) as masterfile:

          ## If it's a directory then prepare to grobble it recursively, but
          ## don't do that yet.
          if OS.path.isdir(masterfile):
            subdirs.add(f)
            done.append(OS.path.join(target, f))

          ## Otherwise it's a file.  Work out what kind, and stash it under
          ## the appropriate categories.  Later, we'll apply policy to the
          ## files, by category, and work out what to do with them all.
          else:
            gf = GIO.File(masterfile)
            mime = gf.query_info('standard::content-type').get_content_type()
            cats = []
            for cat in pmap:
              id = cat.identify(masterfile, mime)
              if id is None: continue
              catmap.setdefault(cat, []).append((masterfile, id))
              cats.append((cat, id))

            ## A file which fits no category is filed under `None', with no
            ## identification.  (Say `None' explicitly rather than relying
            ## on a leftover `id', which won't have been set at all if this
            ## target has no policies.)
            if not cats:
              catmap.setdefault(None, []).append((masterfile, None))
            todo.append((masterfile, cats))

      ## Work through the categorized files to see what actions to do for
      ## them.
      for masterfile, cats in todo:
        with wrap(masterfile):
          done += grobble_file(masterfile, pmap, target,
                               [(cat, id, catmap[cat]) for cat, id in cats])

      ## Check the results off the list so that we don't clear it later.
      for f in done:
        checklist[OS.path.basename(f)] = True

      ## Maybe there's stuff in the target which isn't accounted for.  Delete
      ## it: either the master has changed, or the policy for this target has
      ## changed.  Either way, the old files aren't wanted.
      for f in checklist:
        if not checklist[f]:
          STATUS.commit(filestatus(f, 'clear bogus file'))
          if not noact:
            bogus = OS.path.join(target, f)
            try:
              if OS.path.isdir(bogus):
                SH.rmtree(bogus)
              else:
                OS.unlink(bogus)
            except OSError as err:
              if err.errno != E.ENOENT:
                raise

    ## If there are subdirectories which want processing then do those.
    ## Keep the user amused by telling him where we are in the tree.
    for d in sorted(subdirs):
      dirs.append(d)
      STATUS.set('/'.join(dirs))
      with wrap(OS.path.join(master, d)) as masterdir:
        try:
          grobble_dir(masterdir,
                      [OS.path.join(target, d) for target in targets])
        finally:
          dirs.pop()
          STATUS.set('/'.join(dirs))

  ## Right.  We're ready to go.
  grobble_dir(master, [t.targetdir for t in targets])
  return broken
583b7e4a
MW
1568
1569###--------------------------------------------------------------------------
1570### Command-line interface.
1571
## The name we were invoked as, for use in messages.
QUIS = OS.path.basename(SYS.argv[0])

def moan(msg):
  "Write the warning message MSG to stderr, prefixed by the program name."
  line = '%s: %s\n' % (QUIS, msg)
  SYS.stderr.write(line)
1577
def die(msg):
  """
  Report the fatal error message MSG to the user and quit.

  The message is reported using `moan'; the exit status is 1.
  """
  moan(msg)
  SYS.exit(1)
1582
def parse_opts(args):
  """
  Parse command-line arguments in ARGS.

  Exactly one non-option argument is expected: the name of the
  configuration file.  As side effects, this function turns on progress
  eyecandy if `-i' was given; starts a daemon thread running `timeout' if
  `-t' was given; and parses the configuration file, performing each of the
  jobs found in it.

  Returns the option-values object from the option parser.
  """

  ## Build the option parser object.
  op = OP.OptionParser(prog = QUIS, version = VERSION,
                       usage = '%prog [-in] [-t TIMEOUT] [-T TIMEOUT] '
                               'CONFIG',
                       description = """\
Convert a directory tree of files according to the configuration file
CONFIG.
""")

  ## Timeout handling.  A time is a decimal integer with an optional unit
  ## suffix: `s', `m', `h' or `d'; the default is seconds.
  def cb_time(opt, ostr, arg, op):
    ## Anchor the pattern at the end, so that trailing rubbish is rejected
    ## rather than quietly ignored.
    m = RX.match(r'\s*(\d+)\s*([dhms]?)\s*$', arg)
    if not m:
      raise OP.OptionValueError('bad time value `%s\'' % arg)
    t, u = m.groups()
    t = int(t) * { '': 1, 's': 1, 'm': 60, 'h': 3600, 'd': 86400 }[u]
    setattr(op.values, opt.dest, t)
  op.add_option('-t', '--timeout', type = 'string', metavar = 'SECS',
                dest = 'timeout',
                help = 'stop processing nicely after SECS',
                action = 'callback', callback = cb_time)
  op.add_option('-T', '--timeout-nasty', type = 'string', metavar = 'SECS',
                dest = 'timeout_nasty',
                help = 'stop processing unpleasantly after further SECS',
                action = 'callback', callback = cb_time)

  ## Other options.
  op.add_option('-i', '--interactive', action = 'store_true', dest = 'tty',
                help = 'provide progress information')
  op.add_option('-n', '--no-act', action = 'store_true', dest = 'noact',
                help = 'don\'t actually modify the filesystem')

  ## Ready to rock.
  op.set_defaults(formats = [], noact = False,
                  timeout = None, timeout_nasty = 300)
  opts, args = op.parse_args(args)

  ## Check that we got the non-option arguments that we want.
  if len(args) != 1:
    op.error('wrong number of arguments')

  ## Act on the options.
  if opts.tty:
    STATUS.eyecandyp = True
  if opts.timeout is not None:
    to = TH.Thread(target = timeout,
                   args = (opts.timeout, opts.timeout_nasty))
    to.daemon = True
    to.start()

  ## Parse the configuration file.
  with open(args[0]) as conf:
    jobs, = Config.parseFile(conf, True)
  for j in jobs:
    j.perform()

  return opts
1648
if __name__ == '__main__':
  opts = parse_opts(SYS.argv[1:])

  ## `VARS' starts out with a `master' entry of `None', so checking whether
  ## the key is present would never notice a missing setting: look at the
  ## value instead.
  if VARS.get('master') is None:
    die("no master directory set")
  broken = grobble(VARS['master'], TARGETS, opts.noact)
  if broken:
    moan('failed to convert some files:')
    for file, exc in broken:
      moan('%s: %s' % (file, exc))
    SYS.exit(1)

  ## This is basically a successful completion: we did what we were asked to
  ## do.  It seems polite to report a message, though.
  ##
  ## Why don't we have a nonzero exit status?  The idea would be that a
  ## calling script would be interested that we used up all of our time, and
  ## not attempt to convert some other directory as well.  But that doesn't
  ## quite work.  Such a script would need to account correctly for time we
  ## had spent even if we complete successfully.  And if the script is having
  ## to watch the clock itself, it can do that without our help here.
  if KILLSWITCH.is_set():
    moan('killed by timeout')
1671
1672###----- That's all, folks --------------------------------------------------