chiark - git - mdw - autoys/blob - misc/ab-chop

   1 #! /usr/bin/python
   2 ###
   3 ### A simple program for doing blind A/B audio comparisons
   4 ###
   5 ### (c) 2010 Mark Wooding
   6 ###
   7
   8 ###----- Licensing notice ---------------------------------------------------
   9 ###
  10 ### This file is part of the `autoys' audio tools collection.
  11 ###
  12 ### `autoys' is free software; you can redistribute it and/or modify
  13 ### it under the terms of the GNU General Public License as published by
  14 ### the Free Software Foundation; either version 2 of the License, or
  15 ### (at your option) any later version.
  16 ###
  17 ### `autoys' is distributed in the hope that it will be useful,
  18 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
  19 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20 ### GNU General Public License for more details.
  21 ###
  22 ### You should have received a copy of the GNU General Public License
  23 ### along with `autoys'; if not, write to the Free Software Foundation,
  24 ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  25
  26 ###----- Usage --------------------------------------------------------------
  27 ###
  28 ### The command line syntax is:
  29 ###
  30 ###      ab-chop INPUT CAPS OUTPUT PIPELINE...
  31 ###
  32 ### This means that we should read INPUT, decode it (using a GStreamer
  33 ### `decodebin', so it should be able to handle most things you care to throw
  34 ### at it), and then re-encode it according to each PIPELINE in turn, decode
  35 ### /that/ again, and stash the resulting raw PCM data.  When we've finished,
  36 ### we line up the PCM data streams side-by-side, chop them into chunks, and
  37 ### then stitch chunks from randomly chosen streams together to make a new
  38 ### PCM stream.  Finally, we encode that mixed-up stream as FLAC, and write
  39 ### it to OUTPUT.  It also writes a file OUTPUT.sequence which is a list of
  40 ### numbers indicating which pipeline each chunk of the original came from.
  41 ###
  42 ### The motivation is that we want to test encoder quality.  So you take a
  43 ### reference source (as good as you can find), and use that as your INPUT.
  44 ### You then write GStreamer pipeline fragments for the encoders you want to
  45 ### compare; say `identity' if you want the unmodified original reference to
  46 ### be mixed in.
  47 ###
  48 ### The only tricky bit is the CAPS, which is a GStreamer capabilities string
  49 ### describing the raw PCM format to use as an intermediate representation.
  50 ### (This is far too low-level and cumbersome for real use, but it's OK for
  51 ### now.)  You need to say something like
  52 ###
  53 ###   audio/x-raw-int,width=16,rate=44100,channels=2,depth=16,
  54 ###     endianness=1234,signed=true
  55 ###
  56 ### for standard CD audio.
  57
  58 ###--------------------------------------------------------------------------
  59 ### External dependencies.
  60
  61 ## Standard Python libraries.
import sys as SYS
import os as OS
import shutil as SH
import fnmatch as FN
import random as R
import tempfile as TF
  67
  68 SR = R.SystemRandom()
  69
  70 ## GObject and GStreamer.
  71 import gobject as G
  72 import gst as GS
  73
  74 ###--------------------------------------------------------------------------
  75 ### GStreamer utilities.
  76
  77 def link_on_demand(src, sink, sinkpad = None, cap = None):
  78   """
  79   Link SINK to SRC when a pad appears.
  80
  81   More precisely, when SRC reports that a pad with media type matching the
  82   `fnmatch' pattern CAP has appeared, link the pad of SINK named SINKPAD (or
  83   some sensible pad by default).
  84   """
  85   def _link(src, srcpad):
  86     if cap is None or FN.fnmatchcase(srcpad.get_caps()[0].get_name(), cap):
  87       src.link_pads(srcpad.get_name(), sink, sinkpad)
  88   src.connect('pad-added', _link)
  89
  90 def make_element(factory, name = None, **props):
  91   """
  92   Return an element made by FACTORY with properties specified by PROPS.
  93   """
  94   elt = GS.element_factory_make(factory, name)
  95   elt.set_properties(**props)
  96   return elt
  97
  98 def dump_pipeline(pipe, indent = 0):
  99   done = {}
 100   q = []
 101   for e in pipe.iterate_sources():
 102     q = [e]
 103     while q:
 104       e, q = q[0], q[1:]
 105       if e in done:
 106         continue
 107       done[e] = True
 108       print
 109       print '%s%s %s' % ('  '*indent, type(e).__name__, e.get_name())
 110       for p in e.pads():
 111         c = p.get_negotiated_caps()
 112         peer = p.get_peer()
 113         print '%s  Pad %s %s (%s)' % \
 114               ('  '*(indent + 1),
 115                p.get_name(),
 116                peer and ('<-> %s.%s' % (peer.get_parent().get_name(),
 117                                         peer.get_name()))
 118                     or 'unconnected',
 119                c and c.to_string() or 'no-negotiated-caps')
 120         if peer:
 121           q.append(peer.get_parent())
 122         if isinstance(e, GS.Bin):
 123           dump_pipeline(e, indent + 1)
 124
 125 def run_pipe(pipe, what):
 126   """
 127   Run a GStreamer pipeline PIPE until it finishes.
 128   """
 129   loop = G.MainLoop()
 130   bus = pipe.get_bus()
 131   bus.add_signal_watch()
 132   def _bus_message(bus, msg):
 133     if msg.type == GS.MESSAGE_ERROR:
 134       SYS.stderr.write('error from pipeline: %s\n' % msg)
 135       SYS.exit(1)
 136     elif msg.type == GS.MESSAGE_STATE_CHANGED and \
 137          msg.src == pipe and \
 138          msg.structure['new-state'] == GS.STATE_PAUSED:
 139       dump_pipeline(pipe)
 140     elif msg.type == GS.MESSAGE_EOS:
 141       loop.quit()
 142   bus.connect('message', _bus_message)
 143
 144   pipe.set_state(GS.STATE_PLAYING)
 145   loop.run()
 146   GS.DEBUG_BIN_TO_DOT_FILE(pipe, 3, what)
 147   pipe.set_state(GS.STATE_NULL)
 148
 149 ###--------------------------------------------------------------------------
 150 ### Main program.
 151
 152 ## Read the command line arguments.
 153 input = SYS.argv[1]
 154 caps = GS.caps_from_string(SYS.argv[2])
 155 output = SYS.argv[3]
 156
 157 ## We want a temporary place to keep things.  This provokes a warning, but
 158 ## `mkdir' is atomic and sane so it's not a worry.
 159 tmp = OS.tmpnam()
 160 OS.mkdir(tmp)
 161 try:
 162
 163   ## First step: produce raw PCM files from the original source and the
 164   ## requested encoders.
 165   q = 0
 166   temps = []
 167   for i in SYS.argv[4:]:
 168     temp = OS.path.join(tmp, '%d.raw' % q)
 169     temps.append(temp)
 170     pipe = GS.Pipeline()
 171     origin = make_element('filesrc', location = input)
 172     decode_1 = make_element('decodebin')
 173     convert_1 = make_element('audioconvert')
 174     encode = GS.parse_bin_from_description(i, True)
 175     decode_2 = make_element('decodebin')
 176     convert_2 = make_element('audioconvert')
 177     target = make_element('filesink', location = temp)
 178     pipe.add(origin, decode_1, convert_1, encode,
 179              decode_2, convert_2, target)
 180     origin.link(decode_1)
 181     link_on_demand(decode_1, convert_1)
 182     ##convert_1.link(encode, GS.caps_from_string('audio/x-raw-float, channels=2'))
 183     convert_1.link(encode)
 184     encode.link(decode_2)
 185     link_on_demand(decode_2, convert_2)
 186     convert_2.link(target, caps)
 187
 188     run_pipe(pipe, 'input-%d' % q)
 189     del pipe
 190     print 'done %s' % i
 191     q += 1
 192   step = 1763520
 193   lens = [OS.stat(i).st_size for i in temps]
 194   blocks = (max(*lens) + step - 1)//step
 195   while True:
 196     seq = []
 197     done = {}
 198     for i in xrange(blocks):
 199       j = SR.randrange(q)
 200       done[j] = True
 201       seq.append(j)
 202     ok = True
 203     for i in xrange(q):
 204       if i not in done:
 205         ok = False
 206         break
 207     if ok:
 208       break
 209   ff = [open(i, 'rb') for i in temps]
 210   mix = OS.path.join(tmp, 'mix.raw')
 211   out = open(mix, 'wb')
 212   pos = 0
 213   for i in seq:
 214     f = ff[i]
 215     f.seek(pos)
 216     buf = f.read(step)
 217     out.write(buf)
 218     if len(buf) < step:
 219       break
 220     pos += step
 221   out.close()
 222   for f in ff:
 223     f.close()
 224
 225   f = open(output + '.sequence', 'w')
 226   f.write(', '.join([str(i) for i in seq]) + '\n')
 227   f.close()
 228
 229   pipe = GS.Pipeline()
 230   origin = make_element('filesrc', location = mix)
 231   convert = make_element('audioconvert')
 232   encode = make_element('flacenc', quality = 8)
 233   target = make_element('filesink', location = output)
 234   pipe.add(origin, convert, encode, target)
 235   origin.link(convert, caps)
 236   GS.element_link_many(convert, encode, target)
 237
 238   run_pipe(pipe, 'output')
 239   del pipe
 240   print 'all done'
 241 finally:
 242   SH.rmtree(tmp)