[autoys] / misc / ab-chop

#! /usr/bin/python
###
### A simple program for doing blind A/B audio comparisons
###
### (c) 2010 Mark Wooding
###

###----- Licensing notice ---------------------------------------------------
###
### This program is free software; you can redistribute it and/or modify
### it under the terms of the GNU General Public License as published by
### the Free Software Foundation; either version 2 of the License, or
### (at your option) any later version.
###
### This program is distributed in the hope that it will be useful,
### but WITHOUT ANY WARRANTY; without even the implied warranty of
### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
### GNU General Public License for more details.
###
### You should have received a copy of the GNU General Public License
### along with this program; if not, write to the Free Software Foundation,
### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

###----- Usage --------------------------------------------------------------
###
### The command line syntax is:
###
###      ab-chop INPUT CAPS OUTPUT PIPELINE...
###
### This means that we should read INPUT, decode it (using a GStreamer
### `decodebin', so it should be able to handle most things you care to throw
### at it), and then re-encode it according to each PIPELINE in turn, decode
### /that/ again, and stash the resulting raw PCM data.  When we've finished,
### we line up the PCM data streams side-by-side, chop them into chunks, and
### then stitch chunks from randomly chosen streams together to make a new
### PCM stream.  Finally, we encode that mixed-up stream as FLAC, and write
### it to OUTPUT.  It also writes a file OUTPUT.sequence which is a list of
### numbers indicating which pipeline each chunk of the original came from.
###
### The motivation is that we want to test encoder quality.  So you take a
### reference source (as good as you can find), and use that as your INPUT.
### You then write GStreamer pipeline fragments for the encoders you want to
### compare; say `identity' if you want the unmodified original reference to
### be mixed in.
###
### The only tricky bit is the CAPS, which is a GStreamer capabilities string
### describing the raw PCM format to use as an intermediate representation.
### (This is far too low-level and cumbersome for real use, but it's OK for
### now.)  You need to say something like
###
###   audio/x-raw-int,width=16,rate=44100,channels=2,depth=16,
###     endianness=1234,signed=true
###
### for standard CD audio.

###--------------------------------------------------------------------------
### External dependencies.

## Standard Python libraries.
import sys as SYS
import os as OS
import shutil as SH
import tempfile as TF
import fnmatch as FN
import random as R

SR = R.SystemRandom()

## GObject and GStreamer.
import gobject as G
import gst as GS

###--------------------------------------------------------------------------
### GStreamer utilities.

def link_on_demand(src, sink, sinkpad = None, cap = None):
  """
  Arrange for SRC to be linked to SINK as soon as SRC grows a pad.

  A handler is attached to SRC's `pad-added' signal.  Each time a new pad
  is reported, it is linked to the pad of SINK named SINKPAD (SINKPAD is
  handed to `link_pads' as given, so None leaves the choice to GStreamer).

  If CAP is not None it is an `fnmatch' pattern: only a pad whose first caps
  structure has a name matching CAP (case-sensitively) is linked, and any
  other pad is ignored.
  """
  def _pad_added(element, pad):
    ## Filter on the media type first, if the caller asked us to.
    if cap is not None:
      media_type = pad.get_caps()[0].get_name()
      if not FN.fnmatchcase(media_type, cap):
        return
    element.link_pads(pad.get_name(), sink, sinkpad)

  src.connect('pad-added', _pad_added)

def make_element(factory, name = None, **props):
  """
  Build a new element using the factory called FACTORY.

  NAME is handed to `element_factory_make' as the element's name (None by
  default).  Each keyword argument in PROPS is then applied to the new
  element through `set_properties', and the element is returned.
  """
  element = GS.element_factory_make(factory, name)
  element.set_properties(**props)
  return element

def dump_pipeline(pipe, indent = 0):
  """
  Print a description of the elements reachable in PIPE to standard output.

  Starting from each element returned by PIPE's `iterate_sources', walk from
  pad to peer pad, visiting every element at most once.  For each element,
  print a blank line, then its Python type name and element name; for each
  of its pads print the pad name, its peer (or `unconnected') and its
  negotiated caps (or `no-negotiated-caps').

  An element which is itself a bin has its contents dumped recursively,
  exactly once, after its own pads have been listed.  INDENT is the nesting
  depth; it controls the amount of leading whitespace.
  """
  prefix = '  '*indent
  pad_prefix = '  '*(indent + 1)
  seen = set()

  for source in pipe.iterate_sources():
    pending = [source]
    while pending:
      elt = pending.pop(0)
      if elt in seen:
        continue
      seen.add(elt)

      ## `print' is written with a single parenthesized argument so that it
      ## means the same thing as a statement or as a function call.
      print('')
      print('%s%s %s' % (prefix, type(elt).__name__, elt.get_name()))

      for pad in elt.pads():
        caps = pad.get_negotiated_caps()
        peer = pad.get_peer()

        if peer:
          owner = peer.get_parent()
          link = '<-> %s.%s' % (owner.get_name(), peer.get_name())
          ## Follow the link so that the element on the far side is
          ## visited too.
          pending.append(owner)
        else:
          link = 'unconnected'

        capstr = 'no-negotiated-caps'
        if caps:
          capstr = caps.to_string() or capstr

        print('%s  Pad %s %s (%s)' %
              (pad_prefix, pad.get_name(), link, capstr))

      ## Descend into nested bins once per element.  (This used to sit
      ## inside the pad loop, so a bin was dumped again for every pad it
      ## had, and not at all if it had none.)
      if isinstance(elt, GS.Bin):
        dump_pipeline(elt, indent + 1)

def run_pipe(pipe, what):
  """
  Run a GStreamer pipeline PIPE until it finishes.

  PIPE is set playing and a GLib main loop is run until the pipeline posts
  either end-of-stream or an error on its bus.  Whenever PIPE itself
  reports a change to the paused state, its structure is printed using
  `dump_pipeline'.

  After a successful run, WHAT is passed to `DEBUG_BIN_TO_DOT_FILE' as the
  name for a graph dump of the pipeline.  Whether or not the run succeeded,
  the pipeline is put back in the null state and the bus handler and signal
  watch are removed before this function finishes.

  If the pipeline reports an error, a message is written to standard error
  and `SystemExit' is raised with status 1 -- from this function, once the
  pipeline has been shut down, rather than from inside the bus callback.
  """
  loop = G.MainLoop()
  bus = pipe.get_bus()
  failed = []

  def _bus_message(bus, msg):
    if msg.type == GS.MESSAGE_ERROR:
      SYS.stderr.write('error from pipeline: %s\n' % msg)
      ## Exiting here, from inside the GLib callback, would skip the
      ## shutdown below and may not unwind through the caller's `finally'
      ## clauses; so just note the failure and stop the loop.
      failed.append(True)
      loop.quit()
    elif msg.type == GS.MESSAGE_STATE_CHANGED and \
         msg.src == pipe and \
         msg.structure['new-state'] == GS.STATE_PAUSED:
      dump_pipeline(pipe)
    elif msg.type == GS.MESSAGE_EOS:
      loop.quit()

  bus.add_signal_watch()
  handler = bus.connect('message', _bus_message)
  try:
    pipe.set_state(GS.STATE_PLAYING)
    loop.run()
    if not failed:
      ## The 3 is the `details' argument, passed through unchanged.
      GS.DEBUG_BIN_TO_DOT_FILE(pipe, 3, what)
  finally:
    pipe.set_state(GS.STATE_NULL)
    ## Undo the handler and watch set up above, so that they don't pile up
    ## (and keep PIPE referenced through the closure) across many runs.
    bus.disconnect(handler)
    bus.remove_signal_watch()

  if failed:
    SYS.exit(1)

###--------------------------------------------------------------------------
### Main program.

## Size in bytes of the chunks which the decoded streams are cut into.  For
## the CD-audio format shown in the usage notes (176400 bytes per second)
## this is very nearly ten seconds.
CHUNK_BYTES = 1763520

def transcode(source, description, caps, dest, what):
  """
  Push SOURCE through the encoder DESCRIPTION and save the decoded result.

  Build and run a pipeline which reads the file SOURCE, decodes it with
  `decodebin' and `audioconvert', feeds it through the bin parsed from the
  pipeline fragment DESCRIPTION, decodes that again with a second
  `decodebin' and `audioconvert', and writes the result to the file DEST.
  The final link is filtered by CAPS.  WHAT is passed on to `run_pipe'.
  """
  pipe = GS.Pipeline()
  origin = make_element('filesrc', location = source)
  decode_1 = make_element('decodebin')
  convert_1 = make_element('audioconvert')
  ## NOTE(review): the True presumably asks for ghost pads on the fragment's
  ## unlinked pads, so that the bin can be linked like an element -- confirm
  ## against the GStreamer parse API.
  encode = GS.parse_bin_from_description(description, True)
  decode_2 = make_element('decodebin')
  convert_2 = make_element('audioconvert')
  target = make_element('filesink', location = dest)
  pipe.add(origin, decode_1, convert_1, encode,
           decode_2, convert_2, target)
  origin.link(decode_1)
  link_on_demand(decode_1, convert_1)
  convert_1.link(encode)
  encode.link(decode_2)
  link_on_demand(decode_2, convert_2)
  convert_2.link(target, caps)
  run_pipe(pipe, what)

def encode_flac(raw, caps, dest):
  """
  Encode the raw PCM file RAW, whose format is CAPS, as FLAC in DEST.
  """
  pipe = GS.Pipeline()
  origin = make_element('filesrc', location = raw)
  convert = make_element('audioconvert')
  encode = make_element('flacenc', quality = 8)
  target = make_element('filesink', location = dest)
  pipe.add(origin, convert, encode, target)
  origin.link(convert, caps)
  GS.element_link_many(convert, encode, target)
  run_pipe(pipe, 'output')

def count_blocks(lens, step = CHUNK_BYTES):
  """
  Return how many STEP-byte chunks are needed to cover the largest of LENS.

  LENS is a nonempty sequence of lengths in bytes.  (It is passed to `max'
  as a sequence: unpacking it into arguments fails for a single length.)
  """
  return (max(lens) + step - 1)//step

def choose_sequence(nstreams, nblocks, rng = None):
  """
  Choose which stream each of NBLOCKS chunks is to be taken from.

  Return a list of NBLOCKS indices, each drawn uniformly from
  range(NSTREAMS), in which every stream occurs at least once: candidate
  sequences are drawn afresh until one qualifies.  RNG supplies `randrange';
  by default the module's `SR' generator is used.

  Raise `ValueError' if no such sequence exists, i.e., if there are no
  streams or fewer chunks than streams; without this check the retry loop
  would never end.
  """
  if nstreams < 1 or nblocks < nstreams:
    raise ValueError('cannot fit %d streams into %d chunks' %
                     (nstreams, nblocks))
  if rng is None:
    rng = SR
  wanted = set(range(nstreams))
  while True:
    seq = [rng.randrange(nstreams) for _ in range(nblocks)]
    if set(seq) == wanted:
      return seq

def mix_chunks(paths, seq, dest, step = CHUNK_BYTES):
  """
  Stitch chunks of the files PATHS together into the file DEST.

  Chunk K of DEST is read from offset K*STEP of the file PATHS[SEQ[K]], and
  is at most STEP bytes long.  Mixing stops after the first chunk which
  comes up short, since the chosen file has run out.

  Return the list of entries of SEQ which actually contributed data to
  DEST.  This is a prefix of SEQ, and may be shorter than SEQ if the files
  differ in length.
  """
  used = []
  streams = []
  try:
    for path in paths:
      streams.append(open(path, 'rb'))
    out = open(dest, 'wb')
    try:
      pos = 0
      for i in seq:
        f = streams[i]
        f.seek(pos)
        buf = f.read(step)
        if buf:
          out.write(buf)
          used.append(i)
        if len(buf) < step:
          break
        pos += step
    finally:
      out.close()
  finally:
    for f in streams:
      f.close()
  return used

def main(argv = None):
  """
  Run the program with the command line ARGV (by default, `sys.argv').

  Each PIPELINE argument is used to transcode INPUT to a raw PCM file in a
  temporary directory; chunks of these are stitched together at random; the
  chunk order is written to OUTPUT.sequence; and the mixture is encoded as
  FLAC in OUTPUT.  The temporary directory is removed afterwards, however
  things turn out.
  """
  if argv is None:
    argv = SYS.argv

  ## Read the command line arguments.
  if len(argv) < 5:
    SYS.stderr.write('usage: ab-chop INPUT CAPS OUTPUT PIPELINE...\n')
    SYS.exit(1)
  source = argv[1]
  caps = GS.caps_from_string(argv[2])
  output = argv[3]
  descriptions = argv[4:]

  ## We want a temporary place to keep things.
  tmp = TF.mkdtemp()
  try:

    ## First step: produce raw PCM files from the original source and the
    ## requested encoders.
    temps = []
    for n, description in enumerate(descriptions):
      temp = OS.path.join(tmp, '%d.raw' % n)
      temps.append(temp)
      transcode(source, description, caps, temp, 'input-%d' % n)
      print('done %s' % description)

    ## Second step: decide which stream each chunk is going to come from.
    lens = [OS.stat(t).st_size for t in temps]
    blocks = count_blocks(lens)
    if blocks < len(temps):
      SYS.stderr.write('ab-chop: input too short: %d chunks cannot '
                       'include all %d pipelines\n' % (blocks, len(temps)))
      SYS.exit(1)
    seq = choose_sequence(len(temps), blocks)

    ## Third step: stitch the chunks together.  Keep only the part of the
    ## sequence which made it into the mixture, so that the sequence file
    ## describes what is really in the output.  (If the streams differ in
    ## length, a stream whose only chunk was cut off here won't appear.)
    mix = OS.path.join(tmp, 'mix.raw')
    seq = mix_chunks(temps, seq, mix)

    f = open(output + '.sequence', 'w')
    try:
      f.write(', '.join([str(i) for i in seq]) + '\n')
    finally:
      f.close()

    ## Final step: encode the mixture.
    encode_flac(mix, caps, output)
    print('all done')
  finally:
    SH.rmtree(tmp)

if __name__ == '__main__':
  main()
Commit	Line	Data
583b7e4a MW	1	#! /usr/bin/python
	2	###
	3	### A simple program for doing blind A/B audio comparisons
	4	###
	5	### (c) 2010 Mark Wooding
	6	###
	7
	8	###----- Licensing notice ---------------------------------------------------
	9	###
	10	### This program is free software; you can redistribute it and/or modify
	11	### it under the terms of the GNU General Public License as published by
	12	### the Free Software Foundation; either version 2 of the License, or
	13	### (at your option) any later version.
	14	###
	15	### This program is distributed in the hope that it will be useful,
	16	### but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	### GNU General Public License for more details.
	19	###
	20	### You should have received a copy of the GNU General Public License
	21	### along with this program; if not, write to the Free Software Foundation,
	22	### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
	23
	24	###----- Usage --------------------------------------------------------------
	25	###
	26	### The command line syntax is:
	27	###
	28	### ab-chop INPUT CAPS OUTPUT PIPELINE...
	29	###
	30	### This means that we should read INPUT, decode it (using a GStreamer
	31	### `decodebin', so it should be able to handle most things you care to throw
	32	### at it), and then re-encode it according to each PIPELINE in turn, decode
	33	### /that/ again, and stash the resulting raw PCM data. When we've finished,
	34	### we line up the PCM data streams side-by-side, chop them into chunks, and
	35	### then stitch chunks from randomly chosen streams together to make a new
	36	### PCM stream. Finally, we encode that mixed-up stream as FLAC, and write
	37	### it to OUTPUT. It also writes a file OUTPUT.sequence which is a list of
	38	### numbers indicating which pipeline each chunk of the original came from.
	39	###
	40	### The motivation is that we want to test encoder quality. So you take a
	41	### reference source (as good as you can find), and use that as your INPUT.
	42	### You then write GStreamer pipeline fragments for the encoders you want to
	43	### compare; say `identity' if you want the unmodified original reference to
	44	### be mixed in.
	45	###
	46	### The only tricky bit is the CAPS, which is a GStreamer capabilities string
	47	### describing the raw PCM format to use as an intermediate representation.
	48	### (This is far too low-level and cumbersome for real use, but it's OK for
	49	### now.) You need to say something like
	50	###
	51	### audio/x-raw-int,width=16,rate=44100,channels=2,depth=16,
	52	### endianness=1234,signed=true
	53	###
	54	### for standard CD audio.
	55
	56	###--------------------------------------------------------------------------
	57	### External dependencies.
	58
	59	## Standard Python libraries.
	60	import sys as SYS
	61	import os as OS
	62	import shutil as SH
	63	import fnmatch as FN
	64	import random as R
65
66	SR = R.SystemRandom()
67
68	## GObject and GStreamer.
69	import gobject as G
70	import gst as GS
71
72	###--------------------------------------------------------------------------
73	### GStreamer utilities.
74
75	def link_on_demand(src, sink, sinkpad = None, cap = None):
76	"""
77	Link SINK to SRC when a pad appears.
78
79	More precisely, when SRC reports that a pad with media type matching the
80	`fnmatch' pattern CAP has appeared, link the pad of SINK named SINKPAD (or
81	some sensible pad by default).
82	"""
83	def _link(src, srcpad):
84	if cap is None or FN.fnmatchcase(srcpad.get_caps()[0].get_name(), cap):
85	src.link_pads(srcpad.get_name(), sink, sinkpad)
86	src.connect('pad-added', _link)
87
88	def make_element(factory, name = None, **props):
89	"""
90	Return an element made by FACTORY with properties specified by PROPS.
91	"""
92	elt = GS.element_factory_make(factory, name)
93	elt.set_properties(**props)
94	return elt
95
96	def dump_pipeline(pipe, indent = 0):
97	done = {}
98	q = []
99	for e in pipe.iterate_sources():
100	q = [e]
101	while q:
102	e, q = q[0], q[1:]
103	if e in done:
104	continue
105	done[e] = True
106	print
107	print '%s%s %s' % (' '*indent, type(e).__name__, e.get_name())
108	for p in e.pads():
109	c = p.get_negotiated_caps()
110	peer = p.get_peer()
111	print '%s Pad %s %s (%s)' % \
112	(' '*(indent + 1),
113	p.get_name(),
114	peer and ('<-> %s.%s' % (peer.get_parent().get_name(),
115	peer.get_name()))
116	or 'unconnected',
117	c and c.to_string() or 'no-negotiated-caps')
118	if peer:
119	q.append(peer.get_parent())
120	if isinstance(e, GS.Bin):
121	dump_pipeline(e, indent + 1)
122
123	def run_pipe(pipe, what):
124	"""
125	Run a GStreamer pipeline PIPE until it finishes.
126	"""
127	loop = G.MainLoop()
128	bus = pipe.get_bus()
129	bus.add_signal_watch()
130	def _bus_message(bus, msg):
131	if msg.type == GS.MESSAGE_ERROR:
132	SYS.stderr.write('error from pipeline: %s\n' % msg)
133	SYS.exit(1)
134	elif msg.type == GS.MESSAGE_STATE_CHANGED and \
135	msg.src == pipe and \
136	msg.structure['new-state'] == GS.STATE_PAUSED:
137	dump_pipeline(pipe)
138	elif msg.type == GS.MESSAGE_EOS:
139	loop.quit()
140	bus.connect('message', _bus_message)
141
142	pipe.set_state(GS.STATE_PLAYING)
143	loop.run()
144	GS.DEBUG_BIN_TO_DOT_FILE(pipe, 3, what)
145	pipe.set_state(GS.STATE_NULL)
146
147	###--------------------------------------------------------------------------
148	### Main program.
149
150	## Read the command line arguments.
151	input = SYS.argv[1]
152	caps = GS.caps_from_string(SYS.argv[2])
153	output = SYS.argv[3]
154
155	## We want a temporary place to keep things. This provokes a warning, but
156	## `mkdir' is atomic and sane so it's not a worry.
157	tmp = OS.tmpnam()
158	OS.mkdir(tmp)
159	try:
160
161	## First step: produce raw PCM files from the original source and the
162	## requested encoders.
163	q = 0
164	temps = []
165	for i in SYS.argv[4:]:
166	temp = OS.path.join(tmp, '%d.raw' % q)
167	temps.append(temp)
168	pipe = GS.Pipeline()
169	origin = make_element('filesrc', location = input)
170	decode_1 = make_element('decodebin')
171	convert_1 = make_element('audioconvert')
172	encode = GS.parse_bin_from_description(i, True)
173	decode_2 = make_element('decodebin')
174	convert_2 = make_element('audioconvert')
175	target = make_element('filesink', location = temp)
176	pipe.add(origin, decode_1, convert_1, encode,
177	decode_2, convert_2, target)
178	origin.link(decode_1)
179	link_on_demand(decode_1, convert_1)
180	##convert_1.link(encode, GS.caps_from_string('audio/x-raw-float, channels=2'))
181	convert_1.link(encode)
182	encode.link(decode_2)
183	link_on_demand(decode_2, convert_2)
184	convert_2.link(target, caps)
185
186	run_pipe(pipe, 'input-%d' % q)
187	del pipe
188	print 'done %s' % i
189	q += 1
190	step = 1763520
191	lens = [OS.stat(i).st_size for i in temps]
192	blocks = (max(*lens) + step - 1)//step
193	while True:
194	seq = []
195	done = {}
196	for i in xrange(blocks):
197	j = SR.randrange(q)
198	done[j] = True
199	seq.append(j)
200	ok = True
201	for i in xrange(q):
202	if i not in done:
203	ok = False
204	break
205	if ok:
206	break
207	ff = [open(i, 'rb') for i in temps]
208	mix = OS.path.join(tmp, 'mix.raw')
209	out = open(mix, 'wb')
210	pos = 0
211	for i in seq:
212	f = ff[i]
213	f.seek(pos)
214	buf = f.read(step)
215	out.write(buf)
216	if len(buf) < step:
217	break
218	pos += step
219	out.close()
220	for f in ff:
221	f.close()
222
223	f = open(output + '.sequence', 'w')
224	f.write(', '.join([str(i) for i in seq]) + '\n')
225	f.close()
226
227	pipe = GS.Pipeline()
228	origin = make_element('filesrc', location = mix)
229	convert = make_element('audioconvert')
230	encode = make_element('flacenc', quality = 8)
231	target = make_element('filesink', location = output)
232	pipe.add(origin, convert, encode, target)
233	origin.link(convert, caps)
234	GS.element_link_many(convert, encode, target)
235
236	run_pipe(pipe, 'output')
237	del pipe
238	print 'all done'
239	finally:
240	SH.rmtree(tmp)