#! /bin/bash

## Abort as soon as any unchecked command fails.
set -e

## JBDIR is the root directory under which helper programs are looked up
## (`bin/flaccrip-toc' is run from it when a directory has no `.discid'
## file).  The default applies only when JBDIR is entirely unset: a value
## inherited from the environment, even an empty one, is left alone.  The
## expansion is quoted so that a value containing spaces or glob characters
## isn't pointlessly split and pathname-expanded.
: "${JBDIR=/mnt/jb}"

###--------------------------------------------------------------------------
### CD identification algorithms.
###
### 1. CDDB
###
### CCLLLLNN [NTRACK TRACK-START... LENGTH]
###
### CC is a checksum of the track start times; LLLL is the offset of the
### leadout track, in seconds (rounded down), and NN is the total number of
### tracks.  All of these are in hexadecimal, and include the 150-frame (2
### second) pre-gap.  Since a CD can have at most 99 tracks, and can contain
### no more than 90 minutes of audio (!), the other two items fit without
### needing reduction.
###
### The checksum is the sum of the decimal digits of the track start times,
### in seconds, reduced modulo 255.
###
### NTRACK is the number of tracks; LENGTH is the offset of the leadout in
### seconds.  These essentially repeat the NN and LLLL fields of the id, so
### repeating them is pointless, but it's done anyway.  The TRACK-STARTs are
### the track start offsets, in frames.
###
###
### 2. AccurateRip
###
### DA1-DA2-CDDBID
###
### CDDBID is the CDDB id as described above.  DA1 is simply the sum of the
### track starts, including the lead-out track; DA2 is the sum of the
### products TRACKNO * OFFSET for the audio tracks only, but including the
### final lead-out -- so a data track makes the last audio track look very
### long.  Another wrinkle: the OFFSET for the first track is forced to 1 if
### it's zero (to avoid the entry being lost, I presume, though I'm not sure
### why this is ever so useful).
###
###
### 3. MusicBrainz
###
### The MusicBrainz identification is a base64-encoded SHA-1 hash of the
### table of contents.  The base64 encoding uses `.', `_' and `-' in place of
### `+', `/' and `=', because the standard characters /all/ have special
### meanings in URL query strings.  (Duh.  And I'm not quite sure why we
### still need the trailing marker.)
###
### The message to be hashed is FIRST LAST LENGTH TRACK-START..., where FIRST
### and LAST are the first and last track numbers, LENGTH is the offset of
### the lead-out, in frames, and the TRACK-STARTs are the start offsets of
### the tracks, in order, also in frames.  The track numbers are two
### uppercase hex digits; the frame offsets are eight.  All of these are
### simply concatenated together.
###
### MusicBrainz only concerns itself with the audio tracks.  If there's a
### data track, then we ignore it, and the lead-out is considered to be 11400
### frames before the data track.

###--------------------------------------------------------------------------
### Command line.

## Choose the output format.  The default is the bare CDDB id; if several
## format options are given, the last one wins.
##
##   -a   AccurateRip id
##   -c   CDDB id (default)
##   -C   CDDB id followed by the track details
##   -m   MusicBrainz id
##
## Anything else is an error: `getopts' reports the offending option itself,
## and we follow up with the same usage line that is printed for a bad
## argument count, then exit with status 1.
format=cddb
while getopts "acCm" opt; do
  case "$opt" in
    a) format=accuraterip ;;
    c) format=cddb ;;
    C) format=cddb-tracks ;;
    m) format=musicbrainz ;;
    *)
      echo >&2 "Usage: $0 [-acCm] [DIRECTORY]"
      exit 1
      ;;
  esac
done

## Discard the options, leaving only the positional arguments.
shift $((OPTIND - 1))

## Decide where the table of contents comes from, and attach it to stdin.
##
##   * No argument: leave standard input alone.
##   * One argument, a directory: use its `.discid' file if that is
##     readable; otherwise run `$JBDIR/bin/flaccrip-toc' on the directory
##     and read that program's output.
##   * Anything else is a usage error.
case $# in
  0)
    ;;
  1)
    if [ -r "$1/.discid" ]; then
      exec <"$1/.discid"
    else
      ## Check for the helper up front.  A failure inside a process
      ## substitution doesn't stop the script, so without this check a
      ## missing tool would just look like an empty table of contents.
      toc_tool="$JBDIR/bin/flaccrip-toc"
      if ! [ -x "$toc_tool" ]; then
	echo >&2 "$0: cannot run \`$toc_tool'"
	exit 1
      fi
      exec < <("$toc_tool" "$1")
    fi
    ;;
  *)
    echo >&2 "Usage: $0 [-acCm] [DIRECTORY]"
    exit 1
    ;;
esac

###--------------------------------------------------------------------------
### Main work.

## Initial setup.  Every accumulator is reset explicitly, and `final' and
## `mbfinal' are unset, so that nothing inherited from the environment can
## leak into the results.  In particular, the end-marker code below relies
## on `mbfinal' being /unset/ to tell that no data track has been seen.
##
##   cddbck      running sum of the decimal digits of the track start times
##   cddbtracks  space-separated track starts, in frames, including pre-gap
##   nt, nat     number of tracks seen so far: all tracks, and `T' tracks
##   da, db      the two AccurateRip sums
##   mbtracks    concatenated eight-digit hex track starts for MusicBrainz
##   final       offset of the end marker, as read
##   mbfinal     lead-out offset to feed into the MusicBrainz hash
cddbck=0
cddbtracks=""
nt=0 nat=0
da=0 db=0
mbtracks=""
unset final mbfinal

## Wander through the table of contents picking up unconsidered trifles.
## Each line is `TYPE OFFSET': TYPE is `T' (counted as an audio track), `D'
## (counted as a data track) or `E' (the end marker), and OFFSET is in
## frames.  Lines of any other type are ignored.  The extra test on `$type'
## makes sure that a final line with no trailing newline is still processed
## rather than silently dropped.
while read -r type offset || [ -n "$type" ]; do

  ## Bump the track numbers here.  Most things want 1-based numbering, so
  ## this is right.  Don't bump for the end marker.  Those who care
  ## (AccurateRip) will sort it out for themselves.
  case "$type" in
    T) nt=$((nt + 1)) nat=$((nat + 1)) ;;
    D) nt=$((nt + 1)) ;;
  esac

  ## Update the CDDB state.  This is common to several formats.
  case "$type" in
    [TD])
      ## CDDB works with offsets which include the 150-frame pre-gap.
      o=$((offset + 150))
      s=$((o/75))
      cddbtracks="${cddbtracks:+$cddbtracks }$o"

      ## Add the decimal digits of the start time, in seconds, to the
      ## checksum.  The reduction modulo 255 happens later.
      for ((i = 0; i < ${#s}; i++)); do
	cddbck=$((cddbck + ${s:i:1}))
      done
      ;;
    E)
      final=$offset
      ;;
  esac

  ## Update other bits of information.
  case "$type" in
    T)
      ## AccurateRip: plain sum, and sum weighted by track number.  A zero
      ## offset is counted as 1 in the weighted sum.
      da=$((da + offset))
      db=$((db + nat*(offset > 0 ? offset : 1)))

      ## MusicBrainz: eight uppercase hex digits, including the pre-gap.
      printf -v mbhex "%08X" $((offset + 150))
      mbtracks="$mbtracks$mbhex"
      ;;
    D)
      ## MusicBrainz ignores the data track, and puts the lead-out 11400
      ## frames before it.  That is 11250 once the pre-gap is added back.
      mbfinal=$((offset - 11250))
      ;;
    E)
      ## AccurateRip treats the end marker as one more track after the last
      ## audio track.
      da=$((da + offset))
      db=$((db + (nat + 1)*(offset > 0 ? offset : 1)))

      ## If no data track set the MusicBrainz lead-out, it's the real one.
      case "${mbfinal+t}" in
	t) ;;
	*) mbfinal=$((offset + 150)) ;;
      esac
      ;;
  esac
done

## Sort out the CDDB id: checksum (reduced modulo 255), disc length in
## seconds, and track count, as two, four and two hex digits.
##
## The length field is the lead-out start minus the first track's start,
## each taken in whole seconds and each including the 150-frame pre-gap.
## When the first track starts at frame 0 this is just `final/75', but a
## disc whose first track starts a second or more in needs the subtraction.
## The first track's start is the first entry of `cddbtracks', which already
## has the pre-gap added.  If there are no tracks at all, fall back to the
## bare pre-gap.
cddbfirst=${cddbtracks%% *}
cddblen=$(( (final + 150)/75 - ${cddbfirst:-150}/75 ))
cddbid=$(printf "%02x%04x%02x" $((cddbck%255)) $cddblen $((nt)))

###--------------------------------------------------------------------------
### Produce the answer.

## Print the id in whichever format was asked for.  An unrecognized format
## prints nothing.
if [ "$format" = cddb ]; then

  ## Just the id.
  echo "$cddbid"

elif [ "$format" = cddb-tracks ]; then

  ## The id, the track count, the track starts, and the lead-out offset in
  ## seconds.  The `+ 2' is the 150-frame pre-gap.
  leadout_secs=$((final/75 + 2))
  echo "$cddbid $nt $cddbtracks $leadout_secs"

elif [ "$format" = accuraterip ]; then

  ## Audio track count, the two sums, and the CDDB id.
  printf "%03d-%08x-%08x-%s\n" $nat $da $db $cddbid

elif [ "$format" = musicbrainz ]; then

  ## The message to hash: first track number (always 1), last track number,
  ## lead-out offset, then the track offsets.
  mb=$(printf "%02X%02X%08X%s" 1 $nat $mbfinal $mbtracks)

  ## Fill the unused slots, up to 99 tracks in all, with zero offsets.
  unused=$((99 - nat))
  if ((unused > 0)); then
    printf -v zeros "%0*d" $((unused*8)) 0
    mb="$mb$zeros"
  fi

  ## SHA-1, then base64 with the URL-hostile characters swapped out.
  printf "%s" $mb |
    openssl dgst -sha1 -binary |
    openssl base64 |
    tr '+/=' '._-'

fi