First get the appropriate index.
9706 wget http://www.thebricktestament.com/joshua/index.html
Divide it up into chunks
9715 csplit index.html '/.*
/' '{*}'
Find the chunks that are junk rather than content, and delete them
9716 ls
9717 less xx00
9718 rm xx00
9719 less xx01
9720 rm xx01
...
9726 rm xx0{2,3,4,5}
9727 ls
9728 less xx{0{6,7,8,9},1{0,1,2,3,4,5,6,7,8,9},2{0,1,2}}
9729 rm xx2{0,1,2}
Edit out any other junk from the remaining chunks
9730 zed xx19
9731 zed xx13
9732 zed xx12
Delete index.html
9734 rm index.html
Strip the DOS carriage returns (the equivalent of running dos2unix)
9736 perl -pi.bak -e 's/\r//g' *
9740 rm *.bak
Change relative anchors into absolute ones
9741 grep 'a href=' *
9742 perl -pi.bak -e 's-a href="-a href="http://www.thebricktestament.com/joshua/-g' *
9744 rm *.bak
Change the ratings images to point to the ones on chiark
9752 grep -h rating * | sort | uniq
9755 perl -pi.bak -e 's=../website_images/parchment_bkg/rating=http://www.chiark.greenend.org.uk/~jdamery/bt-thumbs/rating=g' *
9757 rm *.bak
Watch out for ones which don't match the common scheme, and fix those by hand
9766 zed xx12
Get thumbnails and rename them so they've got unique leafnames
9772 for x in $(perl -ne 'm/img src="([^"]*)"/; print "$1\n";' xx* | grep -v http | sort| uniq); do y=$(echo $x | sed 's-/-_-'); wget -O $y http://www.thebricktestament.com/joshua/"$x"; done
Edit the image URLs in the fragments
9775 perl -pi.bak -e 's#" > $(( $x + 126 )).html.frag; cat xx$(( $x + 5 )) >> $(( $x + 126 )).html.frag; echo "" >> $(( $x + 126 )).html.frag; done
9785 for x in $(seq 1 4); do echo "" > $(( $x + 126 )).html.frag; cat xx0$(( $x + 5 )) >> $(( $x + 126 )).html.frag; echo "
" >> $(( $x + 126 )).html.frag; done
Put the fragments in the hopper.
9789 cp *.frag ~/var/lib/bricktestament/frags/