#!/usr/bin/python

# helper program for getting information from yppedia

# This is part of the YARRG website.  YARRG is a tool and website
# for assisting players of Yohoho Puzzle Pirates.
#
# Copyright (C) 2009 Ian Jackson <ijackson@chiark.greenend.org.uk>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Yohoho and Puzzle Pirates are probably trademarks of Three Rings and
# are used without permission.  This program is not endorsed or
# sponsored by Three Rings.

# Short banner printed before usage errors; the full license text is in
# the comment block at the top of this file.
copyright_info = '''
yppedia-ocean-scraper is part of ypp-sc-tools Copyright (C) 2009 Ian Jackson
This program comes with ABSOLUTELY NO WARRANTY; this is free software,
and you are welcome to redistribute it under certain conditions.  For
details, read the top of the yppedia-ocean-scraper file.
'''

import signal
# Restore the default SIGINT disposition so ^C kills the script
# immediately instead of raising KeyboardInterrupt with a traceback.
signal.signal(signal.SIGINT, signal.SIG_DFL)

import sys
import os
import urllib
import re as regexp
import subprocess
from optparse import OptionParser
from BeautifulSoup import BeautifulSoup


# Work around Python 2's awkward stdout encoding behaviour:
import codecs
import locale
def fix_stdout():
    # Wrap sys.stdout so unicode strings are encoded to the locale's
    # preferred encoding on output.  null_decode is installed as the
    # wrapper's "decoder" so that already-encoded byte strings pass
    # through unchanged rather than being recoded.
    sys.stdout = codecs.EncodedFile(sys.stdout, locale.getpreferredencoding())
    def null_decode(input, errors='strict'):
        return input, len(input)
    sys.stdout.decode = null_decode
# From
#  http://ewx.livejournal.com/457086.html?thread=3016574
# lightly modified.
# See also Debian #415968.
fix_stdout()


# User agent:
class YarrgURLopener(urllib.FancyURLopener):
	# Compute the User-Agent string once, at class-definition time, by
	# asking the local ./database-info-fetch helper to map urllib's
	# default version string; this identifies YARRG to the wiki server.
	base_version= urllib.URLopener().version
	proc= subprocess.Popen(
		["./database-info-fetch", "useragentstringmap",
		 base_version, "manual islands/topology fetch"],
		shell=False,
		stderr=None,
		stdout=subprocess.PIPE,
		)
	version = proc.communicate()[0].rstrip('\n');
	# communicate() has waited for the child, so returncode is set;
	# insist the helper succeeded.
	assert(proc.returncode is not None and proc.returncode == 0)
# Install as the opener used by urllib.urlopen() module-wide.
urllib._urlopener = YarrgURLopener()

ocean = None	# ocean name from the command line (set in main())
soup = None	# BeautifulSoup parse tree of the fetched page (set in fetch())
opts = None	# parsed command-line options (set in main())
arches = {}	# maps archipelago name -> sorted list of island names

def debug(k,v):
	# Emit a "key repr(value)" trace line on stderr when --debug is on.
	if not opts.debug: return
	print >>sys.stderr, k, repr(v)

def fetch():
	# Retrieve the ocean's yppedia page (or, with --chart, the editable
	# chart template source) and parse it into the global `soup`.
	global soup
	if opts.chart:
		pattern = 'index.php?title=Template:Map:%s_Ocean&action=edit'
	else:
		pattern = '%s_Ocean'
	page = pattern % urllib.quote(ocean,'')
	if opts.localhtml is not None:
		# --local-html-dir: read a previously saved copy from disk.
		dataf = file(opts.localhtml + '/' + page, 'r')
	else:
		url = 'http://yppedia.puzzlepirates.com/' + page
		debug('fetching',url)
		dataf = urllib.urlopen(url)
		debug('fetched',dataf)
	soup = BeautifulSoup(dataf)

# Patterns for classifying yppedia link titles and hrefs:
#  title_arch_re - "<Arch> Archipelago (<Ocean>)"
#  title_any_re  - "<Name> (<Ocean>)", optionally with the redlink
#                  suffix " (page does not exist)"
#  href_img_re   - hrefs ending in .png (map images, not island pages)
title_arch_re = regexp.compile(r'(\S.*\S) Archipelago \((\S+)\)$')
title_any_re = regexp.compile(r'(\S.*\S) \((\S+)\)(?: \(page does not exist\))?$')
href_img_re = regexp.compile(r'\.png$')

def title_arch_info(t):
	# Parse link title t as "<arch> Archipelago (<ocean>)" and return
	# the pair (arch,ocean); return (None,None) when it does not match.
	debug('checking',t)
	if t is None:
		return (None,None)
	match = title_arch_re.match(t)
	if match:
		return match.groups()
	return (None,None)

def title_arch_ok(t):
	# True iff t names an archipelago belonging to the ocean we want.
	(a,o) = title_arch_info(t)
	return o is not None and o == ocean

def parse_chart():
	# Extract the chart template wikitext from the edit page's
	# <textarea> and undo HTML entity escaping.  '&amp;' must be
	# translated last so freshly produced '&'s are not re-expanded.
	textarea = soup.find('textarea')
	debug('ta',textarea)
	text = textarea.string
	debug('s',text)
	for pattern, replacement in ((r'\&lt\;', '<'),
				     (r'\&gt\;', '>'),
				     (r'\&quot\;', '"'),
				     (r'\&amp\;', '&')):
		text = regexp.sub(pattern, replacement, text)
	debug('s',text)
	return text

def parse_ocean():
	# Parse the ocean overview page in `soup`, filling the global
	# `arches` mapping: archipelago name -> sorted list of island names.
	# (The unused helpers findall_title_arch_ok / is_archestable /
	# is_island / arch_up_map from an earlier approach were removed.)
	content = soup.find('div', attrs = {'id': 'content'})

	# The arch/island layout is the first bordered table in the page
	# body; each <td> directly under a row holds one archipelago.
	archestable = content.findChild('table', attrs={'border':'1'})
	debug('at',archestable)

	archsoups = []
	for row in archestable.findAll('tr',recursive=False):
		archsoups += row.findAll('td',recursive=False)
	debug('ac',archsoups)

	for arch in archsoups:
		links = arch.findAll('a', href=True)
		debug('links',links)
		if not links: continue
		# The first link is the archipelago itself; it must belong to
		# our ocean and not have been seen already.
		(a,o) = title_arch_info(links[0]['title'])
		debug('arch-ocean', (a,o))
		assert(o == ocean)
		assert(a not in arches)
		isles = []
		for link in links[1:]:
			debug('link',link)
			# Skip map-image links; remaining links are island pages.
			if href_img_re.search(link['href']): continue
			m = title_any_re.match(link['title'])
			assert(m.group(2) == ocean)
			island = m.group(1)
			debug('island', island)
			isles.append(island)
		isles.sort()
		arches[a] = isles

def output():
	print 'ocean',ocean
	al = arches.keys()
	al.sort()
	for a in al:
		print '',a
		for island in arches[a]:
			print ' ',island

def main():
	global ocean
	global opts

	pa = OptionParser(
'''usage: .../yppedia-ocean-scraper [--debug] [--chart] OCEAN''')
	ao = pa.add_option

	ao('--chart', action='store_true', dest='chart',
		help='print chart source rather than arch/island info')
	ao('--debug', action='count', dest='debug', default=0,
		help='enable debugging output')
	ao('--local-html-dir', action='store', dest='localhtml',
		help='get yppedia pages from local directory LOCALHTML'+
			' instead of via HTTP')

	(opts,args) = pa.parse_args()
	if len(args) != 1:
		print >>sys.stderr, copyright_info
		pa.error('need an ocean argument')
	ocean = args[0]

	fetch()
	if opts.chart:
		print parse_chart()
	else:
		parse_ocean()
		output()

# Only run when executed as a script, so the module can be imported
# without side effects (e.g. for testing).
if __name__ == '__main__':
	main()
