X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.db-live.git;a=blobdiff_plain;f=yarrg%2Fyppedia-ocean-scraper;h=5f9c5f007b46fda4265ccc7492dc2b3d58e079f0;hp=d53e2365da98491bf1b6204a131295cf4fef3d5c;hb=9df0d91b280265faa669aea7d1501787eeb932da;hpb=cf91713ebb03e007c03dc70867c73db3a1a754a9 diff --git a/yarrg/yppedia-ocean-scraper b/yarrg/yppedia-ocean-scraper index d53e236..5f9c5f0 100755 --- a/yarrg/yppedia-ocean-scraper +++ b/yarrg/yppedia-ocean-scraper @@ -1,32 +1,72 @@ #!/usr/bin/python +# helper program for getting information from yppedia + +# This is part of ypp-sc-tools, a set of third-party tools for assisting +# players of Yohoho Puzzle Pirates. +# +# Copyright (C) 2009 Ian Jackson +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# Yohoho and Puzzle Pirates are probably trademarks of Three Rings and +# are used without permission. This program is not endorsed or +# sponsored by Three Rings. + +copyright_info = ''' +yppedia-ocean-scraper is part of ypp-sc-tools Copyright (C) 2009 Ian Jackson +This program comes with ABSOLUTELY NO WARRANTY; this is free software, +and you are welcome to redistribute it under certain conditions. For +details, read the top of the yppedia-ocean-scraper file. +''' + import signal signal.signal(signal.SIGINT, signal.SIG_DFL) +import sys import os import urllib import urllib2 import re as regexp -#from optparse import OptionParser - +from optparse import OptionParser from BeautifulSoup import BeautifulSoup -ocean = 'Opal' +ocean = None soup = None +opts = None +arches = {} def debug(k,v): -# print k,`v` - pass + if opts.debug: + print >>sys.stderr, k,`v` def fetch(): global soup - url = ('http://yppedia.puzzlepirates.com/%s_Ocean' % - urllib.quote(ocean,'')) + if opts.chart: + url_base = 'index.php?title=Template:Map:%s_Ocean&action=edit' + else: + url_base = '%s_Ocean' + url = ('http://yppedia.puzzlepirates.com/' + + (url_base % urllib.quote(ocean,''))) + debug('fetching',url) dataf = urllib2.urlopen(url) - soup = BeautifulSoup(dataf) + debug('fetched',dataf) + soup = BeautifulSoup(dataf, + convertEntities=BeautifulSoup.HTML_ENTITIES) -title_arch_re = regexp.compile('(\\S+) Archipelago \\((\\S+)\\)$') +title_arch_re = regexp.compile('(\\S.*\\S) Archipelago \\((\\S+)\\)$') title_any_re = regexp.compile('(\\S.*\\S) \((\\S+)\\)$') href_img_re = regexp.compile('\\.png$') @@ -43,7 +83,14 @@ def title_arch_ok(t): if o is None: return False return o == ocean -def parse(): +def parse_chart(): + ta = soup.find('textarea') + debug('ta',ta) + rc = ta.renderContents() + debug('rc',rc) + return rc + +def parse_ocean(): firstarch = soup.find('a', attrs = {'title': title_arch_ok}) debug('fa',firstarch) @@ -57,32 +104,69 @@ def parse(): archestable = firstarch.findParent('table', attrs={'border':'1'}) debug('at',archestable) - arches = [] + archsoups = [] for row in archestable.findAll('tr',recursive=False): - arches += row.findAll('td',recursive=False) - debug('ac',arches) + archsoups += row.findAll('td',recursive=False) + debug('ac',archsoups) def is_island(v): return len(v.findAll(text = regexp.compile('.*Large'))) > 0 def arch_up_map(u): return u.findParent(is_island) - for arch in arches: + for arch in archsoups: links = arch.findAll('a', href=True) debug('links',links) if not links: continue (a,o) = title_arch_info(links[0]['title']) + debug('arch-ocean', (a,o)) assert(o == ocean) - print 'arch', a + assert(a not in arches) + isles = [] for link in links[1:]: debug('link',link) if href_img_re.search(link['href']): continue m = title_any_re.match(link['title']) assert(m.group(2) == ocean) - print 'island', m.group(1) + island = m.group(1) + debug('island', island) + isles.append(island) + isles.sort() + arches[a] = isles + +def output(): + print 'ocean',ocean + al = arches.keys() + al.sort() + for a in al: + print '',a + for island in arches[a]: + print ' ',island def main(): + global ocean + global opts + + pa = OptionParser( +'''usage: .../yppedia-ocean-scraper [--debug] [--chart] OCEAN''') + ao = pa.add_option + + ao('--chart', action='store_true', dest='chart', + help='print chart source rather than arch/island info') + ao('--debug', action='count', dest='debug', default=0, + help='enable debugging output') + + (opts,args) = pa.parse_args() + if len(args) != 1: + print >>sys.stderr, copyright_info + pa.error('need an ocean argument') + ocean = args[0] + fetch() - parse() + if opts.chart: + print parse_chart() + else: + parse_ocean() + output() main()