From: Ian Jackson
Date: Mon, 31 Aug 2009 15:34:48 +0000 (+0100)
Subject: Ocean scraper seems to work properly now
X-Git-Tag: 3.4~60
X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.web-live.git;a=commitdiff_plain;h=cf9d0277c5d6bfe900a0881a00984f8268a90514;hp=cf91713ebb03e007c03dc70867c73db3a1a754a9

Ocean scraper seems to work properly now
---

diff --git a/yarrg/yppedia-ocean-scraper b/yarrg/yppedia-ocean-scraper
index d53e236..401de9b 100755
--- a/yarrg/yppedia-ocean-scraper
+++ b/yarrg/yppedia-ocean-scraper
@@ -1,28 +1,63 @@
 #!/usr/bin/python
+# helper program for getting information from yppedia
+
+# This is part of ypp-sc-tools, a set of third-party tools for assisting
+# players of Yohoho Puzzle Pirates.
+#
+# Copyright (C) 2009 Ian Jackson
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Yohoho and Puzzle Pirates are probably trademarks of Three Rings and
+# are used without permission.  This program is not endorsed or
+# sponsored by Three Rings.
+
+copyright_info = '''
+yppedia-ocean-scraper is part of ypp-sc-tools Copyright (C) 2009 Ian Jackson
+This program comes with ABSOLUTELY NO WARRANTY; this is free software,
+and you are welcome to redistribute it under certain conditions.  For
+details, read the top of the yppedia-ocean-scraper file.
+'''
+
 import signal
 signal.signal(signal.SIGINT, signal.SIG_DFL)
 
+import sys
 import os
 import urllib
 import urllib2
 import re as regexp
-#from optparse import OptionParser
-
+from optparse import OptionParser
 from BeautifulSoup import BeautifulSoup
 
-ocean = 'Opal'
+ocean = None
 soup = None
+opts = None
+arches = {}
 
 def debug(k,v):
-#    print k,`v`
-    pass
+    if opts.debug:
+        print >>sys.stderr, k,`v`
 
 def fetch():
     global soup
     url = ('http://yppedia.puzzlepirates.com/%s_Ocean' % urllib.quote(ocean,''))
+    debug('fetching',url)
     dataf = urllib2.urlopen(url)
+    debug('fetched',dataf)
     soup = BeautifulSoup(dataf)
@@ -57,32 +92,63 @@ def parse():
     archestable = firstarch.findParent('table', attrs={'border':'1'})
     debug('at',archestable)
 
-    arches = []
+    archsoups = []
     for row in archestable.findAll('tr',recursive=False):
-        arches += row.findAll('td',recursive=False)
-    debug('ac',arches)
+        archsoups += row.findAll('td',recursive=False)
+    debug('ac',archsoups)
 
     def is_island(v):
         return len(v.findAll(text = regexp.compile('.*Large'))) > 0
     def arch_up_map(u):
         return u.findParent(is_island)
 
-    for arch in arches:
+    for arch in archsoups:
         links = arch.findAll('a', href=True)
         debug('links',links)
         if not links: continue
         (a,o) = title_arch_info(links[0]['title'])
         assert(o == ocean)
-        print 'arch', a
+        debug('arch', a)
+        assert(a not in arches)
+        isles = []
         for link in links[1:]:
             debug('link',link)
             if href_img_re.search(link['href']): continue
             m = title_any_re.match(link['title'])
             assert(m.group(2) == ocean)
-            print 'island', m.group(1)
+            island = m.group(1)
+            debug('island', island)
+            isles.append(island)
+        isles.sort()
+        arches[a] = isles
+
+def output():
+    print 'ocean',ocean
+    al = arches.keys()
+    al.sort()
+    for a in al:
+        print '',a
+        for island in arches[a]:
+            print ' ',island
 
 def main():
+    global ocean
+    global opts
+
+    pa = OptionParser(
+        '''usage: .../yppedia-ocean-scraper [--debug] OCEAN''')
+    ao = pa.add_option
+    ao('--debug', action='count', dest='debug', default=0,
+       help='enable debugging output')
+
+    (opts,args) = pa.parse_args()
+    if len(args) != 1:
+        print >>sys.stderr, copyright_info
+        pa.error('need an ocean argument')
+    ocean = args[0]
+
     fetch()
     parse()
+    output()
 
 main()
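
With this change the scraper takes the ocean name as a command-line argument (instead of the previously hard-coded 'Opal') and prints a sorted archipelago/island listing from the new output() function rather than ad-hoc per-item prints. A minimal usage sketch, assuming the script is run from the yarrg directory and that 'Opal' is a valid ocean name on yppedia; the archipelago and island names shown are hypothetical placeholders, not real scraped data:

    $ ./yppedia-ocean-scraper Opal
    ocean Opal
     Some Archipelago
      Example Island
      Other Island

Each archipelago name is indented by one space and each island by two, matching what output() emits; adding --debug only produces extra progress lines on stderr, so the stdout listing keeps this format.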