From ff016739a385102c8e11e80757d5404745bd9a39 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Mon, 31 Aug 2009 17:23:12 +0100 Subject: [PATCH] ocean scraper can fetch charts --- yarrg/yppedia-ocean-scraper | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/yarrg/yppedia-ocean-scraper b/yarrg/yppedia-ocean-scraper index 9105f19..5f9c5f0 100755 --- a/yarrg/yppedia-ocean-scraper +++ b/yarrg/yppedia-ocean-scraper @@ -53,12 +53,17 @@ def debug(k,v): def fetch(): global soup - url = ('http://yppedia.puzzlepirates.com/%s_Ocean' % - urllib.quote(ocean,'')) + if opts.chart: + url_base = 'index.php?title=Template:Map:%s_Ocean&action=edit' + else: + url_base = '%s_Ocean' + url = ('http://yppedia.puzzlepirates.com/' + + (url_base % urllib.quote(ocean,''))) debug('fetching',url) dataf = urllib2.urlopen(url) debug('fetched',dataf) - soup = BeautifulSoup(dataf) + soup = BeautifulSoup(dataf, + convertEntities=BeautifulSoup.HTML_ENTITIES) title_arch_re = regexp.compile('(\\S.*\\S) Archipelago \\((\\S+)\\)$') @@ -78,7 +83,14 @@ def title_arch_ok(t): if o is None: return False return o == ocean -def parse(): +def parse_chart(): + ta = soup.find('textarea') + debug('ta',ta) + rc = ta.renderContents() + debug('rc',rc) + return rc + +def parse_ocean(): firstarch = soup.find('a', attrs = {'title': title_arch_ok}) debug('fa',firstarch) @@ -136,8 +148,11 @@ def main(): global opts pa = OptionParser( - '''usage: .../yppedia-ocean-scraper [--debug] OCEAN''') +'''usage: .../yppedia-ocean-scraper [--debug] [--chart] OCEAN''') ao = pa.add_option + + ao('--chart', action='store_true', dest='chart', + help='print chart source rather than arch/island info') ao('--debug', action='count', dest='debug', default=0, help='enable debugging output') @@ -148,7 +163,10 @@ def main(): ocean = args[0] fetch() - parse() - output() + if opts.chart: + print parse_chart() + else: + parse_ocean() + output() main() -- 2.30.2