From ed20d2bb8e1c89b328cdb467b7e8f8b8b3cdf61a Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Sun, 16 Jan 2011 13:25:45 +0000 Subject: [PATCH] yoweb-scrape: wip new flag and ocean functionality --- yoweb-scrape | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 136 insertions(+), 2 deletions(-) diff --git a/yoweb-scrape b/yoweb-scrape index a705f67..9b87ea3 100755 --- a/yoweb-scrape +++ b/yoweb-scrape @@ -44,6 +44,7 @@ import random import curses import termios import random +import subprocess from optparse import OptionParser from StringIO import StringIO @@ -98,6 +99,14 @@ def format_time_interval(ti): if ti < 86400: return '%dh' % (ti / 3600) return '%dd' % (ti / 86400) +def yppsc_dir(): + lib = os.getenv("YPPSC_YARRG_SRCBASE") + if lib is not None: return lib + lib = sys.argv[0] + lib = regexp.sub('/[^/]+$', '', lib) + os.environ["YPPSC_YARRG_SRCBASE"] = lib + return lib + #---------- caching and rate-limiting data fetcher ---------- class Fetcher: @@ -377,6 +386,110 @@ class CrewInfo(SomethingSoupInfo): def __str__(self): return `(self.crew, self.msgs)` +class FlagInfo(SomethingSoupInfo): + def __init__(self, flagid, max_age=600): + SomethingSoupInfo.__init__(self, + 'flag/info.wm?flagid=', flagid, max_age) + self._find_flag() + + def _find_flag(self): + font2 = self._soup.find('font',{'size':'+2'}) + self.flag = font2.find('b').contents[0] + magnate = self._soup.find('img',{'src': + '/yoweb/images/repute-MAGNATE.png'}) + warinfo = (magnate.findParent('table').findParent('tr'). + findNextSibling('tr').findNext('td',{'align':'left'})) + for waritem in warinfo.contents: + print 'ITEM ',`waritem` + +#---------- scraper for ocean info incl. embargoes etc. ---------- + +class IslandInfo(): + def __init__(self, ocean, islename): + self.ocean = ocean + self.name = islename + def collect(self): + pass + def yppedia_dataf(self): + def q(x): return urllib.quote(x.replace(' ','_')) + url_rhs = q(self.name) + '_(' + q(self.ocean) + ')' + if opts.localhtml is None: + url = 'http://yppedia.puzzlepirates.com/' + url_rhs + debug('IslandInfo retrieving YPP '+url); + return urllib.urlopen(url) + else: + return file(opts.localhtml + '/' + url_rhs, 'r') + def yoweb_url(self): + soup = BeautifulSoup(self.yppedia_dataf()) + content = soup.find('div', attrs = {'id': 'content'}) + yoweb_re = regexp.compile('^http://\w+\.puzzlepirates\.com/'+ + 'yoweb/island/info\.wm\?islandid=\d+$') + a = soup.find('a', attrs = { 'href': yoweb_re }) + if a is None: return None + return a['href'] + def ruling_flag_id(self): + yo = self.yoweb_url() + if yo is None: return None + dataf = fetcher.fetch(yo, 600) + soup = BeautifulSoup(dataf) + ruler_re = regexp.compile('http://\w+\.puzzlepirates\.com/'+ + 'yoweb/flag/info\.wm\?flagid=(\d+)$') + ruler = soup.find('a', attrs = { 'href': ruler_re }) + if not ruler: return None + m = ruler_re.find(ruler['href']) + return m.group(1) + +class OceanInfo(): + # Public data attributes (valid after collect()): + # oi.islands[islename] = IslandInfo(...) + # oi.arches[archname][islename] = IslandInfo(...) + def __init__(self): + self.isleclass = IslandInfo + self.ocean = fetcher.ocean.lower().capitalize() + def collect(self): + cmdl = ['./yppedia-ocean-scraper'] + if opts.localhtml is not None: + cmdl += ['--local-html-dir',opts.localhtml] + cmdl += [self.ocean] + debug('OceanInfo collect running ' + `cmdl`) + oscraper = subprocess.Popen( + cmdl, + stdout = subprocess.PIPE, + cwd = yppsc_dir()+'/yarrg', + shell=False, stderr=None, + ) + h = oscraper.stdout.readline() + debug('OceanInfo collect h '+`h`) + assert(regexp.match('^ocean ', h)) + arch_re = regexp.compile('^ (\S.*)') + island_re = regexp.compile('^ (\S.*)') + + self.islands = { } + self.arches = { } + archname = None + + for l in oscraper.stdout: + debug('OceanInfo collect l '+`l`) + l = l.rstrip('\n') + m = island_re.match(l) + if m: + assert(archname is not None) + islename = m.group(1) + isle = self.isleclass(self.ocean, islename) + isle.arch = archname + self.islands[islename] = isle + self.arches[archname][islename] = isle + continue + m = arch_re.match(l) + if m: + archname = m.group(1) + assert(archname not in self.arches) + self.arches[archname] = { } + continue + assert(False) + oscraper.wait() + assert(oscraper.returncode == 0) + #---------- pretty-printer for tables of pirate puzzle standings ---------- class StandingsTable: @@ -1036,6 +1149,14 @@ def do_crew_of(args, bu): ci = prep_crew_of(args, bu) print ci +def do_flag_of(args, bu): + if len(args) != 1: bu('flag-of takes one pirate name') + max_age = 300 + pi = PirateInfo(args[0], max_age) + if pi.flag is None: fi = None + else: fi = FlagInfo(pi.flag[0], max_age) + print `fi` + def do_standings_crew_of(args, bu): ci = prep_crew_of(args, bu, 60) tab = StandingsTable(sys.stdout) @@ -1048,6 +1169,18 @@ def do_standings_crew_of(args, bu): pi = PirateInfo(p, random.randint(900,1800)) tab.pirate(pi) +def do_ocean(args, bu): + if (len(args)): bu('ocean takes no further arguments') + fetcher.default_ocean() + oi = OceanInfo() + oi.collect() + for islename in sorted(oi.islands.keys()): + isle = oi.islands[islename] + yoweb_url = isle.yoweb_url() + print " %s -- %s" % (islename, yoweb_url) + +#----- modes which use the chat log parser are quite complex ----- + class ProgressPrintPercentage: def __init__(self, f=sys.stdout): self._f = f @@ -1063,8 +1196,6 @@ class ProgressPrintPercentage: self._f.write(' \r') self._f.flush() -#----- modes which use the chat log parser are quite complex ----- - def prep_chat_log(args, bu, progress=ProgressPrintPercentage(), max_myself_age=3600): @@ -1398,6 +1529,9 @@ display modes (for --display) apply to ship-aid: ao('--display', action='store', dest='display', type='choice', choices=['dumb','overwrite'], help='how to display ship aid') + ao('--local-ypp-dir', action='store', dest='localhtml', + help='get yppedia pages from local directory LOCALHTML'+ + ' instead of via HTTP') ao_jt = lambda wh, t: ao( '--timeout-sa-'+wh, action='store', dest='timeout_'+wh, -- 2.30.2