#---------- caching and rate-limiting data fetcher ----------
class Fetcher:
- def __init__(self, ocean, cachedir):
+ def __init__(self, cachedir):
debug('Fetcher init %s' % cachedir)
- self.ocean = ocean
self.cachedir = cachedir
try: os.mkdir(cachedir)
except (OSError,IOError), oe:
if oe.errno != errno.EEXIST: raise
self._cache_scan(time.time())
- def default_ocean(self, ocean='ice'):
- if self.ocean is None:
- self.ocean = ocean
-
def _cache_scan(self, now):
# returns list of ages, unsorted
ages = []
debug('Fetcher stored')
return data
+class Yoweb(Fetcher):  # Fetcher that builds yoweb URLs for one ocean
+ def __init__(self, ocean, cachedir):  # ocean may be None; default_ocean() fills it in later
+ debug('Yoweb init %s' % cachedir)
+ self.ocean = ocean
+ Fetcher.__init__(self, cachedir)
+
+ def default_ocean(self, ocean='ice'):  # sets self.ocean only if it is still None
+ if self.ocean is None:
+ self.ocean = ocean
+
def yoweb(self, kind, tail, max_age):
self.default_ocean()
+ assert(self.ocean)  # the hostname below is built from self.ocean, so it must be non-empty
url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
self.ocean, kind, tail)
return self.fetch(url, max_age)
+class Yppedia(Fetcher):
+ # Fetcher for YPPedia pages.  Call the instance with the part of
+ # the URL after the site root.  When opts.localhtml was None at
+ # construction time the page is obtained via Fetcher.fetch();
+ # otherwise a file of the same name is opened from that directory.
+ def __init__(self, cachedir):
+ debug('Yppedia init %s' % cachedir)
+ self.base = 'http://yppedia.puzzlepirates.com/'
+ self.localhtml = opts.localhtml
+ Fetcher.__init__(self, cachedir)
+
+ def __call__(self, rhs):
+ if self.localhtml is None:
+ url = self.base + rhs
+ debug('Yppedia retrieving YPP '+url);
+ # 3000 is passed as fetch's max_age argument
+ return self.fetch(url, 3000)
+ else:
+ # use the value captured in __init__, the same one tested above
+ return file(self.localhtml + '/' + rhs, 'r')
+
#---------- logging assistance for troubled screenscrapers ----------
class SoupLog:
return `(self.crew, self.msgs)`
class FlagInfo(SomethingSoupInfo):
+ # Public data members (after init):
+ #
+ # name # string
+ #
+ # relations[n] = (otherflagname, otherflagid, [stringfromyoweb],
+ # thisdeclaring, otherdeclaringmin, otherdeclaringmax)
+ # # where {this,other}declaring{,min,max} are:
+ # # -1 {this,other} is declaring war
+ # # 0 {this,other} is not doing either
+ # # +1 {this,other} is allying
+ # relation_byname[otherflagname] = relations[some_n]
+ # relation_byid[otherflagid] = relations[some_n]
+ #
+ # islands[n] = (islandid, islandname)
+ #
+ # Flag and island ids are kept as the digit strings matched in
+ # the page's link URLs; they are not converted to integers.
+ #
def __init__(self, flagid, max_age=600):
SomethingSoupInfo.__init__(self,
'flag/info.wm?flagid=', flagid, max_age)
def _find_flag(self):
font2 = self._soup.find('font',{'size':'+2'})
- self.flag = font2.find('b').contents[0]
+ self.name = font2.find('b').contents[0]
+
+ self.relations = [ ]
+ self.relation_byname = { }
+ self.relation_byid = { }
+ self.islands = [ ]
+
magnate = self._soup.find('img',{'src':
'/yoweb/images/repute-MAGNATE.png'})
warinfo = (magnate.findParent('table').findParent('tr').
findNextSibling('tr').findNext('td',{'align':'left'}))
- for waritem in warinfo.contents:
- print 'ITEM ',`waritem`
+
+ def warn(m):
+ print >>sys.stderr, 'WARNING: '+m
+
+ # Handlers below are called as handler(head, waritem, *extra)
+ # and return None on success or an error string to be warned about.
+
+ def wi_warn(head, waritem):
+ warn('unknown warmap item: %s: %s' %
+ (`head`, ``waritem``))
+
+ # Returns (errorstring, None, None) or (None, id, name) for a link
+ # whose href ends in ?<thing>id=<digits>.
+ def wihelp_item(waritem, thing):
+ url = waritem.get('href', None)
+ if url is None:
+ return ('no url for '+thing,None,None)
+ m = regexp.search('\?'+thing+'id=(\d+)$', url)
+ if not m: return ('no '+thing+'id',None,None)
+ tid = m.group(1)
+ tname = waritem.string
+ if tname is None:
+ return (thing+' name not just string',None,None)
+ return (None,tid,tname)
+
+ def wi_alwar(head, waritem, thisdecl, othermin, othermax):
+ (err,flagid,flagname) = wihelp_item(waritem,'flag')
+ if err: return err
+ rel = self.relation_byid.get(flagid, None)
+ if rel: return 'flag id twice!'
+ if flagname in self.relation_byname:
+ return 'flag name twice!'
+ rel = (flagname,flagid,head, thisdecl,othermin,othermax)
+ self.relations.append(rel)
+ self.relation_byid[flagid] = rel
+ # key by name, not id: otherwise the duplicate-name check
+ # above never fires and lookups by flag name always miss
+ self.relation_byname[flagname] = rel
+
+ def wi_isle(head, waritem):
+ (err,isleid,islename) = wihelp_item(waritem,'island')
+ if err: return err
+ self.islands.append((isleid,islename))
+
+ # heading text -> (handler, extra handler args...)
+ warmap = {
+ 'Allied with': (wi_alwar,+1,+1,+1),
+ 'Declaring war against': (wi_alwar,-1, 0,+1),
+ 'At war with': (wi_alwar,-1,-1,-1),
+ 'Trying to form an alliance with': (wi_alwar,+1,-1,0),
+ 'Islands controlled by this flag': (wi_isle,),
+ }
+
+ # how = (head, handler, extra args...), matching how[0]/how[1]/how[2:]
+ # as used below; before any heading is seen, links are just warned about
+ how = (None, wi_warn)
+
+ for waritem in warinfo.findAll(['font','a']):
+ if waritem is None: break
+ if waritem.name == 'font':
+ colour = waritem.get('color',None)
+ # a <font> without a color attribute yields None here
+ if colour is None or colour.lstrip('#') != '958A5F':
+ warn('strange colour %s in %s' %
+ (colour,``waritem``))
+ continue
+ head = waritem.string
+ if head is None:
+ warn('no head string in '+``waritem``)
+ continue
+ head = regexp.sub('\\s+', ' ', head).strip()
+ head = head.rstrip(':')
+ how = (head,) + warmap.get(head, (wi_warn,))
+ continue
+ assert(waritem.name == 'a')
+
+ debug('WARHOW %s(%s, waritem, *%s)' %
+ (how[1], `how[0]`, `how[2:]`))
+ bad = how[1](how[0], waritem, *how[2:])
+ if bad:
+ warn('bad waritem %s: %s: %s' % (`how[0]`,
+ bad, ``waritem``))
+
+ def __str__(self):
+ return `(self.name, self.islands, self.relations)`
#---------- scraper for ocean info incl. embargoes etc. ----------
-class IslandInfo():
+class IslandBasicInfo():  # an island identified by ocean and name; can fetch its YPPedia page
+ # Public data attributes:
+ # ocean
+ # name
+ # Public data attributes maybe set by caller:
+ # arch
def __init__(self, ocean, islename):
self.ocean = ocean
self.name = islename
- def collect(self):
- pass
- def yppedia_dataf(self):
+ def yppedia(self):  # page name is "<name>_(<ocean>)", spaces -> '_', then URL-quoted
def q(x): return urllib.quote(x.replace(' ','_'))
url_rhs = q(self.name) + '_(' + q(self.ocean) + ')'
- if opts.localhtml is None:
- url = 'http://yppedia.puzzlepirates.com/' + url_rhs
- debug('IslandInfo retrieving YPP '+url);
- return urllib.urlopen(url)
- else:
- return file(opts.localhtml + '/' + url_rhs, 'r')
- def yoweb_url(self):
- soup = BeautifulSoup(self.yppedia_dataf())
+ return yppedia(url_rhs)  # the module-level yppedia object set up in main(), not this method
+ def __str__(self):
+ return `(self.ocean, self.name)`
+
+class IslandExtendedInfo(IslandBasicInfo):  # also collects yoweb URL, island id and a flag id at construction
+ # Public data attributes (inherited):
+ # ocean
+ # name
+ # Public data attributes (additional):
+ # islandid
+ # yoweb_url
+ # flagid
+ def __init__(self, ocean, islename):
+ IslandBasicInfo.__init__(self, ocean, islename)
+ self.islandid = None
+ self.yoweb_url = None
+ self._collect_yoweb()
+ self._collect_flagid()  # must follow _collect_yoweb(): it reads self.yoweb_url
+
+ def _collect_yoweb(self):  # sets yoweb_url and islandid from the YPPedia page, or leaves both None
+ debug('IEI COLLECT YOWEB '+`self.name`)
+ self.islandid = None
+ self.yoweb_url = None
+
+ soup = BeautifulSoup(self.yppedia())
content = soup.find('div', attrs = {'id': 'content'})
yoweb_re = regexp.compile('^http://\w+\.puzzlepirates\.com/'+
- 'yoweb/island/info\.wm\?islandid=\d+$')
+ 'yoweb/island/info\.wm\?islandid=(\d+)$')  # group 1 captures the island id
a = soup.find('a', attrs = { 'href': yoweb_re })
- if a is None: return None
- return a['href']
- def ruling_flag_id(self):
- yo = self.yoweb_url()
+ if a is None:
+ debug('IEI COLLECT YOWEB '+`self.name`+' NONE')
+ return
+
+ debug('IEI COLLECT YOWEB '+`self.name`+' GOT '+``a``)
+ self.yoweb_url = a['href']
+ m = yoweb_re.search(self.yoweb_url)  # this href was selected by yoweb_re above
+ self.islandid = m.group(1)
+
+ def _collect_flagid(self):  # sets flagid from the first flag-info link on the yoweb page, else None
+ self.flagid = None
+
+ yo = self.yoweb_url
+ debug('IEI COLLECT FLAGID '+`self.name`+' URL '+`yo`)
if yo is None: return None
- dataf = fetcher.fetch(yo, 600)
+ dataf = fetcher.fetch(yo, 1800)  # second argument is fetch's max_age
soup = BeautifulSoup(dataf)
- ruler_re = regexp.compile('http://\w+\.puzzlepirates\.com/'+
- 'yoweb/flag/info\.wm\?flagid=(\d+)$')
+ ruler_re = regexp.compile(
+ '/yoweb/flag/info\.wm\?flagid=(\d+)$')  # no host part, so site-relative hrefs match too
ruler = soup.find('a', attrs = { 'href': ruler_re })
- if not ruler: return None
- m = ruler_re.find(ruler['href'])
- return m.group(1)
+ if not ruler:
+ debug('IEI COLLECT FLAGID '+`self.name`+' NONE')
+ return
+ debug('IEI COLLECT FLAGID '+`self.name`+' GOT '+``ruler``)
+ m = ruler_re.search(ruler['href'])
+ self.flagid = m.group(1)  # kept as the matched digit string
+
+ def __str__(self):
+ return `(self.ocean, self.islandid, self.name,
+ self.yoweb_url, self.flagid)`
class OceanInfo():
- # Public data attributes (valid after collect()):
+ # Public data attributes:
# oi.islands[islename] = IslandInfo(...)
# oi.arches[archname][islename] = IslandInfo(...)
- def __init__(self):
- self.isleclass = IslandInfo
+ def __init__(self, isleclass=IslandBasicInfo):
+ self.isleclass = isleclass
self.ocean = fetcher.ocean.lower().capitalize()
- def collect(self):
+
cmdl = ['./yppedia-ocean-scraper']
if opts.localhtml is not None:
cmdl += ['--local-html-dir',opts.localhtml]
oscraper.wait()
assert(oscraper.returncode == 0)
+ def __str__(self):
+ return `(self.islands, self.arches)`
+
#---------- pretty-printer for tables of pirate puzzle standings ----------
class StandingsTable:
pi = PirateInfo(args[0], max_age)
if pi.flag is None: fi = None
else: fi = FlagInfo(pi.flag[0], max_age)
- print `fi`
+ print fi
def do_standings_crew_of(args, bu):
ci = prep_crew_of(args, bu, 60)
def do_ocean(args, bu):
if (len(args)): bu('ocean takes no further arguments')
fetcher.default_ocean()
- oi = OceanInfo()
- oi.collect()
+ oi = OceanInfo(IslandExtendedInfo)  # passed as OceanInfo's isleclass argument
+ print oi
for islename in sorted(oi.islands.keys()):
isle = oi.islands[islename]
- yoweb_url = isle.yoweb_url()
- print " %s -- %s" % (islename, yoweb_url)
+ print isle  # printed via the island object's __str__
#----- modes which use the chat log parser are quite complex -----
#---------- main program ----------
def main():
- global opts, fetcher
+ global opts, fetcher, yppedia
pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
else:
opts.display = 'overwrite'
- fetcher = Fetcher(opts.ocean, opts.cache_dir)
+ fetcher = Yoweb(opts.ocean, opts.cache_dir)
+ yppedia = Yppedia(opts.cache_dir)
mode_fn(args[1:], pa.error)