#---------- caching and rate-limiting data fetcher ----------
class Fetcher:
+ # Base class for the fetchers below; it is constructed with the
+ # directory in which it keeps its cache.
- def __init__(self, ocean, cachedir):
+ def __init__(self, cachedir):
debug('Fetcher init %s' % cachedir)
- self.ocean = ocean
self.cachedir = cachedir
+ # Create the cache directory; finding that it already exists
+ # (EEXIST) is not an error, anything else is re-raised.
try: os.mkdir(cachedir)
except (OSError,IOError), oe:
if oe.errno != errno.EEXIST: raise
self._cache_scan(time.time())
- def default_ocean(self, ocean='ice'):
- if self.ocean is None:
- self.ocean = ocean
-
def _cache_scan(self, now):
# returns list of ages, unsorted
ages = []
debug('Fetcher stored')
return data
+class Yoweb(Fetcher):
+ # Fetcher for yoweb pages of one ocean. The ocean may be given as
+ # None, in which case default_ocean() fills it in before first use.
+ def __init__(self, ocean, cachedir):
+ debug('Yoweb init %s' % cachedir)
+ self.ocean = ocean
+ Fetcher.__init__(self, cachedir)
+
+ def default_ocean(self, ocean='ice'):
+ # Sets self.ocean to the given default only if it is still None.
+ if self.ocean is None:
+ self.ocean = ocean
+
def yoweb(self, kind, tail, max_age):
+ # Fetches http://<ocean>.puzzlepirates.com/yoweb/<kind><tail>
+ # via self.fetch, passing max_age through.
self.default_ocean()
+ assert(self.ocean)
url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
self.ocean, kind, tail)
return self.fetch(url, max_age)
+class Yppedia(Fetcher):
+    # Fetcher for YPPedia pages.  Calling the object with the right-hand
+    # part of a page URL returns that page: through self.fetch when no
+    # local HTML directory was configured, otherwise as an open file
+    # from that directory.
+    def __init__(self, cachedir):
+        debug('Yppedia init %s' % cachedir)
+        self.base = 'http://yppedia.puzzlepirates.com/'
+        # captured once here; __call__ uses only this copy
+        self.localhtml = opts.localhtml
+        Fetcher.__init__(self, cachedir)
+
+    def __call__(self, rhs):
+        if self.localhtml is None:
+            url = self.base + rhs
+            debug('Yppedia retrieving YPP '+url)
+            return self.fetch(url, 3000)
+        # Read from the directory which was tested above, rather than
+        # going back to the global opts.
+        return file(self.localhtml + '/' + rhs, 'r')
+
#---------- logging assistance for troubled screenscrapers ----------
class SoupLog:
class CrewInfo(SomethingSoupInfo):
# Public data members:
+ # ci.crewid
# ci.crew = [ ('Captain', ['Pirate', ...]),
# ('Senior Officer', [...]),
# ... ]
- # pi.msgs = [ 'message describing problem with scrape' ]
+ # ci.msgs = [ 'message describing problem with scrape' ]
def __init__(self, crewid, max_age=300):
+ # Remember the id, load crew/info.wm?crewid=<crewid> through the
+ # base class, then scrape the crew out of it.
+ self.crewid = crewid
SomethingSoupInfo.__init__(self,
'crew/info.wm?crewid=', crewid, max_age)
self._find_crew()
def __str__(self):
return `(self.crew, self.msgs)`
+class FlagRelation():
+    # Plain record of how one flag stands towards another.
+    # Public data members (assigned by hand by whoever creates the object):
+    #   other_flagname
+    #   other_flagid
+    #   yoweb_heading
+    #   this_declaring
+    #   other_declaring_min
+    #   other_declaring_max
+    # where {this,other}_declaring{,_min,_max} are:
+    #   -1  {this,other} is declaring war
+    #    0  {this,other} is not doing either
+    #   +1  {this,other} is allying
+    def __repr__(self):
+        fields = (self.yoweb_heading, self.this_declaring,
+                  self.other_declaring_min, self.other_declaring_max,
+                  self.other_flagname, self.other_flagid)
+        return '<FlagRelation %s %d/%d..%d %s %s>' % fields
+
class FlagInfo(SomethingSoupInfo):
# Public data members (after init):
#
+ # flagid
# name # string
#
- # relations[n] = (otherflagname, otherflagid, [stringfromyoweb],
- # thisdeclaring, otherdeclaringmin, otherdeclaringmax)
- # # where {this,other}declaring{,min,max} are:
- # # -1 {this,other} is declaring war
- # # 0 {this,other} is not doing either
- # # +1 {this,other} is allying
+ # relations[n] = FlagRelation
# relation_byname[otherflagname] = relations[some_n]
- # relation_byid[otherflagname] = relations[some_n]
+ # relation_byid[otherflagid] = relations[some_n]
#
# islands[n] = (islandname, islandid)
#
def __init__(self, flagid, max_age=600):
+ self.flagid = flagid
SomethingSoupInfo.__init__(self,
'flag/info.wm?flagid=', flagid, max_age)
self._find_flag()
(`head`, ``waritem``))
def wihelp_item(waritem, thing):
+ # Returns (problem, id, name) for a link item whose href ends
+ # in ?<thing>id=<digits>: problem is None on success, otherwise
+ # it is a message and id and name are None.
- if waritem.name == 'a':
- url = waritem.get('href', None)
- if url is None:
- return ('no url for '+thing,None,None)
- else:
- hr = waritem.find('a',{'href':True})
- if not hr: return ('no a for '+thing,None,None)
- url = hr['href']
+ url = waritem.get('href', None)
+ if url is None:
+ return ('no url for '+thing,None,None)
m = regexp.search('\?'+thing+'id=(\d+)$', url)
if not m: return ('no '+thing+'id',None,None)
tid = m.group(1)
- tname = m.string
+ tname = waritem.string
if tname is None:
return (thing+' name not just string',None,None)
return (None,tid,tname)
if rel: return 'flag id twice!'
if flagname in self.relation_byname:
return 'flag name twice!'
- rel = (flagname,flagid,[], thisdecl,othermin,othermax)
+ rel = FlagRelation()
+ rel.other_flagname = flagname
+ rel.other_flagid = flagid
+ rel.yoweb_heading = head
+ rel.this_declaring = thisdecl
+ rel.other_declaring_min = othermin
+ rel.other_declaring_max = othermax
self.relations.append(rel)
self.relation_byid[flagid] = rel
- self.relation_byname[flagid] = rel
+ # index by name, which is what the 'flag name twice!' check
+ # above looks up
+ self.relation_byname[flagname] = rel
how = (wi_warn, None)
- for waritem in warinfo.contents:
- debug('WARITEM '+``waritem``)
- if isinstance(waritem, unicode):
- waritem = waritem.strip()
- if waritem: warn('unknown waritem '+``waritem``)
- continue
- if waritem.name == 'br':
- continue
- if waritem.name == 'b':
- head = ''.join(waritem.findAll(text=True))
+ # Walk the <font> headings and <a> links: each heading selects,
+ # via warmap, the handler applied to the links which follow it.
+ for waritem in warinfo.findAll(['font','a']):
+ if waritem is None: break
+ if waritem.name == 'font':
+ colour = waritem.get('color',None)
+ # a <font> with no color attribute is also not a heading
+ if colour is None or colour.lstrip('#') != '958A5F':
+ warn('strange colour %s in %s' %
+ (colour,``waritem``))
+ continue
+ head = waritem.string
+ if head is None:
+ warn('no head string in '+``waritem``)
+ continue
head = regexp.sub('\\s+', ' ', head).strip()
head = head.rstrip(':')
how = (head,) + warmap.get(head, (wi_warn,))
continue
+ assert(waritem.name == 'a')
+
debug('WARHOW %s(%s, waritem, *%s)' %
(how[1], `how[0]`, `how[2:]`))
bad = how[1](how[0], waritem, *how[2:])
#---------- scraper for ocean info incl. embargoes etc. ----------
-class IslandInfo():
+class IslandBasicInfo():
+ # Minimal description of one island; constructing it does no fetching.
+ # Public data attributes:
+ # ocean
+ # name
+ # Public data attributes maybe set by caller:
+ # arch
def __init__(self, ocean, islename):
self.ocean = ocean
self.name = islename
- def collect(self):
- pass
- def yppedia_dataf(self):
+ def yppedia(self):
+ # Returns what the global yppedia fetcher gives for this island's
+ # page, named <name>_(<ocean>) with spaces turned into underscores
+ # and each part URL-quoted.
def q(x): return urllib.quote(x.replace(' ','_'))
url_rhs = q(self.name) + '_(' + q(self.ocean) + ')'
- if opts.localhtml is None:
- url = 'http://yppedia.puzzlepirates.com/' + url_rhs
- debug('IslandInfo retrieving YPP '+url);
- return urllib.urlopen(url)
- else:
- return file(opts.localhtml + '/' + url_rhs, 'r')
- def yoweb_url(self):
- soup = BeautifulSoup(self.yppedia_dataf())
+ return yppedia(url_rhs)
+ def __str__(self):
+ return `(self.ocean, self.name)`
+
+class IslandExtendedInfo(IslandBasicInfo):
+ # Island info which also scrapes the island's yoweb URL and id from
+ # its YPPedia page, and the flag id linked from that yoweb page.
+ # All of this is done by the constructor.
+ # Public data attributes (inherited):
+ # ocean
+ # name
+ # Public data attributes (additional):
+ # islandid
+ # yoweb_url
+ # flagid
+ def __init__(self, ocean, islename):
+ IslandBasicInfo.__init__(self, ocean, islename)
+ self.islandid = None
+ self.yoweb_url = None
+ self._collect_yoweb()
+ self._collect_flagid()
+
+ def _collect_yoweb(self):
+ # Sets yoweb_url and islandid from a link on the YPPedia page whose
+ # href is a yoweb island info URL; both stay None without one.
+ debug('IEI COLLECT YOWEB '+`self.name`)
+ self.islandid = None
+ self.yoweb_url = None
+
+ soup = BeautifulSoup(self.yppedia())
content = soup.find('div', attrs = {'id': 'content'})
yoweb_re = regexp.compile('^http://\w+\.puzzlepirates\.com/'+
- 'yoweb/island/info\.wm\?islandid=\d+$')
+ 'yoweb/island/info\.wm\?islandid=(\d+)$')
a = soup.find('a', attrs = { 'href': yoweb_re })
- if a is None: return None
- return a['href']
- def ruling_flag_id(self):
- yo = self.yoweb_url()
+ if a is None:
+ debug('IEI COLLECT YOWEB '+`self.name`+' NONE')
+ return
+
+ debug('IEI COLLECT YOWEB '+`self.name`+' GOT '+``a``)
+ self.yoweb_url = a['href']
+ m = yoweb_re.search(self.yoweb_url)
+ self.islandid = m.group(1)
+
+ def _collect_flagid(self):
+ # Sets flagid from a flag info link on the island's yoweb page;
+ # it stays None if there is no yoweb URL or no such link.
+ self.flagid = None
+
+ yo = self.yoweb_url
+ debug('IEI COLLECT FLAGID '+`self.name`+' URL '+`yo`)
if yo is None: return None
- dataf = fetcher.fetch(yo, 600)
+ dataf = fetcher.fetch(yo, 1800)
soup = BeautifulSoup(dataf)
- ruler_re = regexp.compile('http://\w+\.puzzlepirates\.com/'+
- 'yoweb/flag/info\.wm\?flagid=(\d+)$')
+ ruler_re = regexp.compile(
+ '/yoweb/flag/info\.wm\?flagid=(\d+)$')
ruler = soup.find('a', attrs = { 'href': ruler_re })
- if not ruler: return None
- m = ruler_re.find(ruler['href'])
- return m.group(1)
+ if not ruler:
+ debug('IEI COLLECT FLAGID '+`self.name`+' NONE')
+ return
+ debug('IEI COLLECT FLAGID '+`self.name`+' GOT '+``ruler``)
+ m = ruler_re.search(ruler['href'])
+ self.flagid = m.group(1)
+
+ def __str__(self):
+ return `(self.ocean, self.islandid, self.name,
+ self.yoweb_url, self.flagid)`
+
+class IslandFlagInfo(IslandExtendedInfo):
+    # Island info which additionally loads the FlagInfo for flagid.
+    # Public data attributes (inherited):
+    #   ocean
+    #   name
+    #   islandid
+    #   yoweb_url
+    #   flagid
+    # Public data attributes (additional):
+    #   flag        FlagInfo, or None when flagid is None
+    def __init__(self, ocean, islename):
+        IslandExtendedInfo.__init__(self, ocean, islename)
+        self.flag = None
+        self._collect_flag()
+
+    def _collect_flag(self):
+        if self.flagid is not None:
+            self.flag = FlagInfo(self.flagid, 1800)
+
+    def __str__(self):
+        base = IslandExtendedInfo.__str__(self)
+        return '; '.join((base, str(self.flag)))
+
+class NullProgressReporter():
+    # Progress reporter which reports nothing: both methods do nothing.
+    def doing(self, msg):
+        return None
+    def stop(self):
+        return None
+
+class TypewriterProgressReporter():
+    # Shows the current activity on one line of stdout, rewriting that
+    # line in place each time.
+    def __init__(self):
+        self._l = 0     # length of the message most recently written
+    def doing(self,m):
+        self._doing(m + '...')
+    def _doing(self,m):
+        pieces = ['\r', m]
+        surplus = self._l - len(m)
+        if surplus > 0:
+            # blank out what is left of a longer previous message,
+            # then step back over the blanks
+            pieces.append(' ' * surplus)
+            pieces.append('\b' * surplus)
+        for piece in pieces:
+            self._write(piece)
+        self._l = len(m)
+        sys.stdout.flush()
+    def stop(self):
+        self._doing('')
+        self._l = 0
+    def _write(self,t):
+        sys.stdout.write(t)
class OceanInfo():
- # Public data attributes (valid after collect()):
+ # Public data attributes:
- # oi.islands[islename] = IslandInfo(...)
- # oi.arches[archname][islename] = IslandInfo(...)
+ # oi.islands[islename] = isleclass(...)
+ # oi.arches[archname][islename] = isleclass(...)
+ # where isleclass is the constructor argument; everything is
+ # collected by the constructor itself.
- def __init__(self):
- self.isleclass = IslandInfo
+ def __init__(self, isleclass=IslandBasicInfo):
+ self.isleclass = isleclass
self.ocean = fetcher.ocean.lower().capitalize()
- def collect(self):
+
+ progressreporter.doing('fetching ocean info')
+
cmdl = ['./yppedia-ocean-scraper']
if opts.localhtml is not None:
cmdl += ['--local-html-dir',opts.localhtml]
arch_re = regexp.compile('^ (\S.*)')
island_re = regexp.compile('^ (\S.*)')
self.islands = { }
self.arches = { }
archname = None
+ isles = [ ]
+ progressreporter.doing('parsing ocean info')
+
for l in oscraper.stdout:
debug('OceanInfo collect l '+`l`)
l = l.rstrip('\n')
if m:
assert(archname is not None)
islename = m.group(1)
- isle = self.isleclass(self.ocean, islename)
- isle.arch = archname
- self.islands[islename] = isle
- self.arches[archname][islename] = isle
+ isles.append((archname, islename))
continue
m = arch_re.match(l)
if m:
self.arches[archname] = { }
continue
assert(False)
+
+ # Wait for the scraper only once its stdout has been read to EOF:
+ # waiting first can deadlock if it fills the pipe. This is still
+ # before any per-island fetching starts.
oscraper.wait()
assert(oscraper.returncode == 0)
+
+ # every collected island, including the last one
+ for i, (archname, islename) in enumerate(isles):
+ progressreporter.doing(
+ 'fetching isle info %2d/%d (%s: %s)'
+ % (i, len(isles), archname, islename))
+ isle = self.isleclass(self.ocean, islename)
+ isle.arch = archname
+ self.islands[islename] = isle
+ self.arches[archname][islename] = isle
+
+ def __str__(self):
+ return `(self.islands, self.arches)`
#---------- pretty-printer for tables of pirate puzzle standings ----------
print '%s: %s,' % (`pirate`, info)
print '}'
-def prep_crew_of(args, bu, max_age=300):
- if len(args) != 1: bu('crew-of takes one pirate name')
+def prep_crewflag_of(args, bu, max_age, selector, constructor):
+ # Shared helper for the crew-of and flag-of style modes. Calls bu
+ # with a message unless args is exactly one pirate name; loads that
+ # pirate's PirateInfo; returns None if selector(pi) is None, and
+ # otherwise constructor(<first element of the selection>, max_age).
+ if len(args) != 1: bu('crew-of etc. take one pirate name')
pi = PirateInfo(args[0], max_age)
- if pi.crew is None: return None
- return CrewInfo(pi.crew[0], max_age)
+ cf = selector(pi)
+ if cf is None: return None
+ return constructor(cf[0], max_age)
+
+def prep_crew_of(args, bu, max_age=300):
+    # CrewInfo for the crew of the pirate named in args, or None.
+    def crew_of(pi): return pi.crew
+    return prep_crewflag_of(args, bu, max_age, crew_of, CrewInfo)
+
+def prep_flag_of(args, bu, max_age=300):
+    # FlagInfo for the flag of the pirate named in args, or None.
+    def flag_of(pi): return pi.flag
+    return prep_crewflag_of(args, bu, max_age, flag_of, FlagInfo)
def do_crew_of(args, bu):
+ # crew-of mode: print what prep_crew_of returns for the named pirate
+ # (which may be None).
ci = prep_crew_of(args, bu)
print ci
def do_flag_of(args, bu):
+ # flag-of mode: print what prep_flag_of returns for the named pirate
+ # (which may be None).
- if len(args) != 1: bu('flag-of takes one pirate name')
- max_age = 300
- pi = PirateInfo(args[0], max_age)
- if pi.flag is None: fi = None
- else: fi = FlagInfo(pi.flag[0], max_age)
- print `fi`
+ fi = prep_flag_of(args, bu)
+ print fi
def do_standings_crew_of(args, bu):
ci = prep_crew_of(args, bu, 60)
def do_ocean(args, bu):
+ # ocean mode: collect the ocean's islands with their flags, then
+ # print the OceanInfo followed by each island in name order.
if (len(args)): bu('ocean takes no further arguments')
fetcher.default_ocean()
- oi = OceanInfo()
- oi.collect()
+ oi = OceanInfo(IslandFlagInfo)
+ # OceanInfo leaves a progress message on the current line; clear it
+ # before printing, as the embargoes modes do.
+ progressreporter.stop()
+ print oi
+ for islename in sorted(oi.islands.keys()):
+ isle = oi.islands[islename]
+ print isle
+
+def do_embargoes(args, bu):
+    # embargoes mode: print, archipelago by archipelago, a table of each
+    # island, its owning flag, and the flags against which that flag has
+    # this_declaring < 0.
+    if len(args): bu('embargoes takes no further arguments')
+    fetcher.default_ocean()
+    oi = OceanInfo(IslandFlagInfo)
+    wr = sys.stdout.write
+
+    # Clear the progress message before the first line of real output,
+    # so that the table header starts on a clean line.
+    progressreporter.stop()
+
+    print ('EMBARGOES: Island | Owning flag'+
+        ' | Embargoed flags')
+
+    def getflname(isle):
+        # text for the owning-flag column
+        if isle.islandid is None: return 'uncolonisable'
+        if isle.flag is None: return 'uncolonised'
+        return isle.flag.name
+
+    for archname in sorted(oi.arches.keys()):
+        print 'ARCHIPELAGO: ',archname
+        for islename in sorted(oi.arches[archname].keys()):
+            isle = oi.islands[islename]
+            wr(' %-20s | ' % isle.name)
+            flname = getflname(isle)
+            wr('%-30s | ' % flname)
+            flag = isle.flag
+            if flag is None: print ''; continue
+            delim = ''
+            for rel in flag.relations:
+                if rel.this_declaring >= 0: continue
+                wr(delim)
+                wr(rel.other_flagname)
+                delim = '; '
+            print ''
+
+def do_embargoes_flag_of(args, bu):
+ # embargoes-flag-of mode: for the flag of the named pirate, list the
+ # islands whose owning flag has a relation with this_declaring < 0
+ # towards that flag, then print the war_flag report for it.
+ progressreporter.doing('fetching flag info')
+ fi = prep_flag_of(args, bu)
+ if fi is None:
+ progressreporter.stop()
+ print 'Pirate is not in a flag.'
+ return
+
+ oi = OceanInfo(IslandFlagInfo)
+
+ progressreporter.stop()
+ print ''
+
+ any = False
for islename in sorted(oi.islands.keys()):
isle = oi.islands[islename]
- yoweb_url = isle.yoweb_url()
- print " %s -- %s" % (islename, yoweb_url)
+ flag = isle.flag
+ if flag is None: continue
+ for rel in flag.relations:
+ if rel.this_declaring >= 0: continue
+ if rel.other_flagid != fi.flagid: continue
+ if not any: print 'EMBARGOED:'
+ any = True
+ print " %-30s (%s)" % (islename, flag.name)
+ if not any:
+ print 'No embargoes.'
+ print ''
+
+ war_flag(fi)
+ print ''
+
+def do_war_flag_of(args, bu):
+    # war-flag-of mode: print the war_flag report for the flag of the
+    # named pirate.
+    fi = prep_flag_of(args, bu)
+    if fi is None:
+        # prep_flag_of returns None for a pirate with no flag, and
+        # war_flag cannot report on that; say so instead.
+        print 'Pirate is not in a flag.'
+        return
+    war_flag(fi)
+
+def war_flag(fi):
+    # Prints the flags against which fi has this_declaring < 0, in two
+    # passes: first those whose other_declaring_max is also < 0, then
+    # the rest.  Each pass gets a header line before its first entry;
+    # if neither pass printed anything, says so.
+    found_any = False
+    passes = ((True, 'SINKING PvP'), (False, 'RISK OF SINKING PvP'))
+    for certain, title in passes:
+        header_done = False
+        for rel in fi.relations:
+            if rel.this_declaring >= 0:
+                continue
+            other_at_war = rel.other_declaring_max < 0
+            if other_at_war != certain:
+                continue
+            if not header_done:
+                print '%s (%s):' % (title, rel.yoweb_heading)
+                header_done = True
+            found_any = True
+            print " ", rel.other_flagname
+    if not found_any:
+        print 'No sinking PvP.'
#----- modes which use the chat log parser are quite complex -----
#---------- main program ----------
def main():
- global opts, fetcher
+ global opts, fetcher, yppedia, progressreporter
pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
+ yoweb-scrape [--ocean OCEAN ...] ocean|embargoes
+ yoweb-scrape [--ocean OCEAN ...] war-flag-of|embargoes-flag-of PIRATE
yoweb-scrape [options] ship-aid CHAT-LOG (must be .../PIRATE_OCEAN_chat-log*)
display modes (for --display) apply to ship-aid:
else:
opts.display = 'overwrite'
- fetcher = Fetcher(opts.ocean, opts.cache_dir)
+ fetcher = Yoweb(opts.ocean, opts.cache_dir)
+ yppedia = Yppedia(opts.cache_dir)
+
+ # Progress messages are written to stdout, so stdout is the stream
+ # which has to be a terminal for them to make sense.
+ if opts.debug or not sys.stdout.isatty():
+ progressreporter = NullProgressReporter()
+ else:
+ progressreporter = TypewriterProgressReporter()
mode_fn(args[1:], pa.error)