#---------- caching and rate-limiting data fetcher ----------
class Fetcher:
- def __init__(self, ocean, cachedir):
+ def __init__(self, cachedir):  # takes only the cache directory now; the ocean name is held by the Yoweb subclass
debug('Fetcher init %s' % cachedir)
- self.ocean = ocean
self.cachedir = cachedir
try: os.mkdir(cachedir)
except (OSError,IOError), oe:
if oe.errno != errno.EEXIST: raise
self._cache_scan(time.time())
- def default_ocean(self, ocean='ice'):
- if self.ocean is None:
- self.ocean = ocean
-
def _cache_scan(self, now):
# returns list of ages, unsorted
ages = []
debug('Fetcher stored')
return data
+class Yoweb(Fetcher):  # Fetcher that also carries an ocean name and builds per-ocean yoweb URLs
+ def __init__(self, ocean, cachedir):
+ debug('Yoweb init %s' % cachedir)
+ self.ocean = ocean  # may be None; default_ocean() fills it in on demand
+ Fetcher.__init__(self, cachedir)
+# default_ocean: set self.ocean to `ocean` only when no ocean has been chosen yet
+ def default_ocean(self, ocean='ice'):
+ if self.ocean is None:
+ self.ocean = ocean
+# yoweb: fetch http://<ocean>.puzzlepirates.com/yoweb/<kind><tail> through self.fetch(url, max_age)
def yoweb(self, kind, tail, max_age):
self.default_ocean()
+ assert(self.ocean)  # an empty ocean name must not reach the URL below
url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
self.ocean, kind, tail)
return self.fetch(url, max_age)
+class Yppedia(Fetcher):
+ def __init__(self, cachedir):
+ debug('Yoweb init %s' % cachedir)
+ self.base = 'http://yppedia.puzzlepirates.com/'
+ self.localhtml = opts.localhtml
+ Fetcher.__init__(self, cachedir)
+
+ def __call__(self, rhs):
+ if self.localhtml is None:
+ url = self.base + rhs
+ debug('Yppedia retrieving YPP '+url);
+ return self.fetch(url, 3000)
+ else:
+ return file(opts.localhtml + '/' + rhs, 'r')
+
#---------- logging assistance for troubled screenscrapers ----------
class SoupLog:
#---------- scraper for ocean info incl. embargoes etc. ----------
-class IslandInfo():
+class IslandBasicInfo():  # one island identified by (ocean, name); the constructor only stores the two names
+ # Public data attributes:
+ # ocean
+ # name
+ # Public data attributes which may be set by the caller (OceanInfo sets it):
+ # arch
def __init__(self, ocean, islename):
self.ocean = ocean
self.name = islename
- def collect(self):
- pass
- def yppedia_dataf(self):
+ def yppedia(self):  # returns what the global yppedia object yields for the page 'Name_(Ocean)', URL-quoted
def q(x): return urllib.quote(x.replace(' ','_'))
url_rhs = q(self.name) + '_(' + q(self.ocean) + ')'
- if opts.localhtml is None:
- url = 'http://yppedia.puzzlepirates.com/' + url_rhs
- debug('IslandInfo retrieving YPP '+url);
- return urllib.urlopen(url)
- else:
- return file(opts.localhtml + '/' + url_rhs, 'r')
- def yoweb_url(self):
- soup = BeautifulSoup(self.yppedia_dataf())
+ return yppedia(url_rhs)  # bare name resolves to the module-level yppedia object, not to this method
+ def __str__(self):
+ return `(self.ocean, self.name)`  # repr of the identifying tuple
+
+class IslandExtendedInfo(IslandBasicInfo):  # island record that scrapes its yoweb URL, island id and flag id at construction
+ # Public data attributes (inherited):
+ # ocean
+ # name
+ # Public data attributes (additional; each is None when not found):
+ # islandid
+ # yoweb_url
+ # flagid
+ def __init__(self, ocean, islename):
+ IslandBasicInfo.__init__(self, ocean, islename)
+ self.islandid = None
+ self.yoweb_url = None
+ self._collect_yoweb()
+ self._collect_flagid()  # must follow _collect_yoweb: it reads self.yoweb_url
+# _collect_yoweb: find the yoweb island-info link in the island's YPPedia page; sets yoweb_url and islandid
+ def _collect_yoweb(self):
+ debug('IEI COLLECT YOWEB '+`self.name`)
+ self.islandid = None
+ self.yoweb_url = None
+
+ soup = BeautifulSoup(self.yppedia())
content = soup.find('div', attrs = {'id': 'content'})
yoweb_re = regexp.compile('^http://\w+\.puzzlepirates\.com/'+
- 'yoweb/island/info\.wm\?islandid=\d+$')
+ 'yoweb/island/info\.wm\?islandid=(\d+)$')  # group 1 captures the digits of the island id
a = soup.find('a', attrs = { 'href': yoweb_re })
- if a is None: return None
- return a['href']
- def ruling_flag_id(self):
- yo = self.yoweb_url()
+ if a is None:
+ debug('IEI COLLECT YOWEB '+`self.name`+' NONE')
+ return
+
+ debug('IEI COLLECT YOWEB '+`self.name`+' GOT '+``a``)
+ self.yoweb_url = a['href']
+ m = yoweb_re.search(self.yoweb_url)  # re-applied to the href just matched, to pull out group 1
+ self.islandid = m.group(1)  # kept as the matched string, not converted to int
+# _collect_flagid: fetch the island's yoweb page and pull the flag id out of the first flag-info link; sets flagid
+ def _collect_flagid(self):
+ self.flagid = None
+
+ yo = self.yoweb_url
+ debug('IEI COLLECT FLAGID '+`self.name`+' URL '+`yo`)
if yo is None: return None
- dataf = fetcher.fetch(yo, 600)
+ dataf = fetcher.fetch(yo, 1800)  # second argument is presumably the cache max-age -- TODO confirm units
soup = BeautifulSoup(dataf)
- ruler_re = regexp.compile('http://\w+\.puzzlepirates\.com/'+
- 'yoweb/flag/info\.wm\?flagid=(\d+)$')
+ ruler_re = regexp.compile(
+ '/yoweb/flag/info\.wm\?flagid=(\d+)$')  # no scheme or host in the pattern, so host-relative hrefs match too
ruler = soup.find('a', attrs = { 'href': ruler_re })
- if not ruler: return None
- m = ruler_re.find(ruler['href'])
- return m.group(1)
+ if not ruler:
+ debug('IEI COLLECT FLAGID '+`self.name`+' NONE')
+ return
+ debug('IEI COLLECT FLAGID '+`self.name`+' GOT '+``ruler``)
+ m = ruler_re.search(ruler['href'])
+ self.flagid = m.group(1)  # kept as the matched string, not converted to int
+# __str__: repr of the tuple (ocean, islandid, name, yoweb_url, flagid)
+ def __str__(self):
+ return `(self.ocean, self.islandid, self.name,
+ self.yoweb_url, self.flagid)`
+
+class IslandFlagInfo(IslandExtendedInfo):  # island record that additionally builds a FlagInfo for its flag id
+ # Public data attributes (inherited):
+ # ocean
+ # name
+ # islandid
+ # yoweb_url
+ # flagid
+ # Public data attributes (additional; None when flagid is None):
+ # flag
+ def __init__(self, ocean, islename):
+ IslandExtendedInfo.__init__(self, ocean, islename)  # runs the yoweb and flagid collection first
+ self.flag = None
+ self._collect_flag()
+# _collect_flag: leave flag as None when no flag id was found, otherwise construct a FlagInfo for it
+ def _collect_flag(self):
+ if self.flagid is None: return
+ self.flag = FlagInfo(self.flagid, 1800)  # 1800 is presumably a cache max-age, matching fetcher.fetch(yo, 1800) -- TODO confirm
+# __str__: the IslandExtendedInfo text, then '; ', then str() of the flag ('None' when there is no flag)
+ def __str__(self):
+ return IslandExtendedInfo.__str__(self) + '; ' + str(self.flag)
+
+class NullProgressReporter():  # silent reporter: same start/doing/stop methods OceanInfo calls, all no-ops
+ def start(self): pass
+ def doing(self, msg): pass
+ def stop(self): pass
+
+class TypewriterProgressReporter():  # shows one status message at a time on stdout, overwriting the previous one in place
+ def start(self):
+ self._l = 0  # length of the message currently on the line; set only here, so start() must precede doing()/stop()
+ def doing(self,m):
+ self._doing(m + '...')
+ def _doing(self,m):  # rewrite the line with m, blanking any leftover tail of a longer previous message
+ self._write('\r')
+ self._write(m)
+ less = self._l - len(m)  # how many characters of the old message stick out past the new one
+ if less > 0:
+ self._write(' ' * less)  # overwrite the leftover characters with spaces
+ self._write('\b' * less)  # then back the cursor up to the end of the new message
+ self._l = len(m)
+ sys.stdout.flush()  # no newline is ever written, so flush explicitly
+ def stop(self):
+ self._doing('')  # an empty message blanks whatever is currently shown
+ self._l = 0
+ def _write(self,t):
+ sys.stdout.write(t)
class OceanInfo():
- # Public data attributes (valid after collect()):
+ # Public data attributes:
# oi.islands[islename] = IslandInfo(...)
# oi.arches[archname][islename] = IslandInfo(...)
- def __init__(self):
- self.isleclass = IslandInfo
+ def __init__(self, isleclass=IslandBasicInfo, progressreporter=None):
+ if progressreporter is None:
+ if opts.debug: progressreporter = NullProgressReporter()
+ else: progressreporter = TypewriterProgressReporter()
+
+ self.isleclass = isleclass
self.ocean = fetcher.ocean.lower().capitalize()
- def collect(self):
+
+ progressreporter.start()
+ progressreporter.doing('fetching ocean info')
+
cmdl = ['./yppedia-ocean-scraper']
if opts.localhtml is not None:
cmdl += ['--local-html-dir',opts.localhtml]
arch_re = regexp.compile('^ (\S.*)')
island_re = regexp.compile('^ (\S.*)')
+ oscraper.wait()
+ assert(oscraper.returncode == 0)
+
self.islands = { }
self.arches = { }
archname = None
+ isles = [ ]
+ progressreporter.doing('parsing ocean info')
+
for l in oscraper.stdout:
debug('OceanInfo collect l '+`l`)
l = l.rstrip('\n')
if m:
assert(archname is not None)
islename = m.group(1)
- isle = self.isleclass(self.ocean, islename)
- isle.arch = archname
- self.islands[islename] = isle
- self.arches[archname][islename] = isle
+ isles.append((archname, islename))
continue
m = arch_re.match(l)
if m:
self.arches[archname] = { }
continue
assert(False)
- oscraper.wait()
- assert(oscraper.returncode == 0)
+
+ for i in xrange(0, len(isles)-1):
+ (archname, islename) = isles[i]
+ progressreporter.doing(
+ 'fetching isle info %2d/%d (%s: %s)'
+ % (i, len(isles), archname, islename))
+ isle = self.isleclass(self.ocean, islename)
+ isle.arch = archname
+ self.islands[islename] = isle
+ self.arches[archname][islename] = isle
+
+ progressreporter.stop()
+
+ def __str__(self):
+ return `(self.islands, self.arches)`
#---------- pretty-printer for tables of pirate puzzle standings ----------
def do_ocean(args, bu):
if (len(args)): bu('ocean takes no further arguments')
fetcher.default_ocean()
- oi = OceanInfo()
- oi.collect()
+ oi = OceanInfo(IslandFlagInfo)  # the constructor now does the whole collection, building an IslandFlagInfo per island
+ print oi  # prints the repr of the islands and arches dictionaries
for islename in sorted(oi.islands.keys()):
isle = oi.islands[islename]
- yoweb_url = isle.yoweb_url()
- print " %s -- %s" % (islename, yoweb_url)
+ print isle  # uses the island class's __str__
+
+def do_embargoes(args, bu):
+ if (len(args)): bu('ocean takes no further arguments')
+ fetcher.default_ocean()
+ oi = OceanInfo(IslandFlagInfo)
+ wr = sys.stdout.write
+ print ('EMBARGOES: Island | Owning flag'+
+ ' | Embargoed flags')
+
+ def getflname(isle):
+ if isle.islandid is None: return 'uncolonisable'
+ if isle.flag is None: return 'uncolonised'
+ return isle.flag.name
+
+ for archname in sorted(oi.arches.keys()):
+ print 'ARCHIPELAGO: ',archname
+ for islename in sorted(oi.arches[archname].keys()):
+ isle = oi.islands[islename]
+ wr(' %-20s | ' % isle.name)
+ flname = getflname(isle)
+ wr('%-30s | ' % flname)
+ flag = isle.flag
+ if flag is None: print ''; continue
+ delim = ''
+ for rel in flag.relations:
+ (oname, oid, dummy, thisdeclaring,
+ odeclaringmin,odeclaringmax) = rel
+ if thisdeclaring >= 0: continue
+ wr(delim)
+ wr(oname)
+ delim = '; '
+ print ''
#----- modes which use the chat log parser are quite complex -----
#---------- main program ----------
def main():
- global opts, fetcher
+ global opts, fetcher, yppedia  # yppedia: module-level Yppedia instance assigned below, next to fetcher
pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
else:
opts.display = 'overwrite'
- fetcher = Fetcher(opts.ocean, opts.cache_dir)
+ fetcher = Yoweb(opts.ocean, opts.cache_dir)
+ yppedia = Yppedia(opts.cache_dir)
mode_fn(args[1:], pa.error)