X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?a=blobdiff_plain;f=yoweb-scrape;h=717b539131d8ffb0e4ba6abba5f6cddc3b38b199;hb=5d8ee699b4b762eb9ce3c6eb3b17f4c10d6d4ad1;hp=d2557b594a86b8811c3323c375bd4f5cd14c1b57;hpb=f0eb636cf4249dbc353f24a74287913f8c88fcc0;p=ypp-sc-tools.web-test.git
diff --git a/yoweb-scrape b/yoweb-scrape
index d2557b5..717b539 100755
--- a/yoweb-scrape
+++ b/yoweb-scrape
@@ -45,6 +45,7 @@ import curses
import termios
import random
import subprocess
+import copy
from optparse import OptionParser
from StringIO import StringIO
@@ -107,6 +108,18 @@ def yppsc_dir():
os.environ["YPPSC_YARRG_SRCBASE"] = lib
return lib
+soup_massage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
+soup_massage.append(
+ (regexp.compile('(\
0:
need_wait += random.random() - 0.5
return need_wait
- def _rate_limit_cache_clean(self, now):
- need_wait = self.need_wait(now)
+ def _rate_limit_cache_clean(self, now, next_url=None):
+ need_wait = self.need_wait(now, next_url=next_url)
if need_wait > 0:
- debug('Fetcher wait %d' % need_wait)
+ debug('Fetcher wait %f' % need_wait)
sleep(need_wait)
def fetch(self, url, max_age):
@@ -186,7 +210,7 @@ class Fetcher:
return data
debug('Fetcher fetch')
- self._rate_limit_cache_clean(now)
+ self._rate_limit_cache_clean(now, next_url=url)
stream = urllib2.urlopen(url)
data = stream.read()
@@ -252,9 +276,7 @@ class SomethingSoupInfo(SoupLog):
def __init__(self, kind, tail, max_age):
SoupLog.__init__(self)
html = fetcher.yoweb(kind, tail, max_age)
- self._soup = BeautifulSoup(html,
- convertEntities=BeautifulSoup.HTML_ENTITIES
- )
+ self._soup = make_soup(html)
#---------- scraper for pirate pages ----------
@@ -361,12 +383,14 @@ u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-
class CrewInfo(SomethingSoupInfo):
# Public data members:
+ # ci.crewid
# ci.crew = [ ('Captain', ['Pirate', ...]),
# ('Senior Officer', [...]),
# ... ]
# pi.msgs = [ 'message describing problem with scrape' ]
def __init__(self, crewid, max_age=300):
+ self.crewid = crewid
SomethingSoupInfo.__init__(self,
'crew/info.wm?crewid=', crewid, max_age)
self._find_crew()
@@ -407,23 +431,38 @@ class CrewInfo(SomethingSoupInfo):
def __str__(self):
return `(self.crew, self.msgs)`
+class FlagRelation():
+ # Relation of one flag towards one other flag, as scraped from yoweb.
+ # Public data members (put there by hand by creator)
+ # other_flagname
+ # other_flagid
+ # yoweb_heading
+ # this_declaring
+ # other_declaring_min
+ # other_declaring_max
+ # where {this,other}_declaring{,_min,_max} are:
+ # -1 {this,other} is declaring war
+ # 0 {this,other} is not doing either
+ # +1 {this,other} is allying
+ def __repr__(self):
+ # The format must contain one conversion per tuple element; an
+ # empty format string makes the % operator raise TypeError.
+ # %s is used throughout so that no member's type is assumed.
+ return '<FlagRelation %s %s/%s..%s %s %s>' % (
+ self.yoweb_heading, self.this_declaring,
+ self.other_declaring_min, self.other_declaring_max,
+ self.other_flagname, self.other_flagid)
+
class FlagInfo(SomethingSoupInfo):
# Public data members (after init):
#
+ # flagid
# name # string
#
- # relations[n] = (otherflagname, otherflagid, [stringfromyoweb],
- # thisdeclaring, otherdeclaringmin, otherdeclaringmax)
- # # where {this,other}declaring{,min,max} are:
- # # -1 {this,other} is declaring war
- # # 0 {this,other} is not doing either
- # # +1 {this,other} is allying
+ # relations[n] = FlagRelation
# relation_byname[otherflagname] = relations[some_n]
# relation_byid[otherflagname] = relations[some_n]
#
# islands[n] = (islandname, islandid)
#
def __init__(self, flagid, max_age=600):
+ self.flagid = flagid
SomethingSoupInfo.__init__(self,
'flag/info.wm?flagid=', flagid, max_age)
self._find_flag()
@@ -468,7 +507,13 @@ class FlagInfo(SomethingSoupInfo):
if rel: return 'flag id twice!'
if flagname in self.relation_byname:
return 'flag name twice!'
- rel = (flagname,flagid,head, thisdecl,othermin,othermax)
+ rel = FlagRelation()
+ rel.other_flagname = flagname
+ rel.other_flagid = flagid
+ rel.yoweb_heading = head
+ rel.this_declaring = thisdecl
+ rel.other_declaring_min = othermin
+ rel.other_declaring_max = othermax
self.relations.append(rel)
self.relation_byid[flagid] = rel
- self.relation_byname[flagid] = rel
+ # Key by flag name, not id: the 'flag name twice!' check above tests
+ # flagname against this dict, so it must be indexed by name.
+ self.relation_byname[flagname] = rel
@@ -519,16 +564,14 @@ class FlagInfo(SomethingSoupInfo):
#---------- scraper for ocean info incl. embargoes etc. ----------
class IslandBasicInfo():
- # Public members:
+ # Public data attributes:
# ocean
# name
- # Public members maybe set by caller:
+ # Public data attributes maybe set by caller:
# arch
def __init__(self, ocean, islename):
self.ocean = ocean
self.name = islename
- def collect(self):
- pass
def yppedia(self):
def q(x): return urllib.quote(x.replace(' ','_'))
url_rhs = q(self.name) + '_(' + q(self.ocean) + ')'
@@ -537,15 +580,17 @@ class IslandBasicInfo():
return `(self.ocean, self.name)`
class IslandExtendedInfo(IslandBasicInfo):
- # Public members (inherited):
+ # Public data attributes (inherited):
# ocean
# name
- # Public members (additional):
+ # Public data attributes (additional):
# islandid
# yoweb_url
# flagid
- def collect(self):
- IslandBasicInfo.collect(self)
+ def __init__(self, ocean, islename):
+ IslandBasicInfo.__init__(self, ocean, islename)
+ self.islandid = None
+ self.yoweb_url = None
self._collect_yoweb()
self._collect_flagid()
@@ -554,7 +599,7 @@ class IslandExtendedInfo(IslandBasicInfo):
self.islandid = None
self.yoweb_url = None
- soup = BeautifulSoup(self.yppedia())
+ soup = make_soup(self.yppedia())
content = soup.find('div', attrs = {'id': 'content'})
yoweb_re = regexp.compile('^http://\w+\.puzzlepirates\.com/'+
'yoweb/island/info\.wm\?islandid=(\d+)$')
@@ -575,7 +620,7 @@ class IslandExtendedInfo(IslandBasicInfo):
debug('IEI COLLECT FLAGID '+`self.name`+' URL '+`yo`)
if yo is None: return None
dataf = fetcher.fetch(yo, 1800)
- soup = BeautifulSoup(dataf)
+ soup = make_soup(dataf)
ruler_re = regexp.compile(
'/yoweb/flag/info\.wm\?flagid=(\d+)$')
ruler = soup.find('a', attrs = { 'href': ruler_re })
@@ -590,14 +635,61 @@ class IslandExtendedInfo(IslandBasicInfo):
return `(self.ocean, self.islandid, self.name,
self.yoweb_url, self.flagid)`
+class IslandFlagInfo(IslandExtendedInfo):
+ # Island info which additionally holds a FlagInfo object for the
+ # flag identified by the inherited flagid attribute.
+ # Public data attributes (inherited):
+ # ocean
+ # name
+ # islandid
+ # yoweb_url
+ # flagid
+ # Public data attributes (additional):
+ # flag # FlagInfo, or None if flagid is None
+ def __init__(self, ocean, islename):
+ IslandExtendedInfo.__init__(self, ocean, islename)
+ self.flag = None
+ self._collect_flag()
+
+ def _collect_flag(self):
+ # Without a flagid there is nothing to look up; flag stays None.
+ if self.flagid is None: return
+ # 1800 is passed as FlagInfo's max_age argument.
+ self.flag = FlagInfo(self.flagid, 1800)
+
+ def __str__(self):
+ # The parent's string form, then '; ', then str() of the flag.
+ return IslandExtendedInfo.__str__(self) + '; ' + str(self.flag)
+
+class NullProgressReporter():
+ # Progress reporter whose doing() and stop() do nothing.
+ def doing(self, msg): pass
+ def stop(self): pass
+
+class TypewriterProgressReporter():
+ # Progress reporter which rewrites a single line on sys.stdout,
+ # using '\r' to return to the start of the line each time.
+ def __init__(self):
+ # Length of the message most recently written.
+ self._l = 0
+ def doing(self,m):
+ # Show m followed by '...' as the current progress message.
+ self._doing(m + '...')
+ def _doing(self,m):
+ self._write('\r')
+ self._write(m)
+ # If the previous message was longer, overwrite its leftover tail
+ # with spaces and then step back over them with backspaces.
+ less = self._l - len(m)
+ if less > 0:
+ self._write(' ' * less)
+ self._write('\b' * less)
+ self._l = len(m)
+ sys.stdout.flush()
+ def stop(self):
+ # Writing an empty message blanks whatever was displayed.
+ self._doing('')
+ self._l = 0
+ def _write(self,t):
+ sys.stdout.write(t)
+
class OceanInfo():
- # Public data attributes (valid after collect()):
+ # Public data attributes:
# oi.islands[islename] = IslandInfo(...)
# oi.arches[archname][islename] = IslandInfo(...)
def __init__(self, isleclass=IslandBasicInfo):
self.isleclass = isleclass
self.ocean = fetcher.ocean.lower().capitalize()
- def collect(self):
+
+ progressreporter.doing('fetching ocean info')
+
cmdl = ['./yppedia-ocean-scraper']
if opts.localhtml is not None:
cmdl += ['--local-html-dir',opts.localhtml]
@@ -615,10 +707,16 @@ class OceanInfo():
arch_re = regexp.compile('^ (\S.*)')
island_re = regexp.compile('^ (\S.*)')
+ oscraper.wait()
+ assert(oscraper.returncode == 0)
+
self.islands = { }
self.arches = { }
archname = None
+ isles = [ ]
+ progressreporter.doing('parsing ocean info')
+
for l in oscraper.stdout:
debug('OceanInfo collect l '+`l`)
l = l.rstrip('\n')
@@ -626,11 +724,7 @@ class OceanInfo():
if m:
assert(archname is not None)
islename = m.group(1)
- isle = self.isleclass(self.ocean, islename)
- isle.arch = archname
- isle.collect()
- self.islands[islename] = isle
- self.arches[archname][islename] = isle
+ isles.append((archname, islename))
continue
m = arch_re.match(l)
if m:
@@ -639,8 +733,17 @@ class OceanInfo():
self.arches[archname] = { }
continue
assert(False)
- oscraper.wait()
- assert(oscraper.returncode == 0)
+
+ # Build an isle object for every (archname, islename) pair collected
+ # above, including the last one; the progress counter is 1-based so
+ # it runs from 1/N to N/N.
+ for i in xrange(len(isles)):
+ (archname, islename) = isles[i]
+ progressreporter.doing(
+ 'fetching isle info %2d/%d (%s: %s)'
+ % (i + 1, len(isles), archname, islename))
+ isle = self.isleclass(self.ocean, islename)
+ isle.arch = archname
+ self.islands[islename] = isle
+ self.arches[archname][islename] = isle
+
def __str__(self):
return `(self.islands, self.arches)`
@@ -1293,22 +1396,27 @@ def do_pirate(pirates, bu):
print '%s: %s,' % (`pirate`, info)
print '}'
-def prep_crew_of(args, bu, max_age=300):
- if len(args) != 1: bu('crew-of takes one pirate name')
+def prep_crewflag_of(args, bu, max_age, selector, constructor):
+ # Shared helper for the crew-of / flag-of style actions.
+ # args: action arguments; exactly one pirate name is expected
+ # bu: called with a usage message if args is the wrong length
+ # max_age: passed to PirateInfo and to constructor
+ # selector: called with the PirateInfo; returns None or a
+ # sequence whose element [0] is handed to constructor
+ # constructor: called as constructor(cf[0], max_age)
+ # Returns None if selector returned None, else constructor's result.
+ if len(args) != 1: bu('crew-of etc. take one pirate name')
pi = PirateInfo(args[0], max_age)
- if pi.crew is None: return None
- return CrewInfo(pi.crew[0], max_age)
+ cf = selector(pi)
+ if cf is None: return None
+ return constructor(cf[0], max_age)
+
+def prep_crew_of(args, bu, max_age=300):
+ # Returns a CrewInfo built from the pirate's pi.crew, or None if
+ # pi.crew is None.
+ return prep_crewflag_of(args, bu, max_age,
+ (lambda pi: pi.crew), CrewInfo)
+
+def prep_flag_of(args, bu, max_age=300):
+ # Returns a FlagInfo built from the pirate's pi.flag, or None if
+ # pi.flag is None.
+ return prep_crewflag_of(args, bu, max_age,
+ (lambda pi: pi.flag), FlagInfo)
def do_crew_of(args, bu):
ci = prep_crew_of(args, bu)
print ci
def do_flag_of(args, bu):
- if len(args) != 1: bu('flag-of takes one pirate name')
- max_age = 300
- pi = PirateInfo(args[0], max_age)
- if pi.flag is None: fi = None
- else: fi = FlagInfo(pi.flag[0], max_age)
+ fi = prep_flag_of(args, bu)
print fi
def do_standings_crew_of(args, bu):
@@ -1326,13 +1434,96 @@ def do_standings_crew_of(args, bu):
def do_ocean(args, bu):
if (len(args)): bu('ocean takes no further arguments')
fetcher.default_ocean()
- oi = OceanInfo(IslandExtendedInfo)
- oi.collect()
+ oi = OceanInfo(IslandFlagInfo)
print oi
for islename in sorted(oi.islands.keys()):
isle = oi.islands[islename]
print isle
+def do_embargoes(args, bu):
+ # Action 'embargoes': for each archipelago, print every island with
+ # its owning flag and the flags against which that owning flag is
+ # declaring war (relations with this_declaring < 0).
+ # args: must be empty
+ # bu: called with a usage message if args is not empty
+ if (len(args)): bu('embargoes takes no further arguments')
+ fetcher.default_ocean()
+ oi = OceanInfo(IslandFlagInfo)
+ # Clear the progress line before anything else is printed, so the
+ # table header does not land on top of the last progress message.
+ progressreporter.stop()
+ wr = sys.stdout.write
+ print ('EMBARGOES: Island | Owning flag'+
+ ' | Embargoed flags')
+
+ def getflname(isle):
+ # Text for the 'Owning flag' column.
+ if isle.islandid is None: return 'uncolonisable'
+ if isle.flag is None: return 'uncolonised'
+ return isle.flag.name
+
+ for archname in sorted(oi.arches.keys()):
+ print 'ARCHIPELAGO: ',archname
+ for islename in sorted(oi.arches[archname].keys()):
+ isle = oi.islands[islename]
+ wr(' %-20s | ' % isle.name)
+ flname = getflname(isle)
+ wr('%-30s | ' % flname)
+ flag = isle.flag
+ # No flag object means no relations to list; just end the row.
+ if flag is None: print ''; continue
+ delim = ''
+ for rel in flag.relations:
+ # Only relations where the owning flag declares war are listed.
+ if rel.this_declaring >= 0: continue
+ wr(delim)
+ wr(rel.other_flagname)
+ delim = '; '
+ print ''
+
+def do_embargoes_flag_of(args, bu):
+ # Action 'embargoes-flag-of': list the islands whose owning flag is
+ # declaring war on the flag of the given pirate, then print that
+ # flag's own war declarations via war_flag().
+ # args, bu: passed to prep_flag_of (one pirate name expected)
+ progressreporter.doing('fetching flag info')
+ fi = prep_flag_of(args, bu)
+ if fi is None:
+ progressreporter.stop()
+ print 'Pirate is not in a flag.'
+ return
+
+ oi = OceanInfo(IslandFlagInfo)
+
+ progressreporter.stop()
+ print ''
+
+ # Set once the 'EMBARGOED:' heading has been printed.
+ any = False
+ for islename in sorted(oi.islands.keys()):
+ isle = oi.islands[islename]
+ flag = isle.flag
+ if flag is None: continue
+ for rel in flag.relations:
+ # Keep only war declarations aimed at the pirate's flag.
+ if rel.this_declaring >= 0: continue
+ # NOTE(review): assumes other_flagid and fi.flagid have the same
+ # type (both str or both int) -- confirm against the scrapers.
+ if rel.other_flagid != fi.flagid: continue
+ if not any: print 'EMBARGOED:'
+ any = True
+ print " %-30s (%s)" % (islename, flag.name)
+ if not any:
+ print 'No embargoes.'
+ print ''
+
+ war_flag(fi)
+ print ''
+
+def do_war_flag_of(args, bu):
+ # Action 'war-flag-of': print the war declarations of the flag of
+ # the pirate named in args.
+ # args, bu: passed to prep_flag_of (one pirate name expected)
+ fi = prep_flag_of(args, bu)
+ # prep_flag_of returns None when the pirate has no flag; war_flag
+ # would then fail on fi.relations, so report it and stop.
+ if fi is None: print 'Pirate is not in a flag.'; return
+ war_flag(fi)
+
+def war_flag(fi):
+ # Print the flags against which flag fi is declaring war, in two
+ # passes: first those whose other_declaring_max is below 0
+ # ('SINKING PvP'), then the rest ('RISK OF SINKING PvP').
+ # fi: object with a .relations list of FlagRelation-like entries
+ # Prints 'No sinking PvP.' if neither pass listed anything.
+ any = False
+ for certain in [True, False]:
+ # Set once this pass's heading has been printed.
+ anythis = False
+ for rel in fi.relations:
+ if rel.this_declaring >= 0: continue
+ # Select the relations belonging to the current pass.
+ if (rel.other_declaring_max < 0) != certain: continue
+ if not anythis:
+ if certain: m = 'SINKING PvP'
+ else: m = 'RISK OF SINKING PvP'
+ print '%s (%s):' % (m, rel.yoweb_heading)
+ anythis = True
+ any = True
+ print " ", rel.other_flagname
+ if not any:
+ print 'No sinking PvP.'
+
#----- modes which use the chat log parser are quite complex -----
class ProgressPrintPercentage:
@@ -1654,7 +1845,7 @@ class KeystrokeReader(DummyKeystrokeReader):
#---------- main program ----------
def main():
- global opts, fetcher, yppedia
+ global opts, fetcher, yppedia, progressreporter
pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
@@ -1663,6 +1854,8 @@ actions:
yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
+ yoweb-scrape [--ocean OCEAN ...] ocean|embargoes
+ yoweb-scrape [--ocean OCEAN ...] war-flag-of|embargoes-flag-of PIRATE
yoweb-scrape [options] ship-aid CHAT-LOG (must be .../PIRATE_OCEAN_chat-log*)
display modes (for --display) apply to ship-aid:
@@ -1739,6 +1932,11 @@ display modes (for --display) apply to ship-aid:
fetcher = Yoweb(opts.ocean, opts.cache_dir)
yppedia = Yppedia(opts.cache_dir)
+ if opts.debug or not os.isatty(0):
+ progressreporter = NullProgressReporter()
+ else:
+ progressreporter = TypewriterProgressReporter()
+
mode_fn(args[1:], pa.error)
main()