X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.main.git;a=blobdiff_plain;f=yoweb-scrape;h=717b539131d8ffb0e4ba6abba5f6cddc3b38b199;hp=90a5677c50e3ff1b1a01fb5427d98ff5ebf564ca;hb=ac0ff0f496aaa83b013c0102ee8e4e4076d150b4;hpb=bb6f3057cff743f25c2ef95a734b9c0f05c97ff6 diff --git a/yoweb-scrape b/yoweb-scrape index 90a5677..717b539 100755 --- a/yoweb-scrape +++ b/yoweb-scrape @@ -45,6 +45,7 @@ import curses import termios import random import subprocess +import copy from optparse import OptionParser from StringIO import StringIO @@ -107,6 +108,18 @@ def yppsc_dir(): os.environ["YPPSC_YARRG_SRCBASE"] = lib return lib +soup_massage = copy.copy(BeautifulSoup.MARKUP_MASSAGE) +soup_massage.append( + (regexp.compile('(\ 0: need_wait += random.random() - 0.5 return need_wait - def _rate_limit_cache_clean(self, now): - need_wait = self.need_wait(now) + def _rate_limit_cache_clean(self, now, next_url=None): + need_wait = self.need_wait(now, next_url=next_url) if need_wait > 0: - debug('Fetcher wait %d' % need_wait) + debug('Fetcher wait %f' % need_wait) sleep(need_wait) def fetch(self, url, max_age): @@ -186,7 +210,7 @@ class Fetcher: return data debug('Fetcher fetch') - self._rate_limit_cache_clean(now) + self._rate_limit_cache_clean(now, next_url=url) stream = urllib2.urlopen(url) data = stream.read() @@ -252,9 +276,7 @@ class SomethingSoupInfo(SoupLog): def __init__(self, kind, tail, max_age): SoupLog.__init__(self) html = fetcher.yoweb(kind, tail, max_age) - self._soup = BeautifulSoup(html, - convertEntities=BeautifulSoup.HTML_ENTITIES - ) + self._soup = make_soup(html) #---------- scraper for pirate pages ---------- @@ -361,12 +383,14 @@ u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A- class CrewInfo(SomethingSoupInfo): # Public data members: + # ci.crewid # ci.crew = [ ('Captain', ['Pirate', ...]), # ('Senior Officer', [...]), # ... ] # pi.msgs = [ 'message describing problem with scrape' ] def __init__(self, crewid, max_age=300): + self.crewid = crewid SomethingSoupInfo.__init__(self, 'crew/info.wm?crewid=', crewid, max_age) self._find_crew() @@ -407,23 +431,38 @@ class CrewInfo(SomethingSoupInfo): def __str__(self): return `(self.crew, self.msgs)` +class FlagRelation(): + # Public data members (put there by hand by creater) + # other_flagname + # other_flagid + # yoweb_heading + # this_declaring + # other_declaring_min + # other_declaring_max + # where {this,other}_declaring{,_min,_max} are: + # -1 {this,other} is declaring war + # 0 {this,other} is not doing either + # +1 {this,other} is allying + def __repr__(self): + return '' % ( + self.yoweb_heading, self.this_declaring, + self.other_declaring_min, self.other_declaring_max, + self.other_flagname, self.other_flagid) + class FlagInfo(SomethingSoupInfo): # Public data members (after init): # + # flagid # name # string # - # relations[n] = (otherflagname, otherflagid, [stringfromyoweb], - # thisdeclaring, otherdeclaringmin, otherdeclaringmax) - # # where {this,other}declaring{,min,max} are: - # # -1 {this,other} is declaring war - # # 0 {this,other} is not doing either - # # +1 {this,other} is allying + # relations[n] = FlagRelation # relation_byname[otherflagname] = relations[some_n] # relation_byid[otherflagname] = relations[some_n] # # islands[n] = (islandname, islandid) # def __init__(self, flagid, max_age=600): + self.flagid = flagid SomethingSoupInfo.__init__(self, 'flag/info.wm?flagid=', flagid, max_age) self._find_flag() @@ -468,7 +507,13 @@ class FlagInfo(SomethingSoupInfo): if rel: return 'flag id twice!' if flagname in self.relation_byname: return 'flag name twice!' - rel = (flagname,flagid,head, thisdecl,othermin,othermax) + rel = FlagRelation() + rel.other_flagname = flagname + rel.other_flagid = flagid + rel.yoweb_heading = head + rel.this_declaring = thisdecl + rel.other_declaring_min = othermin + rel.other_declaring_max = othermax self.relations.append(rel) self.relation_byid[flagid] = rel self.relation_byname[flagid] = rel @@ -554,7 +599,7 @@ class IslandExtendedInfo(IslandBasicInfo): self.islandid = None self.yoweb_url = None - soup = BeautifulSoup(self.yppedia()) + soup = make_soup(self.yppedia()) content = soup.find('div', attrs = {'id': 'content'}) yoweb_re = regexp.compile('^http://\w+\.puzzlepirates\.com/'+ 'yoweb/island/info\.wm\?islandid=(\d+)$') @@ -575,7 +620,7 @@ class IslandExtendedInfo(IslandBasicInfo): debug('IEI COLLECT FLAGID '+`self.name`+' URL '+`yo`) if yo is None: return None dataf = fetcher.fetch(yo, 1800) - soup = BeautifulSoup(dataf) + soup = make_soup(dataf) ruler_re = regexp.compile( '/yoweb/flag/info\.wm\?flagid=(\d+)$') ruler = soup.find('a', attrs = { 'href': ruler_re }) @@ -612,12 +657,11 @@ class IslandFlagInfo(IslandExtendedInfo): return IslandExtendedInfo.__str__(self) + '; ' + str(self.flag) class NullProgressReporter(): - def start(self): pass def doing(self, msg): pass def stop(self): pass class TypewriterProgressReporter(): - def start(self): + def __init__(self): self._l = 0 def doing(self,m): self._doing(m + '...') @@ -640,15 +684,10 @@ class OceanInfo(): # Public data attributes: # oi.islands[islename] = IslandInfo(...) # oi.arches[archname][islename] = IslandInfo(...) - def __init__(self, isleclass=IslandBasicInfo, progressreporter=None): - if progressreporter is None: - if opts.debug: progressreporter = NullProgressReporter() - else: progressreporter = TypewriterProgressReporter() - + def __init__(self, isleclass=IslandBasicInfo): self.isleclass = isleclass self.ocean = fetcher.ocean.lower().capitalize() - progressreporter.start() progressreporter.doing('fetching ocean info') cmdl = ['./yppedia-ocean-scraper'] @@ -705,8 +744,6 @@ class OceanInfo(): self.islands[islename] = isle self.arches[archname][islename] = isle - progressreporter.stop() - def __str__(self): return `(self.islands, self.arches)` @@ -1359,22 +1396,27 @@ def do_pirate(pirates, bu): print '%s: %s,' % (`pirate`, info) print '}' -def prep_crew_of(args, bu, max_age=300): - if len(args) != 1: bu('crew-of takes one pirate name') +def prep_crewflag_of(args, bu, max_age, selector, constructor): + if len(args) != 1: bu('crew-of etc. take one pirate name') pi = PirateInfo(args[0], max_age) - if pi.crew is None: return None - return CrewInfo(pi.crew[0], max_age) + cf = selector(pi) + if cf is None: return None + return constructor(cf[0], max_age) + +def prep_crew_of(args, bu, max_age=300): + return prep_crewflag_of(args, bu, max_age, + (lambda pi: pi.crew), CrewInfo) + +def prep_flag_of(args, bu, max_age=300): + return prep_crewflag_of(args, bu, max_age, + (lambda pi: pi.flag), FlagInfo) def do_crew_of(args, bu): ci = prep_crew_of(args, bu) print ci def do_flag_of(args, bu): - if len(args) != 1: bu('flag-of takes one pirate name') - max_age = 300 - pi = PirateInfo(args[0], max_age) - if pi.flag is None: fi = None - else: fi = FlagInfo(pi.flag[0], max_age) + fi = prep_flag_of(args, bu) print fi def do_standings_crew_of(args, bu): @@ -1411,6 +1453,8 @@ def do_embargoes(args, bu): if isle.flag is None: return 'uncolonised' return isle.flag.name + progressreporter.stop() + for archname in sorted(oi.arches.keys()): print 'ARCHIPELAGO: ',archname for islename in sorted(oi.arches[archname].keys()): @@ -1422,14 +1466,64 @@ def do_embargoes(args, bu): if flag is None: print ''; continue delim = '' for rel in flag.relations: - (oname, oid, dummy, thisdeclaring, - odeclaringmin,odeclaringmax) = rel - if thisdeclaring >= 0: continue + if rel.this_declaring >= 0: continue wr(delim) - wr(oname) + wr(rel.other_flagname) delim = '; ' print '' +def do_embargoes_flag_of(args, bu): + progressreporter.doing('fetching flag info') + fi = prep_flag_of(args, bu) + if fi is None: + progressreporter.stop() + print 'Pirate is not in a flag.' + return + + oi = OceanInfo(IslandFlagInfo) + + progressreporter.stop() + print '' + + any = False + for islename in sorted(oi.islands.keys()): + isle = oi.islands[islename] + flag = isle.flag + if flag is None: continue + for rel in flag.relations: + if rel.this_declaring >= 0: continue + if rel.other_flagid != fi.flagid: continue + if not any: print 'EMBARGOED:' + any = True + print " %-30s (%s)" % (islename, flag.name) + if not any: + print 'No embargoes.' + print '' + + war_flag(fi) + print '' + +def do_war_flag_of(args, bu): + fi = prep_flag_of(args, bu) + war_flag(fi) + +def war_flag(fi): + any = False + for certain in [True, False]: + anythis = False + for rel in fi.relations: + if rel.this_declaring >= 0: continue + if (rel.other_declaring_max < 0) != certain: continue + if not anythis: + if certain: m = 'SINKING PvP' + else: m = 'RISK OF SINKING PvP' + print '%s (%s):' % (m, rel.yoweb_heading) + anythis = True + any = True + print " ", rel.other_flagname + if not any: + print 'No sinking PvP.' + #----- modes which use the chat log parser are quite complex ----- class ProgressPrintPercentage: @@ -1751,7 +1845,7 @@ class KeystrokeReader(DummyKeystrokeReader): #---------- main program ---------- def main(): - global opts, fetcher, yppedia + global opts, fetcher, yppedia, progressreporter pa = OptionParser( '''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...] @@ -1760,6 +1854,8 @@ actions: yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG + yoweb-scrape [--ocean OCEAN ...] ocean|embargoes + yoweb-scrape [--ocean OCEAN ...] war-flag-of|embargoes-flag-of PIRATE yoweb-scrape [options] ship-aid CHAT-LOG (must be .../PIRATE_OCEAN_chat-log*) display modes (for --display) apply to ship-aid: @@ -1836,6 +1932,11 @@ display modes (for --display) apply to ship-aid: fetcher = Yoweb(opts.ocean, opts.cache_dir) yppedia = Yppedia(opts.cache_dir) + if opts.debug or not os.isatty(0): + progressreporter = NullProgressReporter() + else: + progressreporter = TypewriterProgressReporter() + mode_fn(args[1:], pa.error) main()