chiark / gitweb /
website: mention that jarrg complies with new Third Party Software policy
[ypp-sc-tools.db-test.git] / yoweb-scrape
index ba828f51c0fb1a125e6ef729f82caacf0f7919b4..db5dc1338298a47a2c9120d2f93b168d07842ba5 100755 (executable)
@@ -110,19 +110,14 @@ def yppsc_dir():
 #---------- caching and rate-limiting data fetcher ----------
 
 class Fetcher:
-       def __init__(self, ocean, cachedir):
+       def __init__(self, cachedir):
                debug('Fetcher init %s' % cachedir)
-               self.ocean = ocean
                self.cachedir = cachedir
                try: os.mkdir(cachedir)
                except (OSError,IOError), oe:
                        if oe.errno != errno.EEXIST: raise
                self._cache_scan(time.time())
 
-       def default_ocean(self, ocean='ice'):
-               if self.ocean is None:
-                       self.ocean = ocean
-
        def _cache_scan(self, now):
                # returns list of ages, unsorted
                ages = []
@@ -204,12 +199,38 @@ class Fetcher:
                debug('Fetcher  stored')
                return data
 
+class Yoweb(Fetcher):
+       # Fetcher for yoweb pages of a single ocean.
+       #  ocean may be None at construction; default_ocean fills it in,
+       #  and yoweb calls default_ocean itself before building a URL.
+       def __init__(self, ocean, cachedir):
+               debug('Yoweb init %s' % cachedir)
+               self.ocean = ocean
+               Fetcher.__init__(self, cachedir)
+
+       def default_ocean(self, ocean='ice'):
+               # only has an effect while no ocean has been set
+               if self.ocean is None:
+                       self.ocean = ocean
+
        def yoweb(self, kind, tail, max_age):
+               # fetches http://OCEAN.puzzlepirates.com/yoweb/ + kind + tail
+               # via self.fetch, passing max_age through unchanged
                self.default_ocean()
+               assert(self.ocean)
                url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
                        self.ocean, kind, tail)
                return self.fetch(url, max_age)
 
+class Yppedia(Fetcher):
+       # Fetcher for YPPedia pages.  Calling the instance with the part
+       # of the URL after the site root yields the page:
+       #  - normally, whatever self.fetch returns for base + rhs
+       #  - if opts.localhtml was set at construction time, a file
+       #    object opened on <localhtml>/<rhs> instead (no fetching)
+       def __init__(self, cachedir):
+               debug('Yppedia init %s' % cachedir)
+               self.base = 'http://yppedia.puzzlepirates.com/'
+               self.localhtml = opts.localhtml
+               Fetcher.__init__(self, cachedir)
+
+       def __call__(self, rhs):
+               # rhs is appended verbatim; callers must have quoted it
+               if self.localhtml is None:
+                       url = self.base + rhs
+                       debug('Yppedia retrieving YPP '+url);
+                       return self.fetch(url, 3000)
+               else:
+                       # use the value captured in __init__, which is
+                       # also the one the test above looked at
+                       return file(self.localhtml + '/' + rhs, 'r')
+
 #---------- logging assistance for troubled screenscrapers ----------
 
 class SoupLog:
@@ -340,12 +361,14 @@ u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-
 
 class CrewInfo(SomethingSoupInfo):
        # Public data members:
+       #  ci.crewid
        #  ci.crew = [ ('Captain',        ['Pirate', ...]),
        #              ('Senior Officer', [...]),
        #               ... ]
        #  pi.msgs = [ 'message describing problem with scrape' ]
 
        def __init__(self, crewid, max_age=300):
+               self.crewid = crewid
                SomethingSoupInfo.__init__(self,
                        'crew/info.wm?crewid=', crewid, max_age)
                self._find_crew()
@@ -386,23 +409,38 @@ class CrewInfo(SomethingSoupInfo):
        def __str__(self):
                return `(self.crew, self.msgs)`
 
+class FlagRelation():
+       # Plain record describing how one flag relates to another.
+       # It has no constructor; every attribute is assigned by the
+       # code that creates the object.
+       #
+       # Public data members (put there by hand by creator)
+       #       other_flagname
+       #       other_flagid
+       #       yoweb_heading
+       #       this_declaring
+       #       other_declaring_min
+       #       other_declaring_max
+       # where {this,other}_declaring{,_min,_max} are:
+       #       -1      {this,other} is declaring war
+       #        0      {this,other} is not doing either
+       #       +1      {this,other} is allying
+       def __repr__(self):
+               # the three *_declaring* members are formatted with %d,
+               # so all six members must be set before this is called
+               return '<FlagRelation %s %d/%d..%d %s %s>' % (
+                       self.yoweb_heading, self.this_declaring,
+                       self.other_declaring_min, self.other_declaring_max,
+                       self.other_flagname, self.other_flagid)
+
 class FlagInfo(SomethingSoupInfo):
        # Public data members (after init):
        #
+       #   flagid
        #   name        #               string
        #
-       #   relations[n] = (otherflagname, otherflagid, [stringfromyoweb],
-       #               thisdeclaring, otherdeclaringmin, otherdeclaringmax)
-       #               # where {this,other}declaring{,min,max} are:
-       #               #       -1      {this,other} is declaring war
-       #               #        0      {this,other} is not doing either
-       #               #       +1      {this,other} is allying
+       #   relations[n] = FlagRelation
        #   relation_byname[otherflagname] = relations[some_n]
        #   relation_byid[otherflagname] = relations[some_n]
        #
        #   islands[n] = (islandname, islandid)
        #
        def __init__(self, flagid, max_age=600):
+               self.flagid = flagid
                SomethingSoupInfo.__init__(self,
                        'flag/info.wm?flagid=', flagid, max_age)
                self._find_flag()
@@ -447,7 +485,13 @@ class FlagInfo(SomethingSoupInfo):
                        if rel: return 'flag id twice!'
                        if flagname in self.relation_byname:
                                return 'flag name twice!'
-                       rel = (flagname,flagid,head, thisdecl,othermin,othermax)
+                       rel = FlagRelation()
+                       rel.other_flagname = flagname
+                       rel.other_flagid = flagid
+                       rel.yoweb_heading = head
+                       rel.this_declaring = thisdecl
+                       rel.other_declaring_min = othermin
+                       rel.other_declaring_max = othermax
                        self.relations.append(rel)
                        self.relation_byid[flagid] = rel
                        self.relation_byname[flagid] = rel
@@ -497,49 +541,133 @@ class FlagInfo(SomethingSoupInfo):
 
 #---------- scraper for ocean info incl. embargoes etc. ----------
 
-class IslandInfo():
+class IslandBasicInfo():
+       # Public data attributes:
+       #  ocean
+       #  name
+       # Public data attributes maybe set by caller:
+       #  arch
        def __init__(self, ocean, islename):
                self.ocean = ocean
                self.name = islename
-       def collect(self):
-               pass
-       def yppedia_dataf(self):
+       def yppedia(self):
                def q(x): return urllib.quote(x.replace(' ','_'))
                url_rhs = q(self.name) + '_(' + q(self.ocean) + ')'
-               if opts.localhtml is None:
-                       url = 'http://yppedia.puzzlepirates.com/' + url_rhs
-                       debug('IslandInfo retrieving YPP '+url);
-                       return urllib.urlopen(url)
-               else:
-                       return file(opts.localhtml + '/' + url_rhs, 'r')
-       def yoweb_url(self):
-               soup = BeautifulSoup(self.yppedia_dataf())
+               return yppedia(url_rhs)
+       def __str__(self):
+               return `(self.ocean, self.name)`
+
+class IslandExtendedInfo(IslandBasicInfo):
+       # Public data attributes (inherited):
+       #  ocean
+       #  name
+       # Public data attributes (additional):
+       #  islandid
+       #  yoweb_url
+       #  flagid
+       def __init__(self, ocean, islename):
+               IslandBasicInfo.__init__(self, ocean, islename)
+               self.islandid = None
+               self.yoweb_url = None
+               self._collect_yoweb()
+               self._collect_flagid()
+
+       def _collect_yoweb(self):
+               debug('IEI COLLECT YOWEB '+`self.name`)
+               self.islandid = None
+               self.yoweb_url = None
+
+               soup = BeautifulSoup(self.yppedia())
                content = soup.find('div', attrs = {'id': 'content'})
                yoweb_re = regexp.compile('^http://\w+\.puzzlepirates\.com/'+
-                       'yoweb/island/info\.wm\?islandid=\d+$')
+                       'yoweb/island/info\.wm\?islandid=(\d+)$')
                a = soup.find('a', attrs = { 'href': yoweb_re })
-               if a is None: return None
-               return a['href']
-       def ruling_flag_id(self):
-               yo = self.yoweb_url()
+               if a is None:
+                       debug('IEI COLLECT YOWEB '+`self.name`+' NONE')
+                       return
+
+               debug('IEI COLLECT YOWEB '+`self.name`+' GOT '+``a``)
+               self.yoweb_url = a['href']
+               m = yoweb_re.search(self.yoweb_url)
+               self.islandid = m.group(1)
+
+       def _collect_flagid(self):
+               self.flagid = None
+
+               yo = self.yoweb_url
+               debug('IEI COLLECT FLAGID '+`self.name`+' URL '+`yo`)
                if yo is None: return None
-               dataf = fetcher.fetch(yo, 600)
+               dataf = fetcher.fetch(yo, 1800)
                soup = BeautifulSoup(dataf)
-               ruler_re = regexp.compile('http://\w+\.puzzlepirates\.com/'+
-                       'yoweb/flag/info\.wm\?flagid=(\d+)$')
+               ruler_re = regexp.compile(
+                       '/yoweb/flag/info\.wm\?flagid=(\d+)$')
                ruler = soup.find('a', attrs = { 'href': ruler_re })
-               if not ruler: return None
-               m = ruler_re.find(ruler['href'])
-               return m.group(1)
+               if not ruler: 
+                       debug('IEI COLLECT FLAGID '+`self.name`+' NONE')
+                       return
+               debug('IEI COLLECT FLAGID '+`self.name`+' GOT '+``ruler``)
+               m = ruler_re.search(ruler['href'])
+               self.flagid = m.group(1)
+
+       def __str__(self):
+               return `(self.ocean, self.islandid, self.name,
+                       self.yoweb_url, self.flagid)`
+
+class IslandFlagInfo(IslandExtendedInfo):
+       # IslandExtendedInfo which additionally constructs the FlagInfo
+       # for the island's flagid, when there is one.
+       #
+       # Public data attributes (inherited):
+       #  ocean
+       #  name
+       #  islandid
+       #  yoweb_url
+       #  flagid
+       # Public data attributes (additional):
+       #  flag
+       def __init__(self, ocean, islename):
+               IslandExtendedInfo.__init__(self, ocean, islename)
+               self.flag = None
+               self._collect_flag()
+
+       def _collect_flag(self):
+               # flag stays None if the parent class found no flagid
+               if self.flagid is None: return
+               # 1800 is passed as FlagInfo's max_age
+               self.flag = FlagInfo(self.flagid, 1800)
+
+       def __str__(self):
+               return IslandExtendedInfo.__str__(self) + '; ' + str(self.flag)
+
+class NullProgressReporter():
+       # Progress reporter which reports nothing: offers the same
+       # doing/stop methods as TypewriterProgressReporter, as no-ops.
+       def doing(self, msg): pass
+       def stop(self): pass
+
+class TypewriterProgressReporter():
+       # Progress reporter which keeps rewriting a single line on
+       # sys.stdout.
+       #  doing(m)  show m followed by '...'
+       #  stop()    blank out whatever is currently shown
+       def __init__(self):
+               # length of the message most recently written
+               self._l = 0
+       def doing(self,m):
+               self._doing(m + '...')
+       def _doing(self,m):
+               # carriage return, then the new message over the old one
+               self._write('\r')
+               self._write(m)
+               less = self._l - len(m)
+               if less > 0:
+                       # the old message was longer: overwrite its tail
+                       # with spaces, then emit as many backspaces
+                       self._write(' ' * less)
+                       self._write('\b' * less)
+               self._l = len(m)
+               sys.stdout.flush()
+       def stop(self):
+               # an empty message makes _doing blank the whole old one
+               self._doing('')
+               self._l = 0
+       def _write(self,t):
+               sys.stdout.write(t)
 
 class OceanInfo():
-       # Public data attributes (valid after collect()):
+       # Public data attributes:
        #   oi.islands[islename] = IslandInfo(...)
        #   oi.arches[archname][islename] = IslandInfo(...)
-       def __init__(self):
-               self.isleclass = IslandInfo
+       def __init__(self, isleclass=IslandBasicInfo):
+               self.isleclass = isleclass
                self.ocean = fetcher.ocean.lower().capitalize()
-       def collect(self):
+
+               progressreporter.doing('fetching ocean info')
+
                cmdl = ['./yppedia-ocean-scraper']
                if opts.localhtml is not None:
                        cmdl += ['--local-html-dir',opts.localhtml]
@@ -557,10 +685,16 @@ class OceanInfo():
                arch_re = regexp.compile('^ (\S.*)')
                island_re = regexp.compile('^  (\S.*)')
 
+               oscraper.wait()
+               assert(oscraper.returncode == 0)
+
                self.islands = { }
                self.arches = { }
                archname = None
 
+               isles = [ ]
+               progressreporter.doing('parsing ocean info')
+
                for l in oscraper.stdout:
                        debug('OceanInfo collect l '+`l`)
                        l = l.rstrip('\n')
@@ -568,10 +702,7 @@ class OceanInfo():
                        if m:
                                assert(archname is not None)
                                islename = m.group(1)
-                               isle = self.isleclass(self.ocean, islename)
-                               isle.arch = archname
-                               self.islands[islename] = isle
-                               self.arches[archname][islename] = isle
+                               isles.append((archname, islename))
                                continue
                        m = arch_re.match(l)
                        if m:
@@ -580,8 +711,19 @@ class OceanInfo():
                                self.arches[archname] = { }
                                continue
                        assert(False)
-               oscraper.wait()
-               assert(oscraper.returncode == 0)
+
+               # Construct the island objects only now, after parsing,
+               # so that progress can be reported against the total.
+               # The range must cover every index: stopping at
+               # len(isles)-1 would leave out the last island.
+               for i in xrange(0, len(isles)):
+                       (archname, islename) = isles[i]
+                       progressreporter.doing(
+                               'fetching isle info %2d/%d (%s: %s)'
+                               % (i, len(isles), archname, islename))
+                       isle = self.isleclass(self.ocean, islename)
+                       isle.arch = archname
+                       self.islands[islename] = isle
+                       self.arches[archname][islename] = isle
+
+       def __str__(self):
+               return `(self.islands, self.arches)`
 
 #---------- pretty-printer for tables of pirate puzzle standings ----------
 
@@ -1232,22 +1374,27 @@ def do_pirate(pirates, bu):
                print '%s: %s,' % (`pirate`, info)
        print '}'
 
-def prep_crew_of(args, bu, max_age=300):
-       if len(args) != 1: bu('crew-of takes one pirate name')
+def prep_crewflag_of(args, bu, max_age, selector, constructor):
+       if len(args) != 1: bu('crew-of etc. take one pirate name')
        pi = PirateInfo(args[0], max_age)
-       if pi.crew is None: return None
-       return CrewInfo(pi.crew[0], max_age)
+       cf = selector(pi)
+       if cf is None: return None
+       return constructor(cf[0], max_age)
+
+def prep_crew_of(args, bu, max_age=300):
+       # CrewInfo for the crew of the single pirate named in args, or
+       # None if that pirate's PirateInfo has crew None.
+       # bu is called with a message if args is not exactly one name.
+       return prep_crewflag_of(args, bu, max_age,
+               (lambda pi: pi.crew), CrewInfo)
+
+def prep_flag_of(args, bu, max_age=300):
+       # FlagInfo for the flag of the single pirate named in args, or
+       # None if that pirate's PirateInfo has flag None.
+       # bu is called with a message if args is not exactly one name.
+       return prep_crewflag_of(args, bu, max_age,
+               (lambda pi: pi.flag), FlagInfo)
 
 def do_crew_of(args, bu):
        ci = prep_crew_of(args, bu)
        print ci
 
 def do_flag_of(args, bu):
-       if len(args) != 1: bu('flag-of takes one pirate name')
-       max_age = 300
-       pi = PirateInfo(args[0], max_age)
-       if pi.flag is None: fi = None
-       else: fi = FlagInfo(pi.flag[0], max_age)
+       fi = prep_flag_of(args, bu)
        print fi
 
 def do_standings_crew_of(args, bu):
@@ -1265,12 +1412,95 @@ def do_standings_crew_of(args, bu):
 def do_ocean(args, bu):
        if (len(args)): bu('ocean takes no further arguments')
        fetcher.default_ocean()
-       oi = OceanInfo()
-       oi.collect()
+       oi = OceanInfo(IslandFlagInfo)
+       print oi
        for islename in sorted(oi.islands.keys()):
                isle = oi.islands[islename]
-               yoweb_url = isle.yoweb_url()
-               print " %s -- %s" % (islename, yoweb_url)
+               print isle
+
+def do_embargoes(args, bu):
+       # Print a table, grouped by archipelago, of each island, its
+       # owning flag, and the flags for which the owner's relation
+       # has this_declaring < 0.
+       #  args   remaining command line arguments; must be empty
+       #  bu     called with a message string on bad usage
+       if (len(args)): bu('embargoes takes no further arguments')
+       fetcher.default_ocean()
+       oi = OceanInfo(IslandFlagInfo)
+       wr = sys.stdout.write
+
+       def getflname(isle):
+               if isle.islandid is None: return 'uncolonisable'
+               if isle.flag is None: return 'uncolonised'
+               return isle.flag.name
+
+       # Clear the progress line before producing any real output;
+       # otherwise the heading would be appended to whatever progress
+       # message OceanInfo left on the current line.
+       progressreporter.stop()
+
+       print ('EMBARGOES:  Island    | Owning flag'+
+               '                    | Embargoed flags')
+
+       for archname in sorted(oi.arches.keys()):
+               print 'ARCHIPELAGO: ',archname
+               for islename in sorted(oi.arches[archname].keys()):
+                       isle = oi.islands[islename]
+                       wr(' %-20s | ' % isle.name)
+                       flname = getflname(isle)
+                       wr('%-30s | ' % flname)
+                       flag = isle.flag
+                       # no flag: finish the row with an empty last column
+                       if flag is None: print ''; continue
+                       delim = ''
+                       for rel in flag.relations:
+                               if rel.this_declaring >= 0: continue
+                               wr(delim)
+                               wr(rel.other_flagname)
+                               delim = '; '
+                       print ''
+
+def do_embargoes_flag_of(args, bu):
+       # For the flag of the pirate named in args: list the islands
+       # whose owning flag has a this_declaring < 0 relation towards
+       # that flag, then print the war_flag report for it.
+       progressreporter.doing('fetching flag info')
+       fi = prep_flag_of(args, bu)
+       if fi is None:
+               progressreporter.stop()
+               print 'Pirate is not in a flag.'
+               return
+
+       oi = OceanInfo(IslandFlagInfo)
+
+       # clear the progress line before producing any real output
+       progressreporter.stop()
+       print ''
+
+       # any: whether the 'EMBARGOED:' heading has been printed yet
+       any = False
+       for islename in sorted(oi.islands.keys()):
+               isle = oi.islands[islename]
+               flag = isle.flag
+               if flag is None: continue
+               for rel in flag.relations:
+                       if rel.this_declaring >= 0: continue
+                       # NOTE(review): assumes other_flagid and
+                       # fi.flagid have the same type - confirm
+                       if rel.other_flagid != fi.flagid: continue
+                       if not any: print 'EMBARGOED:'
+                       any = True
+                       print "  %-30s (%s)" % (islename, flag.name)
+       if not any:
+               print 'No embargoes.'
+       print ''
+
+       war_flag(fi)
+       print ''
+
+def do_war_flag_of(args, bu):
+       # Print the war_flag report for the flag of the pirate named in
+       # args.  bu is called with a message on bad usage.
+       fi = prep_flag_of(args, bu)
+       if fi is None:
+               # prep_flag_of returns None for a pirate without a flag;
+               # war_flag would fail trying to read fi.relations
+               print 'Pirate is not in a flag.'
+               return
+       war_flag(fi)
+
+def war_flag(fi):
+       # Print the flags in fi.relations for which this_declaring < 0,
+       # in two groups: first those with other_declaring_max < 0
+       # ('SINKING PvP'), then the others ('RISK OF SINKING PvP').
+       # Prints 'No sinking PvP.' if neither group has any entry.
+       # fi must not be None.
+       any = False
+       for certain in [True, False]:
+               # anythis: whether this group's heading was printed yet
+               anythis = False
+               for rel in fi.relations:
+                       if rel.this_declaring >= 0: continue
+                       if (rel.other_declaring_max < 0) != certain: continue
+                       if not anythis:
+                               if certain: m = 'SINKING PvP'
+                               else: m = 'RISK OF SINKING PvP'
+                               # heading shows the yoweb_heading of the
+                               # first relation found in this group
+                               print '%s (%s):' % (m, rel.yoweb_heading)
+                       anythis = True
+                       any = True
+                       print " ", rel.other_flagname
+       if not any:
+               print 'No sinking PvP.'
 
 #----- modes which use the chat log parser are quite complex -----
 
@@ -1593,7 +1823,7 @@ class KeystrokeReader(DummyKeystrokeReader):
 #---------- main program ----------
 
 def main():
-       global opts, fetcher
+       global opts, fetcher, yppedia, progressreporter
 
        pa = OptionParser(
 '''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
@@ -1602,6 +1832,8 @@ actions:
  yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
  yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
  yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
+ yoweb-scrape [--ocean OCEAN ...] ocean|embargoes
+ yoweb-scrape [--ocean OCEAN ...] war-flag-of|embargoes-flag-of PIRATE
  yoweb-scrape [options] ship-aid CHAT-LOG  (must be .../PIRATE_OCEAN_chat-log*)
 
 display modes (for --display) apply to ship-aid:
@@ -1675,7 +1907,13 @@ display modes (for --display) apply to ship-aid:
                else:
                        opts.display = 'overwrite'
 
-       fetcher = Fetcher(opts.ocean, opts.cache_dir)
+       fetcher = Yoweb(opts.ocean, opts.cache_dir)
+       yppedia = Yppedia(opts.cache_dir)
+
+       if opts.debug or not os.isatty(0):
+                progressreporter = NullProgressReporter()
+       else:
+               progressreporter = TypewriterProgressReporter()
 
        mode_fn(args[1:], pa.error)