chiark / gitweb /
yoweb-scrape: wip new flag and ocean functionality
author Ian Jackson <ian@liberator.relativity.greenend.org.uk>
Sun, 16 Jan 2011 13:25:45 +0000 (13:25 +0000)
committer Ian Jackson <ian@liberator.relativity.greenend.org.uk>
Sun, 16 Jan 2011 13:25:45 +0000 (13:25 +0000)
yoweb-scrape

index a705f67..9b87ea3 100755 (executable)
@@ -44,6 +44,7 @@ import random
 import curses
 import termios
 import random
+import subprocess
 from optparse import OptionParser
 from StringIO import StringIO
 
@@ -98,6 +99,14 @@ def format_time_interval(ti):
        if ti < 86400: return '%dh' % (ti / 3600)
        return '%dd' % (ti / 86400)
 
+def yppsc_dir():
+       lib = os.getenv("YPPSC_YARRG_SRCBASE")
+       if lib is not None: return lib
+       lib = sys.argv[0] 
+       lib = regexp.sub('/[^/]+$', '', lib)
+       os.environ["YPPSC_YARRG_SRCBASE"] = lib
+       return lib
+
 #---------- caching and rate-limiting data fetcher ----------
 
 class Fetcher:
@@ -377,6 +386,110 @@ class CrewInfo(SomethingSoupInfo):
        def __str__(self):
                return `(self.crew, self.msgs)`
 
+class FlagInfo(SomethingSoupInfo):
+       def __init__(self, flagid, max_age=600):
+               SomethingSoupInfo.__init__(self,
+                       'flag/info.wm?flagid=', flagid, max_age)
+               self._find_flag()
+
+       def _find_flag(self):
+               font2 = self._soup.find('font',{'size':'+2'})
+               self.flag = font2.find('b').contents[0]
+               magnate = self._soup.find('img',{'src':
+                       '/yoweb/images/repute-MAGNATE.png'})
+               warinfo = (magnate.findParent('table').findParent('tr').
+                       findNextSibling('tr').findNext('td',{'align':'left'}))
+               for waritem in warinfo.contents:
+                       print 'ITEM ',`waritem`
+
+#---------- scraper for ocean info incl. embargoes etc. ----------
+
+class IslandInfo():
+       def __init__(self, ocean, islename):
+               self.ocean = ocean
+               self.name = islename
+       def collect(self):
+               pass
+       def yppedia_dataf(self):
+               def q(x): return urllib.quote(x.replace(' ','_'))
+               url_rhs = q(self.name) + '_(' + q(self.ocean) + ')'
+               if opts.localhtml is None:
+                       url = 'http://yppedia.puzzlepirates.com/' + url_rhs
+                       debug('IslandInfo retrieving YPP '+url);
+                       return urllib.urlopen(url)
+               else:
+                       return file(opts.localhtml + '/' + url_rhs, 'r')
+       def yoweb_url(self):
+               soup = BeautifulSoup(self.yppedia_dataf())
+               content = soup.find('div', attrs = {'id': 'content'})
+               yoweb_re = regexp.compile('^http://\w+\.puzzlepirates\.com/'+
+                       'yoweb/island/info\.wm\?islandid=\d+$')
+               a = soup.find('a', attrs = { 'href': yoweb_re })
+               if a is None: return None
+               return a['href']
+       def ruling_flag_id(self):
+               yo = self.yoweb_url()
+               if yo is None: return None
+               dataf = fetcher.fetch(yo, 600)
+               soup = BeautifulSoup(dataf)
+               ruler_re = regexp.compile('http://\w+\.puzzlepirates\.com/'+
+                       'yoweb/flag/info\.wm\?flagid=(\d+)$')
+               ruler = soup.find('a', attrs = { 'href': ruler_re })
+               if not ruler: return None
+               m = ruler_re.find(ruler['href'])
+               return m.group(1)
+
+class OceanInfo():
+       # Public data attributes (valid after collect()):
+       #   oi.islands[islename] = IslandInfo(...)
+       #   oi.arches[archname][islename] = IslandInfo(...)
+       def __init__(self):
+               self.isleclass = IslandInfo
+               self.ocean = fetcher.ocean.lower().capitalize()
+       def collect(self):
+               cmdl = ['./yppedia-ocean-scraper']
+               if opts.localhtml is not None:
+                       cmdl += ['--local-html-dir',opts.localhtml]
+               cmdl += [self.ocean]
+               debug('OceanInfo collect running ' + `cmdl`)
+               oscraper = subprocess.Popen(
+                       cmdl,
+                       stdout = subprocess.PIPE,
+                       cwd = yppsc_dir()+'/yarrg',
+                       shell=False, stderr=None,
+                       )
+               h = oscraper.stdout.readline()
+               debug('OceanInfo collect h '+`h`)
+               assert(regexp.match('^ocean ', h))
+               arch_re = regexp.compile('^ (\S.*)')
+               island_re = regexp.compile('^  (\S.*)')
+
+               self.islands = { }
+               self.arches = { }
+               archname = None
+
+               for l in oscraper.stdout:
+                       debug('OceanInfo collect l '+`l`)
+                       l = l.rstrip('\n')
+                       m = island_re.match(l)
+                       if m:
+                               assert(archname is not None)
+                               islename = m.group(1)
+                               isle = self.isleclass(self.ocean, islename)
+                               isle.arch = archname
+                               self.islands[islename] = isle
+                               self.arches[archname][islename] = isle
+                               continue
+                       m = arch_re.match(l)
+                       if m:
+                               archname = m.group(1)
+                               assert(archname not in self.arches)
+                               self.arches[archname] = { }
+                               continue
+                       assert(False)
+               oscraper.wait()
+               assert(oscraper.returncode == 0)
+
 #---------- pretty-printer for tables of pirate puzzle standings ----------
 
 class StandingsTable:
@@ -1036,6 +1149,14 @@ def do_crew_of(args, bu):
        ci = prep_crew_of(args, bu)
        print ci
 
+def do_flag_of(args, bu):
+       if len(args) != 1: bu('flag-of takes one pirate name')
+       max_age = 300
+       pi = PirateInfo(args[0], max_age)
+       if pi.flag is None: fi = None
+       else: fi = FlagInfo(pi.flag[0], max_age)
+       print `fi`
+
 def do_standings_crew_of(args, bu):
        ci = prep_crew_of(args, bu, 60)
        tab = StandingsTable(sys.stdout)
@@ -1048,6 +1169,18 @@ def do_standings_crew_of(args, bu):
                        pi = PirateInfo(p, random.randint(900,1800))
                        tab.pirate(pi)
 
+def do_ocean(args, bu):
+       if (len(args)): bu('ocean takes no further arguments')
+       fetcher.default_ocean()
+       oi = OceanInfo()
+       oi.collect()
+       for islename in sorted(oi.islands.keys()):
+               isle = oi.islands[islename]
+               yoweb_url = isle.yoweb_url()
+               print " %s -- %s" % (islename, yoweb_url)
+
+#----- modes which use the chat log parser are quite complex -----
+
 class ProgressPrintPercentage:
        def __init__(self, f=sys.stdout):
                self._f = f
@@ -1063,8 +1196,6 @@ class ProgressPrintPercentage:
                self._f.write('                   \r')
                self._f.flush()
 
-#----- modes which use the chat log parser are quite complex -----
-
 def prep_chat_log(args, bu,
                progress=ProgressPrintPercentage(),
                max_myself_age=3600):
@@ -1398,6 +1529,9 @@ display modes (for --display) apply to ship-aid:
        ao('--display', action='store', dest='display',
                type='choice', choices=['dumb','overwrite'],
                help='how to display ship aid')
+       ao('--local-ypp-dir', action='store', dest='localhtml',
+               help='get yppedia pages from local directory LOCALHTML'+
+                       ' instead of via HTTP')
 
        ao_jt = lambda wh, t: ao(
                '--timeout-sa-'+wh, action='store', dest='timeout_'+wh,