chiark / gitweb /
yoweb-scrape: wip new flag and ocean functionality - before rework waritem parser
[ypp-sc-tools.main.git] / yoweb-scrape
index 3ef87609e0d268087a07055557c4446dfc5646f1..68df2163812afe1b409937bd3fff9c24147073f7 100755 (executable)
@@ -44,6 +44,7 @@ import random
 import curses
 import termios
 import random
+import subprocess
 from optparse import OptionParser
 from StringIO import StringIO
 
@@ -98,6 +99,14 @@ def format_time_interval(ti):
        if ti < 86400: return '%dh' % (ti / 3600)
        return '%dd' % (ti / 86400)
 
+def yppsc_dir():
+       lib = os.getenv("YPPSC_YARRG_SRCBASE")
+       if lib is not None: return lib
+       # default: directory holding this script ('.' if argv[0] has no '/')
+       lib = os.path.dirname(sys.argv[0]) or '.'
+       os.environ["YPPSC_YARRG_SRCBASE"] = lib
+       return lib
+
 #---------- caching and rate-limiting data fetcher ----------
 
 class Fetcher:
@@ -377,6 +386,204 @@ class CrewInfo(SomethingSoupInfo):
        def __str__(self):
                return `(self.crew, self.msgs)`
 
+class FlagInfo(SomethingSoupInfo):
+       # Scraped flag/info.wm page.  Public data members (after init):
+       #
+       #   name        #               string
+       #
+       #   relations[n] = (otherflagname, otherflagid, [stringfromyoweb],
+       #               thisdeclaring, otherdeclaringmin, otherdeclaringmax)
+       #               # where {this,other}declaring{,min,max} are:
+       #               #       -1      {this,other} is declaring war
+       #               #        0      {this,other} is not doing either
+       #               #       +1      {this,other} is allying
+       #   relation_byname[otherflagname] = relations[some_n]
+       #   relation_byid[otherflagid] = relations[some_n]
+       #
+       #   islands[n] = (islandname, islandid)
+       #
+       def __init__(self, flagid, max_age=600):
+               SomethingSoupInfo.__init__(self,
+                       'flag/info.wm?flagid=', flagid, max_age)
+               self._find_flag()
+
+       def _find_flag(self):
+               font2 = self._soup.find('font',{'size':'+2'})
+               self.name = font2.find('b').contents[0]
+
+               self.relations = [ ]
+               self.relation_byname = { }
+               self.relation_byid = { }
+               self.islands = [ ]
+
+               magnate = self._soup.find('img',{'src':
+                       '/yoweb/images/repute-MAGNATE.png'})
+               warinfo = (magnate.findParent('table').findParent('tr').
+                       findNextSibling('tr').findNext('td',{'align':'left'}))
+
+               def warn(m):
+                       print >>sys.stderr, 'WARNING: '+m
+
+               def wi_warn(head, waritem):
+                       warn('unknown warmap item: %s: %s' %
+                               (`head`, ``waritem``))
+
+               def wihelp_item(waritem, thing):
+                       if waritem.name == 'a':
+                               url = waritem.get('href', None)
+                               if url is None:
+                                       return ('no url for '+thing,None,None)
+                       else:
+                               hr = waritem.find('a',{'href':True})
+                               if not hr: return ('no a for '+thing,None,None)
+                               url = hr['href']
+                       m = regexp.search('\?'+thing+'id=(\d+)$', url)
+                       if not m: return ('no '+thing+'id',None,None)
+                       tid = m.group(1)
+                       tname = m.string # NOTE(review): this is the searched URL, never None; link text intended? confirm
+                       if tname is None:
+                               return (thing+' name not just string',None,None)
+                       return (None,tid,tname)
+
+               def wi_alwar(head, waritem, thisdecl, othermin, othermax):
+                       (err,flagid,flagname) = wihelp_item(waritem,'flag')
+                       if err: return err
+                       rel = self.relation_byid.get(flagid, None)
+                       if rel: return 'flag id twice!'
+                       if flagname in self.relation_byname:
+                               return 'flag name twice!'
+                       rel = (flagname,flagid,[], thisdecl,othermin,othermax)
+                       self.relations.append(rel)
+                       self.relation_byid[flagid] = rel
+                       self.relation_byname[flagname] = rel # keyed by name, as the check above expects
+
+               def wi_isle(head, waritem):
+                       (err,isleid,islename) = wihelp_item(waritem,'island')
+                       if err: return err
+                       self.islands.append((islename,isleid)) # (islandname, islandid) as documented
+
+               warmap = {
+                       'Allied with':                  (wi_alwar,+1,+1,+1),
+                       'Declaring war against':        (wi_alwar,-1, 0,+1),
+                       'At war with':                  (wi_alwar,-1,-1,-1),
+                       'Trying to form an alliance with': (wi_alwar,+1,-1,0),
+                       'Islands controlled by this flag': (wi_isle,),
+                       }
+
+               how = (None, wi_warn) # (head, handler, *extra args), same layout as set per <b> heading below
+
+               for waritem in warinfo.contents:
+                       debug('WARITEM '+``waritem``)
+                       if isinstance(waritem, unicode):
+                               waritem = waritem.strip()
+                               if waritem: warn('unknown waritem '+``waritem``)
+                               continue
+                       if waritem.name == 'br':
+                               continue
+                       if waritem.name == 'b':
+                               head = ''.join(waritem.findAll(text=True))
+                               head = regexp.sub('\\s+', ' ', head).strip()
+                               head = head.rstrip(':')
+                               how = (head,) + warmap.get(head, (wi_warn,))
+                               continue
+                       debug('WARHOW %s(%s, waritem, *%s)' %
+                               (how[1], `how[0]`, `how[2:]`))
+                       bad = how[1](how[0], waritem, *how[2:])
+                       if bad:
+                               warn('bad waritem %s: %s: %s' % (`how[0]`,
+                                       bad, ``waritem``))
+
+       def __str__(self):
+               return `(self.name, self.islands, self.relations)`
+
+#---------- scraper for ocean info incl. embargoes etc. ----------
+
+class IslandInfo(): # one named island of one ocean; looks itself up on YPPedia, then yoweb
+       def __init__(self, ocean, islename):
+               self.ocean = ocean
+               self.name = islename
+       def collect(self):
+               pass
+       def yppedia_dataf(self):
+               def q(x): return urllib.quote(x.replace(' ','_'))
+               url_rhs = q(self.name) + '_(' + q(self.ocean) + ')'
+               if opts.localhtml is None:
+                       url = 'http://yppedia.puzzlepirates.com/' + url_rhs
+                       debug('IslandInfo retrieving YPP '+url)
+                       return urllib.urlopen(url)
+               else:
+                       return file(opts.localhtml + '/' + url_rhs, 'r')
+       def yoweb_url(self):
+               soup = BeautifulSoup(self.yppedia_dataf())
+               content = soup.find('div', attrs = {'id': 'content'}) # NOTE(review): unused; the search below covers the whole page
+               yoweb_re = regexp.compile('^http://\w+\.puzzlepirates\.com/'+
+                       'yoweb/island/info\.wm\?islandid=\d+$')
+               a = soup.find('a', attrs = { 'href': yoweb_re })
+               if a is None: return None
+               return a['href']
+       def ruling_flag_id(self):
+               yo = self.yoweb_url()
+               if yo is None: return None
+               dataf = fetcher.fetch(yo, 600)
+               soup = BeautifulSoup(dataf)
+               ruler_re = regexp.compile('http://\w+\.puzzlepirates\.com/'+
+                       'yoweb/flag/info\.wm\?flagid=(\d+)$')
+               ruler = soup.find('a', attrs = { 'href': ruler_re })
+               if not ruler: return None
+               m = ruler_re.search(ruler['href']) # pattern objects have search/match, no find
+               return m.group(1)
+
+class OceanInfo(): # islands and archipelagoes of one ocean, parsed from yppedia-ocean-scraper output
+       # Public data attributes (valid after collect()):
+       #   oi.islands[islename] = IslandInfo(...)
+       #   oi.arches[archname][islename] = IslandInfo(...)
+       def __init__(self):
+               self.isleclass = IslandInfo
+               self.ocean = fetcher.ocean.lower().capitalize()
+       def collect(self):
+               cmdl = ['./yppedia-ocean-scraper']
+               if opts.localhtml is not None:
+                       cmdl += ['--local-html-dir',opts.localhtml]
+               cmdl += [self.ocean]
+               debug('OceanInfo collect running ' + `cmdl`)
+               oscraper = subprocess.Popen(
+                       cmdl,
+                       stdout = subprocess.PIPE,
+                       cwd = yppsc_dir()+'/yarrg',
+                       shell=False, stderr=None,
+                       )
+               h = oscraper.stdout.readline() # first line must be a header starting 'ocean '
+               debug('OceanInfo collect h '+`h`)
+               assert(regexp.match('^ocean ', h))
+               arch_re = regexp.compile('^ (\S.*)') # one leading space: archipelago name
+               island_re = regexp.compile('^  (\S.*)') # two leading spaces: island name
+
+               self.islands = { }
+               self.arches = { }
+               archname = None
+
+               for l in oscraper.stdout:
+                       debug('OceanInfo collect l '+`l`)
+                       l = l.rstrip('\n')
+                       m = island_re.match(l)
+                       if m:
+                               assert(archname is not None) # an island line needs a preceding archipelago line
+                               islename = m.group(1)
+                               isle = self.isleclass(self.ocean, islename)
+                               isle.arch = archname
+                               self.islands[islename] = isle
+                               self.arches[archname][islename] = isle
+                               continue
+                       m = arch_re.match(l)
+                       if m:
+                               archname = m.group(1)
+                               assert(archname not in self.arches)
+                               self.arches[archname] = { }
+                               continue
+                       assert(False) # line is neither an island nor an archipelago
+               oscraper.wait()
+               assert(oscraper.returncode == 0)
+
 #---------- pretty-printer for tables of pirate puzzle standings ----------
 
 class StandingsTable:
@@ -681,6 +888,56 @@ class ChatLogTracker:
                else:
                        return None
 
+       def local_command(self, metacmd):
+               # returns None if all went well, or problem message
+               log_disposition = lambda m: debug('CMD %s' % metacmd)
+               return self._command(self._myself.name, metacmd,
+                       "local", time.time(), log_disposition)
+
+       def _command(self, cmdr, metacmd, chan, timestamp, d):
+               # returns None if all went well, or problem message
+               metacmd = regexp.sub('\\s+', ' ', metacmd).strip()
+               m2 = regexp.match(
+                   '/([adj]) (?:([A-Za-z* ]+)\\s*:)?([A-Za-z ]+)$',
+                   metacmd)
+               if not m2: return "unknown syntax or command"
+
+               (cmd, pattern, targets) = m2.groups()
+               dml = ['cmd', chan, cmd]
+
+               if cmd == 'a': each = self._onboard_event
+               elif cmd == 'd': each = disembark # NOTE(review): not defined in this method; confirm it resolves, else NameError
+               else: each = lambda *l: self._onboard_event(*l,
+                               **{'jobber':'applied'})
+
+               if cmdr == self._myself.name:
+                       dml.append('self')
+                       how = 'cmd: %s' % cmd
+               else:
+                       dml.append('other')
+                       how = 'cmd: %s %s' % (cmd,cmdr)
+
+               if cmd == 'j':
+                       if pattern is not None:
+                               return "/j command does not take a vessel"
+                       v = None
+               else:
+                       v = self._find_matching_vessel(
+                               pattern, timestamp, cmdr,
+                               dml, create=True)
+
+               if cmd == 'j' or v is not None:
+                       targets = targets.strip().split(' ')
+                       dml.append(`len(targets)`)
+                       for target in targets:
+                               each(v, timestamp, target.title(), how)
+                       self._vessel_updated(v, timestamp) # NOTE(review): v is None for /j; confirm callee accepts that
+
+               dm = ' '.join(dml)
+               d(dm)
+               # d only records the disposition; its result is not a problem message
+               return None
+
        def chatline(self,l):
                rm = lambda re: regexp.match(re,l)
                d = lambda m: self._debug_line_disposition(timestamp,l,m)
@@ -783,40 +1040,12 @@ class ChatLogTracker:
 
                def chat_metacmd(chan):
                        (cmdr, metacmd) = m.groups()
-                       metacmd = regexp.sub('\\s+', ' ', metacmd).strip()
-                       m2 = regexp.match(
-                           '/([adj]) (?:([A-Za-z* ]+)\\s*:)?([A-Za-z ]+)$',
-                           metacmd)
-                       if not m2: return chat(chan)
-
-                       (cmd, pattern, targets) = m2.groups()
-                       dml = ['cmd', chan, cmd]
-
-                       if cmd == 'a': each = self._onboard_event
-                       elif cmd == 'd': each = disembark
-                       else: each = lambda v,ts,tt,h: self._onboard_event(
-                                       None,ts,tt,h, jobber='applied')
-
-                       if cmdr == self._myself.name:
-                               dml.append('self')
-                               how = 'cmd: %s' % cmd
+                       whynot = self._command(
+                               cmdr, metacmd, chan, timestamp, d)
+                       if whynot is not None:
+                               return chat(chan)
                        else:
-                               dml.append('other')
-                               how = 'cmd: %s %s' % (cmd,cmdr)
-
-                       v = self._find_matching_vessel(
-                               pattern, timestamp, cmdr, dml, create=True)
-
-                       if v is not None:
-                               targets = targets.strip().split(' ')
-                               dml.append(`len(targets)`)
-                               for target in targets:
-                                       each(v, timestamp, target.title(), how)
-                               self._vessel_updated(v, timestamp)
-
-                       dm = ' '.join(dml)
-                       chat_core(cmdr, 'cmd '+chan)
-                       return d(dm)
+                               chat_core(cmdr, 'cmd '+chan)
 
                m = rm('(\\w+) (?:issued an order|ordered everyone) "')
                if m: return ob1('general order');
@@ -1014,6 +1243,14 @@ def do_crew_of(args, bu):
        ci = prep_crew_of(args, bu)
        print ci
 
+def do_flag_of(args, bu):
+       if len(args) != 1: bu('flag-of takes one pirate name')
+       max_age = 300 # passed to both the pirate and the flag lookup
+       pi = PirateInfo(args[0], max_age)
+       if pi.flag is None: fi = None
+       else: fi = FlagInfo(pi.flag[0], max_age)
+       print fi # str(), as do_crew_of does, so FlagInfo.__str__ is used
+
 def do_standings_crew_of(args, bu):
        ci = prep_crew_of(args, bu, 60)
        tab = StandingsTable(sys.stdout)
@@ -1026,6 +1263,18 @@ def do_standings_crew_of(args, bu):
                        pi = PirateInfo(p, random.randint(900,1800))
                        tab.pirate(pi)
 
+def do_ocean(args, bu):
+       if args: bu('ocean takes no further arguments')
+       fetcher.default_ocean()
+       ocean = OceanInfo()
+       ocean.collect()
+       # one line per island, in sorted name order, with its yoweb URL
+       for name in sorted(ocean.islands):
+               url = ocean.islands[name].yoweb_url()
+               print " %s -- %s" % (name, url)
+
+#----- modes which use the chat log parser are quite complex -----
+
 class ProgressPrintPercentage:
        def __init__(self, f=sys.stdout):
                self._f = f
@@ -1041,8 +1290,6 @@ class ProgressPrintPercentage:
                self._f.write('                   \r')
                self._f.flush()
 
-#----- modes which use the chat log parser are quite complex -----
-
 def prep_chat_log(args, bu,
                progress=ProgressPrintPercentage(),
                max_myself_age=3600):
@@ -1214,15 +1461,27 @@ def ship_aid_core(myself, track, displayer, kreader):
        rotate_nya = '/-\\'
 
        sort = NameSorter()
+       clicmd = None
+       clierr = None
+       cliexec = None
 
        while True:
                track.catchup()
                now = time.time()
 
-               (vn, s) = find_vessel()
-               s = track.myname() + s
-               s += " at %s" % time.strftime("%Y-%m-%d %H:%M:%S")
-               s += kreader.info()
+               (vn, vs) = find_vessel()
+
+               s = ''
+               if cliexec is not None:
+                       s += '...'
+               elif clierr is not None:
+                       s += 'Error: '+clierr
+               elif clicmd is not None:
+                       s += '/' + clicmd
+               else:
+                       s = track.myname() + vs
+                       s += " at %s" % time.strftime("%Y-%m-%d %H:%M:%S")
+                       s += kreader.info()
                s += '\n'
 
                tbl_s = StringIO()
@@ -1263,11 +1522,34 @@ def ship_aid_core(myself, track, displayer, kreader):
                displayer.show(s)
                tbl_s.close()
 
+               if cliexec is not None:
+                       clierr = track.local_command("/"+cliexec.strip())
+                       cliexec = None
+                       continue
+
                k = kreader.getch()
                if k is None:
                        rotate_nya = rotate_nya[1:3] + rotate_nya[0]
                        continue
 
+               if clierr is not None:
+                       clierr = None
+                       continue
+
+               if clicmd is not None:
+                       if k == '\r' or k == '\n':
+                               cliexec = clicmd
+                               clicmd = clicmdbase
+                       elif k == '\e' and clicmd != "":
+                               clicmd = clicmdbase
+                       elif k == '\33':
+                               clicmd = None
+                       elif k == '\b' or k == '\177':
+                               clicmd = clicmd[ 0 : len(clicmd)-1 ]
+                       else:
+                               clicmd += k
+                       continue
+
                if k == 'q': break
                elif k == 'g': sort = SkillSorter('Gunning')
                elif k == 'c': sort = SkillSorter('Carpentry')
@@ -1277,6 +1559,8 @@ def ship_aid_core(myself, track, displayer, kreader):
                elif k == 'd': sort = SkillSorter('Battle Navigation')
                elif k == 't': sort = SkillSorter('Treasure Haul')
                elif k == 'a': sort = NameSorter()
+               elif k == '/': clicmdbase = ""; clicmd = clicmdbase
+               elif k == '+': clicmdbase = "a "; clicmd = clicmdbase
                else: pass # unknown key command
 
 #---------- individual keystroke input ----------
@@ -1339,6 +1623,9 @@ display modes (for --display) apply to ship-aid:
        ao('--display', action='store', dest='display',
                type='choice', choices=['dumb','overwrite'],
                help='how to display ship aid')
+       ao('--local-ypp-dir', action='store', dest='localhtml',
+               help='get yppedia pages from local directory LOCALHTML'+
+                       ' instead of via HTTP')
 
        ao_jt = lambda wh, t: ao(
                '--timeout-sa-'+wh, action='store', dest='timeout_'+wh,