chiark / gitweb /
query_routesearch: fix to not complain about >=
[ypp-sc-tools.web-test.git] / yoweb-scrape
index f9976b8341a14bf96aa5d851a7691c8b13df0ca2..717b539131d8ffb0e4ba6abba5f6cddc3b38b199 100755 (executable)
@@ -45,6 +45,7 @@ import curses
 import termios
 import random
 import subprocess
+import copy
 from optparse import OptionParser
 from StringIO import StringIO
 
@@ -107,6 +108,18 @@ def yppsc_dir():
        os.environ["YPPSC_YARRG_SRCBASE"] = lib
        return lib
 
+soup_massage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
+soup_massage.append(
+               (regexp.compile('(\<td.*") ("center")'),
+                lambda m: m.group(1)+' align='+m.group(2))
+       )
+
+def make_soup(*args, **kwargs):
+       return BeautifulSoup(*args,
+               convertEntities=BeautifulSoup.HTML_ENTITIES,
+               markupMassage=soup_massage,
+                        **kwargs)
+
 #---------- caching and rate-limiting data fetcher ----------
 
 class Fetcher:
@@ -118,12 +131,24 @@ class Fetcher:
                        if oe.errno != errno.EEXIST: raise
                self._cache_scan(time.time())
 
-       def _cache_scan(self, now):
+       def _match_url_normalise(self, url):
+               without_scheme = regexp.sub('^[a-z]+://', '', url)
+               without_tail = regexp.sub('/.*', '', without_scheme)
+               return without_tail
+
+       def _cache_scan(self, now, match_url=None):
                # returns list of ages, unsorted
+               if match_url is not None:
+                       match_url = self._match_url_normalise(match_url)
                ages = []
                debug('Fetcher   scan_cache')
                for leaf in os.listdir(self.cachedir):
                        if not leaf.startswith('#'): continue
+                       if match_url is not None:
+                               leaf_url = urllib.unquote_plus(leaf.strip('#'))
+                               leaf_url = self._match_url_normalise(leaf_url)
+                               if leaf_url != match_url:
+                                       continue
                        path = self.cachedir + '/' + leaf
                        try: s = os.stat(path)
                        except (OSError,IOError), oe:
@@ -139,8 +164,8 @@ class Fetcher:
                        ages.append(age)
                return ages
 
-       def need_wait(self, now, imaginary=[]):
-               ages = self._cache_scan(now)
+       def need_wait(self, now, imaginary=[], next_url=None):
+               ages = self._cache_scan(now, match_url=next_url)
                ages += imaginary
                ages.sort()
                debug('Fetcher   ages ' + `ages`)
@@ -152,15 +177,14 @@ class Fetcher:
                                        (min_age, age))
                                need_wait = max(need_wait, min_age - age)
                        min_age += 3
-                       min_age *= 1.25
                if need_wait > 0:
                        need_wait += random.random() - 0.5
                return need_wait
 
-       def _rate_limit_cache_clean(self, now):
-               need_wait = self.need_wait(now)
+       def _rate_limit_cache_clean(self, now, next_url=None):
+               need_wait = self.need_wait(now, next_url=next_url)
                if need_wait > 0:
-                       debug('Fetcher   wait %d' % need_wait)
+                       debug('Fetcher   wait %f' % need_wait)
                        sleep(need_wait)
 
        def fetch(self, url, max_age):
@@ -186,7 +210,7 @@ class Fetcher:
                        return data
 
                debug('Fetcher  fetch')
-               self._rate_limit_cache_clean(now)
+               self._rate_limit_cache_clean(now, next_url=url)
 
                stream = urllib2.urlopen(url)
                data = stream.read()
@@ -252,9 +276,7 @@ class SomethingSoupInfo(SoupLog):
        def __init__(self, kind, tail, max_age):
                SoupLog.__init__(self)
                html = fetcher.yoweb(kind, tail, max_age)
-               self._soup = BeautifulSoup(html,
-                       convertEntities=BeautifulSoup.HTML_ENTITIES
-                       )
+               self._soup = make_soup(html)
 
 #---------- scraper for pirate pages ----------
 
@@ -409,18 +431,31 @@ class CrewInfo(SomethingSoupInfo):
        def __str__(self):
                return `(self.crew, self.msgs)`
 
+class FlagRelation():
+       # Public data members (put there by hand by creator)
+       #       other_flagname
+       #       other_flagid
+       #       yoweb_heading
+       #       this_declaring
+       #       other_declaring_min
+       #       other_declaring_max
+       # where {this,other}_declaring{,_min,_max} are:
+       #       -1      {this,other} is declaring war
+       #        0      {this,other} is not doing either
+       #       +1      {this,other} is allying
+       def __repr__(self):
+               return '<FlagRelation %s %d/%d..%d %s %s>' % (
+                       self.yoweb_heading, self.this_declaring,
+                       self.other_declaring_min, self.other_declaring_max,
+                       self.other_flagname, self.other_flagid)
+
 class FlagInfo(SomethingSoupInfo):
        # Public data members (after init):
        #
        #   flagid
        #   name        #               string
        #
-       #   relations[n] = (otherflagname, otherflagid, [stringfromyoweb],
-       #               thisdeclaring, otherdeclaringmin, otherdeclaringmax)
-       #               # where {this,other}declaring{,min,max} are:
-       #               #       -1      {this,other} is declaring war
-       #               #        0      {this,other} is not doing either
-       #               #       +1      {this,other} is allying
+       #   relations[n] = FlagRelation
        #   relation_byname[otherflagname] = relations[some_n]
 #   relation_byid[otherflagid] = relations[some_n]
        #
@@ -472,7 +507,13 @@ class FlagInfo(SomethingSoupInfo):
                        if rel: return 'flag id twice!'
                        if flagname in self.relation_byname:
                                return 'flag name twice!'
-                       rel = (flagname,flagid,head, thisdecl,othermin,othermax)
+                       rel = FlagRelation()
+                       rel.other_flagname = flagname
+                       rel.other_flagid = flagid
+                       rel.yoweb_heading = head
+                       rel.this_declaring = thisdecl
+                       rel.other_declaring_min = othermin
+                       rel.other_declaring_max = othermax
                        self.relations.append(rel)
                        self.relation_byid[flagid] = rel
                        self.relation_byname[flagid] = rel
@@ -558,7 +599,7 @@ class IslandExtendedInfo(IslandBasicInfo):
                self.islandid = None
                self.yoweb_url = None
 
-               soup = BeautifulSoup(self.yppedia())
+               soup = make_soup(self.yppedia())
                content = soup.find('div', attrs = {'id': 'content'})
                yoweb_re = regexp.compile('^http://\w+\.puzzlepirates\.com/'+
                        'yoweb/island/info\.wm\?islandid=(\d+)$')
@@ -579,7 +620,7 @@ class IslandExtendedInfo(IslandBasicInfo):
                debug('IEI COLLECT FLAGID '+`self.name`+' URL '+`yo`)
                if yo is None: return None
                dataf = fetcher.fetch(yo, 1800)
-               soup = BeautifulSoup(dataf)
+               soup = make_soup(dataf)
                ruler_re = regexp.compile(
                        '/yoweb/flag/info\.wm\?flagid=(\d+)$')
                ruler = soup.find('a', attrs = { 'href': ruler_re })
@@ -1376,6 +1417,7 @@ def do_crew_of(args, bu):
 
 def do_flag_of(args, bu):
        fi = prep_flag_of(args, bu)
+       print fi
 
 def do_standings_crew_of(args, bu):
        ci = prep_crew_of(args, bu, 60)
@@ -1424,34 +1466,63 @@ def do_embargoes(args, bu):
                        if flag is None: print ''; continue
                        delim = ''
                        for rel in flag.relations:
-                               (oname, oid, dummy, thisdeclaring,
-                                       odeclaringmin,odeclaringmax) = rel
-                               if thisdeclaring >= 0: continue
+                               if rel.this_declaring >= 0: continue
                                wr(delim)
-                               wr(oname)
+                               wr(rel.other_flagname)
                                delim = '; '
                        print ''
 
 def do_embargoes_flag_of(args, bu):
        progressreporter.doing('fetching flag info')
        fi = prep_flag_of(args, bu)
+       if fi is None:
+               progressreporter.stop()
+               print 'Pirate is not in a flag.'
+               return
+
        oi = OceanInfo(IslandFlagInfo)
+
        progressreporter.stop()
+       print ''
+
        any = False
        for islename in sorted(oi.islands.keys()):
                isle = oi.islands[islename]
                flag = isle.flag
                if flag is None: continue
                for rel in flag.relations:
-                       (oname, oid, dummy, thisdeclaring,
-                               odeclaringmin,odeclaringmax) = rel
-                       if thisdeclaring >= 0: continue
-                       if oid != fi.flagid: continue
+                       if rel.this_declaring >= 0: continue
+                       if rel.other_flagid != fi.flagid: continue
                        if not any: print 'EMBARGOED:'
                        any = True
-                       print " %-30s (%s)" % (islename, flag.name)
+                       print "  %-30s (%s)" % (islename, flag.name)
        if not any:
                print 'No embargoes.'
+       print ''
+
+       war_flag(fi)
+       print ''
+
+def do_war_flag_of(args, bu):
+       fi = prep_flag_of(args, bu)
+       war_flag(fi)
+
+def war_flag(fi):
+       any = False
+       for certain in [True, False]:
+               anythis = False
+               for rel in fi.relations:
+                       if rel.this_declaring >= 0: continue
+                       if (rel.other_declaring_max < 0) != certain: continue
+                       if not anythis:
+                               if certain: m = 'SINKING PvP'
+                               else: m = 'RISK OF SINKING PvP'
+                               print '%s (%s):' % (m, rel.yoweb_heading)
+                       anythis = True
+                       any = True
+                       print " ", rel.other_flagname
+       if not any:
+               print 'No sinking PvP.'
 
 #----- modes which use the chat log parser are quite complex -----
 
@@ -1784,7 +1855,7 @@ actions:
  yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
  yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
  yoweb-scrape [--ocean OCEAN ...] ocean|embargoes
- yoweb-scrape [--ocean OCEAN ...] embargoes-flag-of PIRATE
+ yoweb-scrape [--ocean OCEAN ...] war-flag-of|embargoes-flag-of PIRATE
  yoweb-scrape [options] ship-aid CHAT-LOG  (must be .../PIRATE_OCEAN_chat-log*)
 
 display modes (for --display) apply to ship-aid: