chiark / gitweb /
yoweb-scrape: wip new flag and ocean functionality - before rework waritem parser
authorIan Jackson <ian@liberator.relativity.greenend.org.uk>
Sun, 16 Jan 2011 14:38:14 +0000 (14:38 +0000)
committerIan Jackson <ian@liberator.relativity.greenend.org.uk>
Sun, 16 Jan 2011 14:38:14 +0000 (14:38 +0000)
yoweb-scrape

index 9b87ea3..68df216 100755 (executable)
@@ -387,6 +387,21 @@ class CrewInfo(SomethingSoupInfo):
                return `(self.crew, self.msgs)`
 
 class FlagInfo(SomethingSoupInfo):
+       # Public data members (after init):
+       #
+       #   name        #               string
+       #
+       #   relations[n] = (otherflagname, otherflagid, [stringfromyoweb],
+       #               thisdeclaring, otherdeclaringmin, otherdeclaringmax)
+       #               # where {this,other}declaring{,min,max} are:
+       #               #       -1      {this,other} is declaring war
+       #               #        0      {this,other} is not doing either
+       #               #       +1      {this,other} is allying
+       #   relation_byname[otherflagname] = relations[some_n]
+       #   relation_byid[otherflagid] = relations[some_n]
+       #
+       #   islands[n] = (islandname, islandid)
+       #
        def __init__(self, flagid, max_age=600):
                SomethingSoupInfo.__init__(self,
                        'flag/info.wm?flagid=', flagid, max_age)
@@ -394,13 +409,92 @@ class FlagInfo(SomethingSoupInfo):
 
        def _find_flag(self):
                font2 = self._soup.find('font',{'size':'+2'})
-               self.flag = font2.find('b').contents[0]
+               self.name = font2.find('b').contents[0]
+
+               self.relations = [ ]
+               self.relation_byname = { }
+               self.relation_byid = { }
+               self.islands = [ ]
+
                magnate = self._soup.find('img',{'src':
                        '/yoweb/images/repute-MAGNATE.png'})
                warinfo = (magnate.findParent('table').findParent('tr').
                        findNextSibling('tr').findNext('td',{'align':'left'}))
+
+               def warn(m):
+                       print >>sys.stderr, 'WARNING: '+m
+
+               def wi_warn(head, waritem):
+                       warn('unknown warmap item: %s: %s' % 
+                               (`head`, ``waritem``))
+
+               def wihelp_item(waritem, thing):
+                       if waritem.name == 'a':
+                               url = waritem.get('href', None)
+                               if url is None:
+                                       return ('no url for '+thing,None,None)
+                       else:
+                               hr = waritem.find('a',{'href':True})
+                               if not hr: return ('no a for '+thing,None,None)
+                               url = hr['href']
+                       m = regexp.search('\?'+thing+'id=(\d+)$', url)
+                       if not m: return ('no '+thing+'id',None,None)
+                       tid = m.group(1)
+                       tname = m.string
+                       if tname is None:
+                               return (thing+' name not just string',None,None)
+                       return (None,tid,tname)
+
+               def wi_alwar(head, waritem, thisdecl, othermin, othermax):
+                       (err,flagid,flagname) = wihelp_item(waritem,'flag')
+                       if err: return err
+                       rel = self.relation_byid.get(flagid, None)
+                       if rel: return 'flag id twice!'
+                       if flagname in self.relation_byname:
+                               return 'flag name twice!'
+                       rel = (flagname,flagid,[], thisdecl,othermin,othermax)
+                       self.relations.append(rel)
+                       self.relation_byid[flagid] = rel
+                       self.relation_byname[flagid] = rel
+
+               def wi_isle(head, waritem):
+                       (err,isleid,islename) = wihelp_item(waritem,'island')
+                       if err: return err
+                       self.islands.append((isleid,islename))
+
+               warmap = {
+                       'Allied with':                  (wi_alwar,+1,+1,+1),
+                       'Declaring war against':        (wi_alwar,-1, 0,+1),
+                       'At war with':                  (wi_alwar,-1,-1,-1),
+                       'Trying to form an alliance with': (wi_alwar,+1,-1,0),
+                       'Islands controlled by this flag': (wi_isle,),
+                       }
+
+               how = (wi_warn, None)
+
                for waritem in warinfo.contents:
-                       print 'ITEM ',`waritem`
+                       debug('WARITEM '+``waritem``)
+                       if isinstance(waritem, unicode):
+                               waritem = waritem.strip()
+                               if waritem: warn('unknown waritem '+``waritem``)
+                               continue
+                       if waritem.name == 'br':
+                               continue
+                       if waritem.name == 'b':
+                               head = ''.join(waritem.findAll(text=True))
+                               head = regexp.sub('\\s+', ' ', head).strip()
+                               head = head.rstrip(':')
+                               how = (head,) + warmap.get(head, (wi_warn,))
+                               continue
+                       debug('WARHOW %s(%s, waritem, *%s)' %
+                               (how[1], `how[0]`, `how[2:]`))
+                       bad = how[1](how[0], waritem, *how[2:])
+                       if bad:
+                               warn('bad waritem %s: %s: %s' % (`how[0]`,
+                                       bad, ``waritem``))
+
+       def __str__(self):
+               return `(self.name, self.islands, self.relations)`
 
 #---------- scraper for ocean info incl. embargoes etc. ----------