chiark / gitweb /
yoweb-scrape: wip new flag and ocean functionality - can parse a flag now
author Ian Jackson <ian@liberator.relativity.greenend.org.uk>
Sun, 16 Jan 2011 14:47:38 +0000 (14:47 +0000)
committer Ian Jackson <ian@liberator.relativity.greenend.org.uk>
Sun, 16 Jan 2011 14:47:38 +0000 (14:47 +0000)
yoweb-scrape

index 68df216..ba828f5 100755 (executable)
@@ -429,18 +429,13 @@ class FlagInfo(SomethingSoupInfo):
                                (`head`, ``waritem``))
 
                def wihelp_item(waritem, thing):
-                       if waritem.name == 'a':
-                               url = waritem.get('href', None)
-                               if url is None:
-                                       return ('no url for '+thing,None,None)
-                       else:
-                               hr = waritem.find('a',{'href':True})
-                               if not hr: return ('no a for '+thing,None,None)
-                               url = hr['href']
+                       url = waritem.get('href', None)
+                       if url is None:
+                               return ('no url for '+thing,None,None)
                        m = regexp.search('\?'+thing+'id=(\d+)$', url)
                        if not m: return ('no '+thing+'id',None,None)
                        tid = m.group(1)
-                       tname = m.string
+                       tname = waritem.string
                        if tname is None:
                                return (thing+' name not just string',None,None)
                        return (None,tid,tname)
@@ -452,7 +447,7 @@ class FlagInfo(SomethingSoupInfo):
                        if rel: return 'flag id twice!'
                        if flagname in self.relation_byname:
                                return 'flag name twice!'
-                       rel = (flagname,flagid,[], thisdecl,othermin,othermax)
+                       rel = (flagname,flagid,head, thisdecl,othermin,othermax)
                        self.relations.append(rel)
                        self.relation_byid[flagid] = rel
                        self.relation_byname[flagid] = rel
@@ -472,20 +467,24 @@ class FlagInfo(SomethingSoupInfo):
 
                how = (wi_warn, None)
 
-               for waritem in warinfo.contents:
-                       debug('WARITEM '+``waritem``)
-                       if isinstance(waritem, unicode):
-                               waritem = waritem.strip()
-                               if waritem: warn('unknown waritem '+``waritem``)
-                               continue
-                       if waritem.name == 'br':
-                               continue
-                       if waritem.name == 'b':
-                               head = ''.join(waritem.findAll(text=True))
+               for waritem in warinfo.findAll(['font','a']):
+                       if waritem is None: break
+                       if waritem.name == 'font':
+                               colour = waritem.get('color',None)
+                               if colour.lstrip('#') != '958A5F':
+                                       warn('strange colour %s in %s' %
+                                               (colour,``waritem``))
+                                       continue
+                               head = waritem.string
+                               if head is None:
+                                       warn('no head string in '+``waritem``)
+                                       continue
                                head = regexp.sub('\\s+', ' ', head).strip()
                                head = head.rstrip(':')
                                how = (head,) + warmap.get(head, (wi_warn,))
                                continue
+                       assert(waritem.name == 'a')                             
+
                        debug('WARHOW %s(%s, waritem, *%s)' %
                                (how[1], `how[0]`, `how[2:]`))
                        bad = how[1](how[0], waritem, *how[2:])
@@ -1249,7 +1248,7 @@ def do_flag_of(args, bu):
        pi = PirateInfo(args[0], max_age)
        if pi.flag is None: fi = None
        else: fi = FlagInfo(pi.flag[0], max_age)
-       print `fi`
+       print fi
 
 def do_standings_crew_of(args, bu):
        ci = prep_crew_of(args, bu, 60)