From a732139b9b5d7a0a7d132f3020101f52ceea00e2 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Sun, 16 Jan 2011 14:47:38 +0000 Subject: [PATCH] yoweb-scrape: wip new flag and ocean functionality - can parse a flag now --- yoweb-scrape | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/yoweb-scrape b/yoweb-scrape index 68df216..ba828f5 100755 --- a/yoweb-scrape +++ b/yoweb-scrape @@ -429,18 +429,13 @@ class FlagInfo(SomethingSoupInfo): (`head`, ``waritem``)) def wihelp_item(waritem, thing): - if waritem.name == 'a': - url = waritem.get('href', None) - if url is None: - return ('no url for '+thing,None,None) - else: - hr = waritem.find('a',{'href':True}) - if not hr: return ('no a for '+thing,None,None) - url = hr['href'] + url = waritem.get('href', None) + if url is None: + return ('no url for '+thing,None,None) m = regexp.search('\?'+thing+'id=(\d+)$', url) if not m: return ('no '+thing+'id',None,None) tid = m.group(1) - tname = m.string + tname = waritem.string if tname is None: return (thing+' name not just string',None,None) return (None,tid,tname) @@ -452,7 +447,7 @@ class FlagInfo(SomethingSoupInfo): if rel: return 'flag id twice!' if flagname in self.relation_byname: return 'flag name twice!' - rel = (flagname,flagid,[], thisdecl,othermin,othermax) + rel = (flagname,flagid,head, thisdecl,othermin,othermax) self.relations.append(rel) self.relation_byid[flagid] = rel self.relation_byname[flagid] = rel @@ -472,20 +467,24 @@ class FlagInfo(SomethingSoupInfo): how = (wi_warn, None) - for waritem in warinfo.contents: - debug('WARITEM '+``waritem``) - if isinstance(waritem, unicode): - waritem = waritem.strip() - if waritem: warn('unknown waritem '+``waritem``) - continue - if waritem.name == 'br': - continue - if waritem.name == 'b': - head = ''.join(waritem.findAll(text=True)) + for waritem in warinfo.findAll(['font','a']): + if waritem is None: break + if waritem.name == 'font': + colour = waritem.get('color',None) + if colour.lstrip('#') != '958A5F': + warn('strange colour %s in %s' % + (colour,``waritem``)) + continue + head = waritem.string + if head is None: + warn('no head string in '+``waritem``) + continue head = regexp.sub('\\s+', ' ', head).strip() head = head.rstrip(':') how = (head,) + warmap.get(head, (wi_warn,)) continue + assert(waritem.name == 'a') + debug('WARHOW %s(%s, waritem, *%s)' % (how[1], `how[0]`, `how[2:]`)) bad = how[1](how[0], waritem, *how[2:]) @@ -1249,7 +1248,7 @@ def do_flag_of(args, bu): pi = PirateInfo(args[0], max_age) if pi.flag is None: fi = None else: fi = FlagInfo(pi.flag[0], max_age) - print `fi` + print fi def do_standings_crew_of(args, bu): ci = prep_crew_of(args, bu, 60) -- 2.30.2