From 5ee6146a73510ae17e9dc8d78d46ec0098548c49 Mon Sep 17 00:00:00 2001
From: Ian Jackson
Date: Sun, 16 Jan 2011 14:38:14 +0000
Subject: [PATCH] yoweb-scrape: wip new flag and ocean functionality - before rework waritem parser

---
 yoweb-scrape | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 96 insertions(+), 2 deletions(-)

diff --git a/yoweb-scrape b/yoweb-scrape
index 9b87ea3..68df216 100755
--- a/yoweb-scrape
+++ b/yoweb-scrape
@@ -387,6 +387,21 @@ class CrewInfo(SomethingSoupInfo):
 		return `(self.crew, self.msgs)`
 
 class FlagInfo(SomethingSoupInfo):
+	# Public data members (after init):
+	#
+	#   name	# string
+	#
+	#   relations[n] = (otherflagname, otherflagid, [stringfromyoweb],
+	#		thisdeclaring, otherdeclaringmin, otherdeclaringmax)
+	#		# where {this,other}declaring{,min,max} are:
+	#		#	-1	{this,other} is declaring war
+	#		#	 0	{this,other} is not doing either
+	#		#	+1	{this,other} is allying
+	#   relation_byname[otherflagname] = relations[some_n]
+	#   relation_byid[otherflagid] = relations[some_n]
+	#
+	#   islands[n] = (islandname, islandid)
+	#
 	def __init__(self, flagid, max_age=600):
 		SomethingSoupInfo.__init__(self,
 			'flag/info.wm?flagid=', flagid, max_age)
@@ -394,13 +409,92 @@ class FlagInfo(SomethingSoupInfo):
 
 	def _find_flag(self):
 		font2 = self._soup.find('font',{'size':'+2'})
-		self.flag = font2.find('b').contents[0]
+		self.name = font2.find('b').contents[0]
+
+		self.relations = [ ]
+		self.relation_byname = { }
+		self.relation_byid = { }
+		self.islands = [ ]
+
 		magnate = self._soup.find('img',{'src':
 			'/yoweb/images/repute-MAGNATE.png'})
 		warinfo = (magnate.findParent('table').findParent('tr').
 			findNextSibling('tr').findNext('td',{'align':'left'}))
+
+		def warn(m):
+			print >>sys.stderr, 'WARNING: '+m
+
+		def wi_warn(head, waritem):
+			warn('unknown warmap item: %s: %s' %
+				(`head`, ``waritem``))
+
+		def wihelp_item(waritem, thing):
+			if waritem.name == 'a':
+				url = waritem.get('href', None)
+				if url is None:
+					return ('no url for '+thing,None,None)
+			else:
+				hr = waritem.find('a',{'href':True})
+				if not hr: return ('no a for '+thing,None,None)
+				url = hr['href']
+			m = regexp.search('\?'+thing+'id=(\d+)$', url)
+			if not m: return ('no '+thing+'id',None,None)
+			tid = m.group(1)
+			tname = waritem.string	# not m.string: that is the URL
+			if tname is None:
+				return (thing+' name not just string',None,None)
+			return (None,tid,tname)
+
+		def wi_alwar(head, waritem, thisdecl, othermin, othermax):
+			(err,flagid,flagname) = wihelp_item(waritem,'flag')
+			if err: return err
+			rel = self.relation_byid.get(flagid, None)
+			if rel: return 'flag id twice!'
+			if flagname in self.relation_byname:
+				return 'flag name twice!'
+			rel = (flagname,flagid,[], thisdecl,othermin,othermax)
+			self.relations.append(rel)
+			self.relation_byid[flagid] = rel
+			self.relation_byname[flagname] = rel
+
+		def wi_isle(head, waritem):
+			(err,isleid,islename) = wihelp_item(waritem,'island')
+			if err: return err
+			self.islands.append((islename,isleid))
+
+		warmap = {
+			'Allied with': (wi_alwar,+1,+1,+1),
+			'Declaring war against': (wi_alwar,-1, 0,+1),
+			'At war with': (wi_alwar,-1,-1,-1),
+			'Trying to form an alliance with': (wi_alwar,+1,-1,0),
+			'Islands controlled by this flag': (wi_isle,),
+			}
+
+		how = (None, wi_warn)	# (head, handler, handler args...)
+
 		for waritem in warinfo.contents:
-			print 'ITEM ',`waritem`
+			debug('WARITEM '+``waritem``)
+			if isinstance(waritem, unicode):
+				waritem = waritem.strip()
+				if waritem: warn('unknown waritem '+``waritem``)
+				continue
+			if waritem.name == 'br':
+				continue
+			if waritem.name == 'b':
+				head = ''.join(waritem.findAll(text=True))
+				head = regexp.sub('\\s+', ' ', head).strip()
+				head = head.rstrip(':')
+				how = (head,) + warmap.get(head, (wi_warn,))
+				continue
+			debug('WARHOW %s(%s, waritem, *%s)' %
+				(how[1], `how[0]`, `how[2:]`))
+			bad = how[1](how[0], waritem, *how[2:])
+			if bad:
+				warn('bad waritem %s: %s: %s' % (`how[0]`,
+					bad, ``waritem``))
+
+	def __str__(self):
+		return `(self.name, self.islands, self.relations)`
 
 #---------- scraper for ocean info incl. embargoes etc. ----------
 
-- 
2.30.2