+ if not duty in self.standings:
+ self.needs_msgs(skl)
+
def _find_crewflag(self, cf, yoweb_re):
    """Find the single link matching yoweb_re and extract its id and text.

    cf       -- short name interpolated into the id regexp ('<cf>id=...')
                and into diagnostic messages (presumably 'crew' or
                'flag' -- confirm against callers).
    yoweb_re -- regexp source matched against the href of <a> elements
                in self.soup.

    Returns (id, text): id is the run of word characters following
    '<cf>id=' at the very end of the href, text is soup_text() of the
    link.  Returns None, after calling self.msg or self.soupm, when
    the number of matching links is not exactly one or when the href
    does not end in such an id.
    """
    anchors = self.soup.findAll('a', href=regexp.compile(yoweb_re))
    if len(anchors) != 1:
        self.msg('zero or several %s id references found' % cf)
        return None

    anchor = anchors[0]
    href = anchor['href']
    # The id must be the final component of the href (note the '$').
    id_pattern = '\\b%sid\\=(\\w+)$' % cf
    found = regexp.compile(id_pattern).search(href)
    if found is not None:
        label = soup_text(anchor)
        return (found.group(1), label)

    self.soupm(anchor,
               'incomprehensible %s id ref (%s in %s)'
               % (cf, id_pattern, href))
    return None
+
def __str__(self):
    """Return the repr of the tuple (crew, flag, standings, msgs)."""
    # repr() is the exact equivalent of the old backtick syntax, which
    # is deprecated in Python 2 and a syntax error in Python 3.
    return repr((self.crew, self.flag, self.standings, self.msgs))
+
+class CrewInfo(SomethingSoupInfo):
+ # Public data members:
+ # ci.crew = [ ('Captain', ['Pirate', ...]),
+ # ('Senior Officer', [...]),
+ # ... ]
+ # ci.msgs = [ 'message describing problem with scrape' ]
+
def __init__(self, crewid):
    """Initialise the base class for this crew id, then fill in self.crew.

    crewid -- passed to SomethingSoupInfo.__init__ together with the
              'crew/info.wm?crewid=' prefix (presumably the two are
              joined to form the page to fetch -- confirm in the base
              class).
    """
    page_prefix = 'crew/info.wm?crewid='
    SomethingSoupInfo.__init__(self, page_prefix, crewid)
    self._find_crew()
+
def _find_crew(self):
    """Populate self.crew with (rank, [member, ...]) pairs from self.soup.

    self.crew is always reset to [] first.  If the page does not hold
    exactly one captain image, or no enclosing table containing a
    pirate link can be found, a message is passed to self.msg and
    self.crew is left empty.
    """
    self.crew = []

    captain_imgs = self.soup.findAll(
        'img', src='/yoweb/images/crew-captain.png')
    if len(captain_imgs) != 1:
        self.msg('crew members: no. of captain images != 1')
        return

    # Start at the captain image and climb through enclosing tables
    # until reaching a node that contains at least one pirate link.
    node = captain_imgs[0]
    while not node.find('a', href=pirate_ref_re):
        node = node.findParent('table')
        if not node:
            self.msg('crew members: cannot find table')
            return

    members = None      # list for the rank currently being filled
    rank_img_re = regexp.compile('/yoweb/images/crew')
    for child in node.contents:
        # Only direct children are examined (original note:
        # findAll(recurse=False)); unicode instances -- presumably
        # the text nodes between rows -- are skipped.
        if isinstance(child, unicode):
            continue

        # A child holding a crew-rank image opens a new rank group.
        if child.find('img', attrs={'src': rank_img_re}):
            rank_name = soup_text(child)
            members = []
            self.crew.append((rank_name, members))
            continue

        # Any other child contributes its pirate links to the
        # current rank group.
        for link in child.findAll('a', href=pirate_ref_re):
            if members is None:
                self.soupm(link, 'crew members: crew before rank')
            else:
                members.append(soup_text(link))