class PirateInfo:
    """Duty standings scraped from a pirate's yoweb page.

    Public data members:
      pi.skills = { 'Treasure Haul': 3, ... }
          Maps each duty whose standing could be determined to the
          index of that standing in the module-level `standings`
          sequence.  Duties that could not be determined are absent.
      pi.msgs = [ 'message describing problem with scrape' ]
          Problems met while scraping.  Emptied again at the end when
          every duty in `duties` ended up in pi.skills.
    """

    def _log(self, m):
        """Append problem message `m` to self.msgs."""
        self.msgs.append(m)

    def _logsoup(self, soup, m):
        """Log `m` followed by the repr of the offending markup `soup`."""
        self._log(m + '; in ' + repr(soup))

    def __init__(self, pirate):
        """Fetch the page for `pirate` and fill in self.skills / self.msgs."""
        html = yoweb_fetch('pirate.wm?target=', pirate)
        soup = BeautifulSoup(html,
#            convertEntities=BeautifulSoup.HTML_ENTITIES
            )
        self.msgs = []
        found = self._scrape_standings(soup)
        self._resolve_standings(found, duties, standings)

    def _scrape_standings(self, soup):
        """Collect the raw standing strings the page shows for each duty.

        Returns a dict mapping every name in `duties` to the list of
        standing strings found for it (empty when none was found).
        Images without an 'alt' attribute are skipped silently; every
        other entry that cannot be interpreted is logged and skipped.
        """
        imgs = soup.findAll('img',
                            src=regexp.compile('/yoweb/images/stat.*'))
        # Two alternatives, each capturing the standing name:
        #   ".../Standing"  or  ".../... (ocean-wide ;Standing)".
        # The literal is kept exactly as found.
        # NOTE(review): the "\ \;" part looks like it may once have been
        # an "&nbsp;" entity that was altered in transit -- confirm
        # against the real page text.
        standing_re = regexp.compile(
            u'\s*\S*/([-A-Za-z]+)\s*$|\s*\S*/\S*\s*\(ocean\-wide\ \;([-A-Za-z]+)\)\s*$'
            )

        found = {}
        for duty in duties:
            found[duty] = []

        for img in imgs:
            try:
                duty = img['alt']
            except KeyError:
                continue

            if duty not in duties:
                self._logsoup(img, 'unknown duty: "%s"' % duty)
                continue
            key = img.findParent('td')
            if key is None:
                self._logsoup(img, 'duty at root! "%s"' % duty)
                continue
            valelem = key.findNextSibling('td')
            if valelem is None:
                self._logsoup(key, 'duty missing sibling "%s"' % duty)
                continue
            valstr = ''.join(valelem.findAll(text=True))
            match = standing_re.match(valstr)
            if match is None:
                # The two literals are joined *before* formatting; the
                # previous "a + b % args" applied both arguments to the
                # second literal only and raised TypeError.
                self._logsoup(key,
                              ('duty "%s" unparseable'
                               ' standing "%s"') % (duty, valstr))
                continue
            # Exactly one of the two groups takes part in a match, so
            # lastindex identifies the one holding the standing.
            found[duty].append(match.group(match.lastindex))

        return found

    def _resolve_standings(self, found, duty_names, standing_names):
        """Turn scraped standing strings into self.skills.

        `found` maps each name in `duty_names` to the list of standing
        strings scraped for it.  A duty is entered into self.skills,
        as an index into `standing_names`, only when exactly one
        standing was found and it is a known one; otherwise a message
        is logged.  When every duty was resolved, self.msgs is reset
        to empty.
        """
        self.skills = {}

        for duty in duty_names:
            sl = found[duty]
            if len(sl) > 1:
                self._log('duty "%s" multiple standings %s' %
                          (duty, repr(sl)))
                continue
            if not sl:
                self._log('duty "%s" no standing found' % duty)
                continue
            standing = sl[0]
            for i, name in enumerate(standing_names):
                if standing == name:
                    self.skills[duty] = i
            if duty not in self.skills:
                self._log('duty "%s" unknown standing "%s"' %
                          (duty, standing))

        all_skills_ok = True
        for duty in duty_names:
            if duty not in self.skills:
                all_skills_ok = False
        if all_skills_ok:
            # A complete result discards whatever was logged on the way.
            self.msgs = []

    def __str__(self):
        """Return the repr of the skills mapping."""
        return repr(self.skills)