import urllib
import urllib2
import errno
+import sys
+import re as regexp
+import optparse
from BeautifulSoup import BeautifulSoup
now = time.time()

# Duty names recognised when scraping a pirate page; PirateInfo compares
# each stat icon's alt text against this list.
duties = [
    'Swordfighting',
    'Bilging',
    'Sailing',
    'Rigging',
    'Navigating',
    'Battle Navigation',
    'Gunning',
    'Carpentry',
    'Rumble',
    'Treasure Haul',
    'Drinking',
    'Spades',
    'Hearts',
    'Treasure Drop',
    'Poker',
    'Distilling',
    'Alchemistry',
    'Shipwrightery',
    'Blacksmithing',
    'Foraging',
]

# Standing names; PirateInfo stores a standing as its index in this list,
# so the order is significant (presumably lowest to highest -- confirm).
standings = [
    'Able',
    'Distinguished',
    'Respected',
    'Master',
    'Renowned',
    'Grand-Master',
    'Legendary',
    'Ultimate',
]

+
def fetch(url):
cache_corename = urllib.quote_plus(url)
cache_basename = "#%s#" % cache_corename
data = f.read()
f.close()
else:
+ time.sleep(1)  # os has no sleep(); pause one second before the urlopen below
stream = urllib2.urlopen(url)
data = stream.read()
cache_ourname = "#%s~%d#" % (cache_corename, os.getpid())
url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (ocean, kind, tail)
return fetch(url)
def get_pirate_info(pirate):
    """Fetch the yoweb page for `pirate` and return repr() of its parsed soup."""
    html = yoweb_fetch('pirate.wm?target=', pirate)
    soup = BeautifulSoup(html)
    # repr() replaces the backtick shorthand, which only Python 2 accepts.
    return repr(soup)
class PirateInfo:
    """Skill standings scraped from a pirate's yoweb page.

    Public data members:
      pi.skills = { 'Treasure Haul': 0 ... }
          duty name -> index into the module-level `standings` list;
          only duties whose standing was resolved are present.
      pi.msgs   = [ 'message describing problem with scrape' ]
          emptied again when every duty in `duties` was resolved.
    """

    # Selects the stat icons; their alt text carries the duty name.
    _STAT_IMG_RE = regexp.compile('/yoweb/images/stat.*')

    # Group 1: plain "<something>/<standing>".
    # Group 2: "<something>/<something> (ocean-wide<sep><standing>)".
    # NOTE(review): <sep> shows up as a single blank followed by ';' in the
    # original pattern, which looks like a mangled '&nbsp;'.  The pattern
    # accepts the literal entity as well as a space / no-break space plus
    # ';' so both spellings parse -- confirm against a live page.
    _STANDING_RE = regexp.compile(
        r'\s*\S*/([-A-Za-z]+)\s*$'
        r'|\s*\S*/\S*\s*\(ocean-wide(?:&nbsp;|[ \xa0];)([-A-Za-z]+)\)\s*$'
    )

    def _log(self, m):
        """Record a scrape problem message in self.msgs."""
        self.msgs.append(m)

    def _logsoup(self, soup, m):
        """Record message `m` together with the repr of the offending element."""
        self._log(m + '; in ' + repr(soup))

    def __init__(self, pirate):
        """Fetch and scrape the page of `pirate`, filling skills and msgs."""
        html = yoweb_fetch('pirate.wm?target=', pirate)
        # convertEntities is not passed, so entities stay as written in the
        # page text.
        soup = BeautifulSoup(html)
        self.msgs = []
        self.skills = {}
        self._resolve_standings(self._scrape_standings(soup))

    def _parse_standing(self, valstr):
        """Return the standing name found in cell text `valstr`, or None."""
        match = self._STANDING_RE.match(valstr)
        if match is None:
            return None
        # Exactly one of the two alternatives (and so one group) matched.
        return match.group(match.lastindex)

    def _scrape_standings(self, soup):
        """Collect the standing names found for each duty in `soup`.

        Returns a dict mapping every name in `duties` to a (possibly
        empty) list of standing strings.  Problems are logged to
        self.msgs and the offending icon is skipped.
        """
        found = dict((duty, []) for duty in duties)

        for img in soup.findAll('img', src=self._STAT_IMG_RE):
            try:
                duty = img['alt']
            except KeyError:
                # Icon without alt text: nothing to identify it by.
                continue

            if duty not in duties:
                self._logsoup(img, 'unknown duty: "%s"' % duty)
                continue
            key = img.findParent('td')
            if key is None:
                self._logsoup(img, 'duty at root! "%s"' % duty)
                continue
            # The standing text lives in the cell after the icon's cell.
            valelem = key.findNextSibling('td')
            if valelem is None:
                self._logsoup(key, 'duty missing sibling "%s"' % duty)
                continue
            valstr = ''.join(valelem.findAll(text=True))
            standing = self._parse_standing(valstr)
            if standing is None:
                # One format string, so both values are substituted.
                self._logsoup(key,
                              'duty "%s" unparseable standing "%s"'
                              % (duty, valstr))
                continue
            found[duty].append(standing)

        return found

    def _resolve_standings(self, found):
        """Turn scraped standing names into indices stored in self.skills.

        `found` maps each duty to the list of standing names scraped for
        it.  A duty is resolved only when exactly one known standing was
        found; every other case is logged.  If all duties resolve,
        self.msgs is reset to an empty list.
        """
        self.skills = {}

        for duty in duties:
            candidates = found[duty]
            if len(candidates) > 1:
                self._log('duty "%s" multiple standings %s' %
                          (duty, repr(candidates)))
                continue
            if not candidates:
                self._log('duty "%s" no standing found' % duty)
                continue
            standing = candidates[0]
            try:
                self.skills[duty] = standings.index(standing)
            except ValueError:
                self._log('duty "%s" unknown standing "%s"' %
                          (duty, standing))

        # A complete result supersedes any problems noted along the way.
        if all(duty in self.skills for duty in duties):
            self.msgs = []

    def __str__(self):
        """Return the repr of the skills mapping."""
        return repr(self.skills)
def main():
    """Test driver: scrape one pirate and print the resulting PirateInfo."""
    os.chdir(os.getenv('HOME'))
    # test program:
    # `ocean` is a module global; it is interpolated into the yoweb URL.
    global ocean
    ocean = 'midnight'
    test = PirateInfo('Anaplian')
    # Single-argument print() behaves the same on Python 2 and 3.
    print(test)

main()