import urllib
import urllib2
import errno
+import sys
import re as regexp
+import optparse
from BeautifulSoup import BeautifulSoup
now = time.time()
# Duty names the scraper recognises.  PirateInfo compares the alt text of
# each stat icon on a pirate page against this list, so the spellings must
# match the page exactly.
duties = [
    'Swordfighting', 'Bilging', 'Sailing', 'Rigging', 'Navigating',
    'Battle Navigation', 'Gunning', 'Carpentry', 'Rumble', 'Treasure Haul',
    'Drinking', 'Spades', 'Hearts', 'Treasure Drop', 'Poker', 'Distilling',
    'Alchemistry', 'Shipwrightery', 'Blacksmithing', 'Foraging',
]

# Standing names.  PirateInfo stores a standing as its index in this list,
# so the order of the entries is significant.
standings = [
    'Able', 'Distinguished', 'Respected', 'Master', 'Renowned',
    'Grand-Master', 'Legendary', 'Ultimate',
]
data = f.read()
f.close()
else:
+ os.sleep(1)
stream = urllib2.urlopen(url)
data = stream.read()
cache_ourname = "#%s~%d#" % (cache_corename, os.getpid())
return fetch(url)
class PirateInfo:
    """Skill standings scraped from one pirate's yoweb page.

    Public data members:
      pi.skills = { 'Treasure Haul': 0 ... }
          maps every duty whose standing could be determined to the
          index of that standing in the module-level `standings` list
      pi.msgs = [ 'message describing problem with scrape' ]
          problems met while scraping; emptied again when every duty
          in `duties` ended up with a standing
    """

    # Matches the text of the table cell that follows a duty icon.  Two
    # layouts are accepted; in both, the standing is the only group that
    # takes part in the match, so `match.lastindex` selects it:
    #     <word>/<standing>
    #     <word>/<word> (ocean-wide&nbsp;<standing>)
    # The separator after "ocean-wide" is accepted either as the
    # unconverted `&nbsp;` entity or as a literal (non-breaking) space
    # followed by ';'.
    # NOTE(review): the entity form is assumed from the trailing ';' and
    # from entity conversion being disabled -- confirm against a live page.
    _standing_re = regexp.compile(
        u'\\s*\\S*/([-A-Za-z]+)\\s*$'
        u'|\\s*\\S*/\\S*\\s*\\(ocean-wide(?:&nbsp|[ \u00a0]);'
        u'([-A-Za-z]+)\\)\\s*$')

    def _log(self, m):
        """Record the problem description `m` in self.msgs."""
        self.msgs.append(m)

    def _logsoup(self, soup, m):
        """Record `m` together with the repr of the offending element."""
        self._log(m + '; in ' + repr(soup))

    def __init__(self, pirate):
        """Fetch the page for `pirate` and fill in self.skills / self.msgs.

        Scrape problems are not raised; they are collected in self.msgs.
        """
        html = yoweb_fetch('pirate.wm?target=', pirate)
        # convertEntities is deliberately not passed to BeautifulSoup.
        soup = BeautifulSoup(html)

        self.msgs = [ ]
        found = self._scrape_standings(soup)
        self.skills = self._resolve_skills(found)

        # A scrape that produced a standing for every known duty is
        # treated as clean: whatever was logged on the way is discarded.
        if all(duty in self.skills for duty in duties):
            self.msgs = [ ]

    def _scrape_standings(self, soup):
        """Return { duty: [standing names found on the page] }."""
        found = dict((duty, [ ]) for duty in duties)

        icons = soup.findAll('img',
                             src=regexp.compile('/yoweb/images/stat.*'))
        for img in icons:
            try:
                duty = img['alt']
            except KeyError:
                continue

            if duty not in duties:
                self._logsoup(img, 'unknown duty: "%s"' % duty)
                continue
            key = img.findParent('td')
            if key is None:
                self._logsoup(img, 'duty at root! "%s"' % duty)
                continue
            valelem = key.findNextSibling('td')
            if valelem is None:
                self._logsoup(key, 'duty missing sibling "%s"' % duty)
                continue
            valstr = ''.join(valelem.findAll(text=True))
            match = self._standing_re.match(valstr)
            if match is None:
                # Single format string: `%` binds tighter than `+`, so
                # formatting a concatenation needs the whole template
                # in one literal.
                self._logsoup(key,
                              'duty "%s" unparseable standing "%s"'
                              % (duty, valstr))
                continue
            found[duty].append(match.group(match.lastindex))

        return found

    def _resolve_skills(self, found):
        """Return { duty: index into `standings` } for unambiguous duties."""
        resolved = { }
        for duty in duties:
            candidates = found[duty]
            if len(candidates) > 1:
                self._log('duty "%s" multiple standings %s' %
                          (duty, repr(candidates)))
                continue
            if not candidates:
                self._log('duty "%s" no standing found' % duty)
                continue
            standing = candidates[0]
            if standing in standings:
                resolved[duty] = standings.index(standing)
            else:
                self._log('duty "%s" unknown standing "%s"' %
                          (duty, standing))
        return resolved

    def __str__(self):
        return repr(self.skills)