From c223acffaeafa66967736cf71ef5792b5ef9e9bc Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Fri, 15 May 2009 01:21:45 +0100 Subject: [PATCH] WIP --- yoweb-scrape | 56 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/yoweb-scrape b/yoweb-scrape index 0a54940..6a066d0 100755 --- a/yoweb-scrape +++ b/yoweb-scrape @@ -5,6 +5,7 @@ import time import urllib import urllib2 import errno +import re as regexp from BeautifulSoup import BeautifulSoup @@ -13,6 +14,12 @@ ocean = 'ice' now = time.time() +duties = ('Swordfighting/Bilging/Sailing/Rigging/Navigation'+ + '/Battle Navigation/Carpentry/Rumble/Treasure Haul').split('/') + +standings = ('Able/Distinguished/Respected/Master/Renowned'+ + '/Grand-Master/Legendary/Ultimate').split('/') + def fetch(url): cache_corename = urllib.quote_plus(url) cache_basename = "#%s#" % cache_corename @@ -41,10 +48,49 @@ def yoweb_fetch(kind, tail): url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (ocean, kind, tail) return fetch(url) -def get_pirate_info(pirate): - html = yoweb_fetch('pirate.wm?target=', pirate) - soup = BeautifulSoup(html) - return `soup` +class PirateInfo: + def __init__(self, pirate): + html = yoweb_fetch('pirate.wm?target=', pirate) + soup = BeautifulSoup(html, +# convertEntities=BeautifulSoup.HTML_ENTITIES + ) + imgs = soup.findAll('img') + re = regexp.compile( +u'\s*\S*/([-A-Za-z]+)\s*$|\s*\S*/\S*\s*\(ocean\-wide\ \;([-A-Za-z]+)\)\s*$' + ) + skills = { } + for skill in duties: + skills[skill] = [ ] + for img in imgs: + try: duty = img['alt'] + except KeyError: continue + print `duty` + if not duty in duties: continue + key = img.findParent('td') + if key is None: continue + valelem = key.findNextSibling('td') + if valelem is None: continue + valstr = ''.join(valelem.findAll(text=True)) + print `duty`, `valstr` + match = re.match(valstr) + if match is None: continue + standing = match.group(1) + skills[duty] = standing + +# print `duty`, `standing` +# if standing not in standings: continue +# for i in range(0, len(standings)): +# print `duty`, `standing`, i +# if standing == standings[i]: +# print `skills[duty]` +# skills[duty].append(i) + +# self.skills = { } +# for skill in duties: + + self.skills = skills + def __str__(self): + return `self.skills` def main(): os.chdir(os.getenv('HOME')) @@ -70,7 +116,7 @@ def main(): # test program: global ocean ocean = 'midnight' - test = get_pirate_info('Aristarchus') + test = PirateInfo('Anaplian') print test main() -- 2.30.2