#!/usr/bin/python
"""Scrape pirate skill standings from the Puzzle Pirates "yoweb" pages.

Pages are fetched over HTTP and cached as files in the current working
directory; main() changes into ~/.yoweb-scrape-cache before fetching.
"""

import os
import time
import urllib
import urllib2
import errno
import re as regexp

from BeautifulSoup import BeautifulSoup

# Cache entries older than this many seconds are refetched / deleted.
max_age = 120
# Subdomain used when building yoweb URLs; main() overrides it.
ocean = 'ice'
# Reference time for every cache-age comparison made during this run.
now = time.time()

duties = ('Swordfighting/Bilging/Sailing/Rigging/Navigation' +
          '/Battle Navigation/Carpentry/Rumble/Treasure Haul').split('/')

standings = ('Able/Distinguished/Respected/Master/Renowned' +
             '/Grand-Master/Legendary/Ultimate').split('/')


def fetch(url):
    """Return the body of *url*, served from the on-disk cache when fresh.

    The cache file lives in the current directory and is named
    "#<quoted url>#".  It is used if its mtime is no more than max_age
    seconds before `now`; otherwise the URL is downloaded, written to a
    per-process temporary file and renamed over the cache file.

    Raises OSError/IOError for cache-file errors other than ENOENT, and
    whatever urllib2.urlopen raises on download failure.
    """
    cache_corename = urllib.quote_plus(url)
    cache_basename = "#%s#" % cache_corename

    data = None
    try:
        f = open(cache_basename, 'r')
    except (OSError, IOError) as oe:
        # A missing cache file simply means "not cached yet".
        if oe.errno != errno.ENOENT:
            raise
    else:
        # Always close the handle, including when the entry is stale.
        try:
            s = os.fstat(f.fileno())
            if now <= s.st_mtime + max_age:
                data = f.read()
        finally:
            f.close()

    if data is None:
        stream = urllib2.urlopen(url)
        try:
            data = stream.read()
        finally:
            stream.close()
        # Write under a pid-specific name, then rename, so a reader never
        # sees a partially written cache file.
        cache_ourname = "#%s~%d#" % (cache_corename, os.getpid())
        with open(cache_ourname, 'w') as f:
            f.write(data)
        os.rename(cache_ourname, cache_basename)
    return data


def yoweb_fetch(kind, tail):
    """Fetch the yoweb page *kind* + *tail* for the current `ocean`."""
    url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (ocean, kind, tail)
    return fetch(url)


class PirateInfo:
    """Skill standings scraped from one pirate's yoweb page.

    After construction, self.skills maps every name in `duties` either to
    the standing string parsed from the page, or to an empty list when no
    standing was found for that duty.
    """

    # Matches the text of the table cell following a duty icon.  Group 1 is
    # the word after the slash in the plain form; group 2 is the word inside
    # "(ocean-wide&nbsp;...)".  The entity is matched literally because the
    # page is parsed without entity conversion.
    _standing_re = regexp.compile(
        r'\s*\S*/([-A-Za-z]+)\s*$'
        r'|\s*\S*/\S*\s*\(ocean\-wide\ \&nbsp\;([-A-Za-z]+)\)\s*$'
    )

    def __init__(self, pirate):
        """Fetch and parse the page for the pirate named *pirate*."""
        html = yoweb_fetch('pirate.wm?target=', pirate)
        # Entities are deliberately left unconverted (no convertEntities
        # argument); _standing_re depends on seeing "&nbsp;" verbatim.
        soup = BeautifulSoup(html)
        imgs = soup.findAll('img')

        skills = {}
        for skill in duties:
            skills[skill] = []

        for img in imgs:
            try:
                duty = img['alt']
            except KeyError:
                continue
            print(repr(duty))
            if duty not in duties:
                continue
            key = img.findParent('td')
            if key is None:
                continue
            valelem = key.findNextSibling('td')
            if valelem is None:
                continue
            valstr = ''.join(valelem.findAll(text=True))
            print('%r %r' % (duty, valstr))
            match = self._standing_re.match(valstr)
            if match is None:
                continue
            # Only one alternative of the pattern can match, so exactly one
            # group is set; reading group 1 alone yielded None for the
            # ocean-wide form.
            standing = match.group(1) or match.group(2)
            skills[duty] = standing
            # NOTE(review): earlier commented-out code here appeared to be
            # converting `standing` into its index in `standings` and
            # appending it to a per-duty list; that was never finished, so
            # the raw standing string is stored instead.

        self.skills = skills

    def __str__(self):
        return repr(self.skills)


def main():
    """Prepare the cache directory, expire old entries, run the test fetch."""
    global ocean

    os.chdir(os.getenv('HOME'))
    cache_dir = '.yoweb-scrape-cache'
    try:
        os.chdir(cache_dir)
    except (OSError, IOError) as oe:
        if oe.errno != errno.ENOENT:
            raise
        os.mkdir(cache_dir)
        os.chdir(cache_dir)

    for path in os.listdir('.'):
        if not path.startswith('#'):
            continue
        # Names containing '~' are the per-process temporary files written
        # by fetch(); they get a much shorter lifetime than real entries.
        max_time = 10 if '~' in path else max_age
        try:
            s = os.stat(path)
            if now > s.st_mtime + max_time:
                os.remove(path)
        except (OSError, IOError) as oe:
            # The file may vanish between listdir and stat/remove.
            if oe.errno != errno.ENOENT:
                raise

    # test program:
    ocean = 'midnight'
    test = PirateInfo('Anaplian')
    print(test)


if __name__ == '__main__':
    main()