#!/usr/bin/python
# Fetch pages from the Puzzle Pirates "yoweb" site, keeping a small on-disk
# cache of the raw responses so repeated runs do not re-download them.

import os
import time
import urllib
import urllib2
import errno
from BeautifulSoup import BeautifulSoup

# Maximum age, in seconds, of a cache entry before it is refetched/removed.
max_age = 120
# Subdomain used when building yoweb URLs; main() overrides it.
ocean = 'ice'

# Reference time for every freshness check in this run.
now = time.time()


def fetch(url):
    """Return the body of *url*, using the cache in the current directory.

    The cache entry for a URL is a file named "#<quoted-url>#".  It is used
    if it exists and its mtime is no more than ``max_age`` seconds before
    ``now``; otherwise the URL is downloaded, written to a per-process
    temporary file ("#<quoted-url>~<pid>#") and renamed over the entry.

    Raises OSError/IOError for cache file errors other than a missing entry,
    and whatever urllib2.urlopen raises on download failure.
    """
    cache_corename = urllib.quote_plus(url)
    cache_basename = "#%s#" % cache_corename

    try:
        f = open(cache_basename, 'r')
    except (OSError, IOError) as oe:
        if oe.errno != errno.ENOENT:
            raise
        f = None

    if f is not None:
        # Close the handle whether or not the entry turns out to be fresh;
        # the stale path previously dropped it without closing.
        try:
            s = os.fstat(f.fileno())
            if now <= s.st_mtime + max_age:
                return f.read()
        finally:
            f.close()

    stream = urllib2.urlopen(url)
    try:
        data = stream.read()
    finally:
        stream.close()

    # Write under a pid-specific name, then rename into place, so the final
    # cache name only ever refers to a completely written file.
    cache_ourname = "#%s~%d#" % (cache_corename, os.getpid())
    with open(cache_ourname, 'w') as out:
        out.write(data)
    os.rename(cache_ourname, cache_basename)
    return data


def yoweb_fetch(kind, tail):
    """Fetch the yoweb page ``kind + tail`` for the current ``ocean``."""
    url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (ocean, kind, tail)
    return fetch(url)


def get_pirate_info(pirate):
    """Return the repr of the parsed yoweb pirate page for *pirate*."""
    html = yoweb_fetch('pirate.wm?target=', pirate)
    soup = BeautifulSoup(html)
    return repr(soup)


def main():
    """Enter the cache directory, expire old cache files, run the test fetch.

    The cache lives in ``.yoweb-scrape-cache`` under the user's home
    directory and is created if missing.  Files whose names start with '#'
    are removed once older than ``max_age`` seconds; names containing '~'
    (the temporary names fetch() writes) are removed after 10 seconds.
    """
    global ocean

    # expanduser('~') returns $HOME when it is set and falls back to the
    # password database otherwise, instead of failing on an unset HOME.
    os.chdir(os.path.expanduser('~'))
    cache_dir = '.yoweb-scrape-cache'
    try:
        os.chdir(cache_dir)
    except (OSError, IOError) as oe:
        if oe.errno != errno.ENOENT:
            raise
        os.mkdir(cache_dir)
        os.chdir(cache_dir)

    for path in os.listdir('.'):
        if not path.startswith('#'):
            continue
        max_time = max_age
        if '~' in path:
            max_time = 10
        try:
            s = os.stat(path)
            if now > s.st_mtime + max_time:
                os.remove(path)
        except (OSError, IOError) as oe:
            # The file may vanish between listdir and stat/remove.
            if oe.errno != errno.ENOENT:
                raise

    # test program:
    ocean = 'midnight'
    test = get_pirate_info('Aristarchus')
    print(test)


main()