X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.web-live.git;a=blobdiff_plain;f=yoweb-scrape;h=4e12d7f6da3a57835e2f6a7952613b18a101c44e;hp=54ef0fe14bd62ade32cf38a1b9c2b9c78f88ffeb;hb=2943c3282218303bfaaf76ec6efc41089adf9e6d;hpb=46a1fd2eb3beef6334ea615c1dc97596ea08c35c diff --git a/yoweb-scrape b/yoweb-scrape index 54ef0fe..4e12d7f 100755 --- a/yoweb-scrape +++ b/yoweb-scrape @@ -10,22 +10,27 @@ import urllib2 import errno import sys import re as regexp +import random from optparse import OptionParser from BeautifulSoup import BeautifulSoup opts = None -duties = ('Swordfighting/Bilging/Sailing/Rigging/Navigating'+ + +puzzles = ('Swordfighting/Bilging/Sailing/Rigging/Navigating'+ '/Battle Navigation/Gunning/Carpentry/Rumble/Treasure Haul'+ '/Drinking/Spades/Hearts/Treasure Drop/Poker/Distilling'+ '/Alchemistry/Shipwrightery/Blacksmithing/Foraging').split('/') -standingvals = ('Able/Distinguished/Respected/Master/Renowned'+ - '/Grand-Master/Legendary/Ultimate').split('/') +standingvals = ('Able/Distinguished/Respected/Master'+ + '/Renowned/Grand-Master/Legendary/Ultimate').split('/') pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm') +max_pirate_namelen = 20 + + def debug(m): if opts.debug: print >>sys.stderr, m @@ -40,6 +45,10 @@ class Fetcher: if oe.errno != errno.EEXIST: raise self._cache_scan(time.time()) + def _default_ocean(self): + if self.ocean is None: + self.ocean = 'ice' + def _cache_scan(self, now): # returns list of ages, unsorted ages = [] @@ -52,7 +61,7 @@ class Fetcher: if oe.errno != errno.ENOENT: raise continue age = now - s.st_mtime - if age > opts.max_age: + if age > opts.expire_age: debug('Fetcher expire %d %s' % (age, path)) try: os.remove(path) except (OSError,IOError), oe: @@ -68,17 +77,17 @@ class Fetcher: min_age = 1 need_wait = 0 for age in ages: - if age < min_age: + if age < min_age and age < 300: debug('Fetcher morewait min=%d age=%d' % (min_age, age)) need_wait = max(need_wait, min_age - age) - min_age *= 2 - min_age += 1 + min_age += 3 + min_age *= 1.25 if need_wait > 0: debug('Fetcher wait %d' % need_wait) time.sleep(need_wait) - def fetch(self, url): + def fetch(self, url, max_age): debug('Fetcher fetch %s' % url) cache_corename = urllib.quote_plus(url) cache_item = "%s/#%s#" % (self.cachedir, cache_corename) @@ -87,15 +96,17 @@ class Fetcher: if oe.errno != errno.ENOENT: raise f = None now = time.time() + max_age = max(opts.min_max_age, min(max_age, opts.expire_age)) if f is not None: s = os.fstat(f.fileno()) - if now > s.st_mtime + opts.max_age: - debug('Fetcher stale') + age = now - s.st_mtime + if age > max_age: + debug('Fetcher stale %d < %d'% (max_age, age)) f = None if f is not None: data = f.read() f.close() - debug('Fetcher cached') + debug('Fetcher cached %d > %d' % (max_age, age)) return data debug('Fetcher fetch') @@ -112,10 +123,11 @@ class Fetcher: debug('Fetcher stored') return data - def yoweb(self, kind, tail): + def yoweb(self, kind, tail, max_age): + self._default_ocean() url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % ( self.ocean, kind, tail) - return self.fetch(url) + return self.fetch(url, max_age) class SoupLog: def __init__(self): @@ -133,23 +145,25 @@ def soup_text(obj): return str.strip() class SomethingSoupInfo(SoupLog): - def __init__(self, kind, tail): + def __init__(self, kind, tail, max_age): SoupLog.__init__(self) - html = fetcher.yoweb(kind, tail) - self.soup = BeautifulSoup(html, + html = fetcher.yoweb(kind, tail, max_age) + self._soup = BeautifulSoup(html, convertEntities=BeautifulSoup.HTML_ENTITIES ) class PirateInfo(SomethingSoupInfo): # Public data members: # pi.standings = { 'Treasure Haul': 'Able' ... } + # pi.name = name # pi.crew = (id, name) # pi.flag = (id, name) # pi.msgs = [ 'message describing problem with scrape' ] - def __init__(self, pirate): + def __init__(self, pirate, max_age=300): SomethingSoupInfo.__init__(self, - 'pirate.wm?target=', pirate) + 'pirate.wm?target=', pirate, max_age) + self.name = pirate self._find_standings() self.crew = self._find_crewflag('crew', '^/yoweb/crew/info\\.wm') @@ -157,69 +171,69 @@ class PirateInfo(SomethingSoupInfo): '^/yoweb/flag/info\\.wm') def _find_standings(self): - imgs = self.soup.findAll('img', + imgs = self._soup.findAll('img', src=regexp.compile('/yoweb/images/stat.*')) re = regexp.compile( u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$' ) standings = { } - for skill in duties: + for skill in puzzles: standings[skill] = [ ] skl = SoupLog() for img in imgs: - try: duty = img['alt'] + try: puzzle = img['alt'] except KeyError: continue - if not duty in duties: - skl.soupm(img, 'unknown duty: "%s"' % duty) + if not puzzle in puzzles: + skl.soupm(img, 'unknown puzzle: "%s"' % puzzle) continue key = img.findParent('td') if key is None: - skl.soupm(img, 'duty at root! "%s"' % duty) + skl.soupm(img, 'puzzle at root! "%s"' % puzzle) continue valelem = key.findNextSibling('td') if valelem is None: - skl.soupm(key, 'duty missing sibling "%s"' - % duty) + skl.soupm(key, 'puzzle missing sibling "%s"' + % puzzle) continue valstr = soup_text(valelem) match = re.match(valstr) if match is None: - skl.soupm(key, ('duty "%s" unparseable'+ - ' standing "%s"') % (duty, valstr)) + skl.soupm(key, ('puzzle "%s" unparseable'+ + ' standing "%s"') % (puzzle, valstr)) continue standing = match.group(match.lastindex) - standings[duty].append(standing) + standings[puzzle].append(standing) self.standings = { } - for duty in duties: - sl = standings[duty] + for puzzle in puzzles: + sl = standings[puzzle] if len(sl) > 1: - skl.msg('duty "%s" multiple standings %s' % - (duty, `sl`)) + skl.msg('puzzle "%s" multiple standings %s' % + (puzzle, `sl`)) continue if not len(sl): - skl.msg('duty "%s" no standing found' % duty) + skl.msg('puzzle "%s" no standing found' % puzzle) continue standing = sl[0] for i in range(0, len(standingvals)-1): if standing == standingvals[i]: - self.standings[duty] = i - if not duty in self.standings: - skl.msg('duty "%s" unknown standing "%s"' % - (duty, standing)) + self.standings[puzzle] = i + if not puzzle in self.standings: + skl.msg('puzzle "%s" unknown standing "%s"' % + (puzzle, standing)) all_standings_ok = True - for duty in duties: - if not duty in self.standings: + for puzzle in puzzles: + if not puzzle in self.standings: self.needs_msgs(skl) def _find_crewflag(self, cf, yoweb_re): - things = self.soup.findAll('a', href=regexp.compile(yoweb_re)) + things = self._soup.findAll('a', href=regexp.compile(yoweb_re)) if len(things) != 1: self.msg('zero or several %s id references found' % cf) return None @@ -244,14 +258,14 @@ class CrewInfo(SomethingSoupInfo): # ... ] # pi.msgs = [ 'message describing problem with scrape' ] - def __init__(self, crewid): + def __init__(self, crewid, max_age=300): SomethingSoupInfo.__init__(self, - 'crew/info.wm?crewid=', crewid) + 'crew/info.wm?crewid=', crewid, max_age) self._find_crew() def _find_crew(self): self.crew = [] - capts = self.soup.findAll('img', + capts = self._soup.findAll('img', src='/yoweb/images/crew-captain.png') if len(capts) != 1: self.msg('crew members: no. of captain images != 1') @@ -285,6 +299,65 @@ class CrewInfo(SomethingSoupInfo): def __str__(self): return `(self.crew, self.msgs)` +class StandingsTable: + def __init__(self, use_puzzles=None, col_width=6): + if use_puzzles is None: + if opts.ship_duty: + use_puzzles=[ + 'Navigating','Battle Navigation', + 'Gunning', + ['Sailing','Rigging'], + 'Bilging', + 'Carpentry', + 'Treasure Haul' + ] + else: + use_puzzles=puzzles + self._puzzles = use_puzzles + self.s = '' + self._cw = col_width-1 + + def _pline(self, pirate, puzstrs): + self.s += ' %-*s' % (max_pirate_namelen, pirate) + for v in puzstrs: + self.s += ' %-*.*s' % (self._cw,self._cw, v) + self.s += '\n' + + def _puzstr(self, pi, puzzle): + if not isinstance(puzzle,list): puzzle = [puzzle] + try: standing = max([pi.standings[p] for p in puzzle]) + except KeyError: return '?' + if not standing: return '' + s = '' + if self._cw > 4: + c1 = standingvals[standing][0] + if standing < 3: c1 = c1.lower() # 3 = Master + s += `standing` + if self._cw > 5: + s += ' ' + s += '*' * (standing / 2) + s += '+' * (standing % 2) + return s + + def headings(self): + def puzn_redact(name): + if isinstance(name,list): + return '/'.join( + ["%.*s" % (self._cw/2, puzn_redact(n)) + for n in name]) + spc = name.find(' ') + if spc < 0: return name + return name[0:min(4,spc)] + name[spc+1:] + self._pline('', map(puzn_redact, self._puzzles)) + def literalline(self, line): + self.s += line + '\n' + def pirate(self, pi): + puzstrs = [self._puzstr(pi,puz) for puz in self._puzzles] + self._pline(pi.name, puzstrs) + + def results(self): + return self.s + def do_pirate(pirates, bu): print '{' for pirate in pirates: @@ -292,17 +365,26 @@ def do_pirate(pirates, bu): print '%s: %s,' % (`pirate`, info) print '}' -def prep_crew_of(args, bu): +def prep_crew_of(args, bu, max_age=300): if len(args) != 1: bu('crew-of takes one pirate name') - pi = PirateInfo(args[0]) - return CrewInfo(pi.crew[0]) + pi = PirateInfo(args[0], max_age) + return CrewInfo(pi.crew[0], max_age) def do_crew_of(args, bu): ci = prep_crew_of(args, bu) print ci -#def do_dutytab_crew_of(pirates, badusage): -# if len(pirates) != 1: badusage('dutytab-crew-of takes one pirate name') +def do_standings_crew_of(args, bu): + ci = prep_crew_of(args, bu, 60) + tab = StandingsTable() + tab.headings() + for (rank, members) in ci.crew: + if not members: continue + tab.literalline('%s:' % rank) + for p in members: + pi = PirateInfo(p, random.randint(900,1800)) + tab.pirate(pi) + print tab.results() def main(): global opts, fetcher @@ -312,11 +394,11 @@ def main(): actions: yoweb-scrape [--ocean OCEAN ...] pirate PIRATE yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE - yoweb-scrape [--ocean OCEAN ...] dutytab-crew-of PIRATE + yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE + yoweb-scrape [--ocean OCEAN ...] ship-aid CHAT-LOG ''') ao = pa.add_option - ao('-O','--ocean',dest='ocean', metavar='OCEAN', - default='ice', + ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None, help='select ocean OCEAN') ao('--cache-dir', dest='cache_dir', metavar='DIR', default='~/.yoweb-scrape-cache', @@ -325,7 +407,12 @@ actions: help='enable debugging output') ao('-q','--quiet', action='store_true', dest='quiet', help='suppress warning output') + + ao('--ship-duty', action='store_true', dest='ship_duty', + help='show ship duty station puzzles') + (opts,args) = pa.parse_args() + random.seed() if len(args) < 1: pa.error('need a mode argument') @@ -336,7 +423,8 @@ actions: except KeyError: pa.error('unknown mode "%s"' % mode) # fixed parameters - opts.max_age = 240 + opts.min_max_age = 60 + opts.expire_age = 3600 if opts.cache_dir.startswith('~/'): opts.cache_dir = os.getenv('HOME') + opts.cache_dir[1:]