X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.db-test.git;a=blobdiff_plain;f=yoweb-scrape;h=cc98ea3b21b869154ed0a46ca5aec941d7f71a71;hp=a8a1a1169b0ee5f4ddb4b5548575d53b3c9ae348;hb=64c50ed9acbac5a8d4e7feeac256c1437f3d8b6b;hpb=dc69873c133ffbe5ca50bd7cefbc84d5608e7de0 diff --git a/yoweb-scrape b/yoweb-scrape index a8a1a11..cc98ea3 100755 --- a/yoweb-scrape +++ b/yoweb-scrape @@ -1,5 +1,7 @@ #!/usr/bin/python +#---------- setup ---------- + import signal signal.signal(signal.SIGINT, signal.SIG_DFL) @@ -11,12 +13,14 @@ import errno import sys import re as regexp import random +import curses from optparse import OptionParser from BeautifulSoup import BeautifulSoup opts = None +#---------- YPP parameters and arrays ---------- puzzles = ('Swordfighting/Bilging/Sailing/Rigging/Navigating'+ '/Battle Navigation/Gunning/Carpentry/Rumble/Treasure Haul'+ @@ -31,9 +35,19 @@ pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm') max_pirate_namelen = 12 +#---------- general utilities ---------- + def debug(m): - if opts.debug: - print >>sys.stderr, m + if opts.debug > 0: + print m + +def format_time_interval(ti): + if ti < 120: return '%d:%02d' % (ti / 60, ti % 60) + if ti < 7200: return '%2dm' % (ti / 60) + if ti < 86400: return '%dh' % (ti / 3600) + return '%dd' % (ti / 86400) + +#---------- caching and rate-limiting data fetcher ---------- class Fetcher: def __init__(self, ocean, cachedir): @@ -45,9 +59,9 @@ class Fetcher: if oe.errno != errno.EEXIST: raise self._cache_scan(time.time()) - def _default_ocean(self): + def default_ocean(self, ocean='ice'): if self.ocean is None: - self.ocean = 'ice' + self.ocean = ocean def _cache_scan(self, now): # returns list of ages, unsorted @@ -70,7 +84,7 @@ class Fetcher: ages.append(age) return ages - def _rate_limit_cache_clean(self, now): + def need_wait(self, now): ages = self._cache_scan(now) ages.sort() debug('Fetcher ages ' + `ages`) @@ -83,6 +97,10 @@ class Fetcher: need_wait = max(need_wait, min_age - age) min_age += 3 min_age *= 1.25 + return need_wait + + def _rate_limit_cache_clean(self, now): + need_wait = self.need_wait(now) if need_wait > 0: debug('Fetcher wait %d' % need_wait) time.sleep(need_wait) @@ -124,11 +142,13 @@ class Fetcher: return data def yoweb(self, kind, tail, max_age): - self._default_ocean() + self.default_ocean() url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % ( self.ocean, kind, tail) return self.fetch(url, max_age) +#---------- logging assistance for troubled screenscrapers ---------- + class SoupLog: def __init__(self): self.msgs = [ ] @@ -152,6 +172,8 @@ class SomethingSoupInfo(SoupLog): convertEntities=BeautifulSoup.HTML_ENTITIES ) +#---------- scraper for pirate pages ---------- + class PirateInfo(SomethingSoupInfo): # Public data members: # pi.standings = { 'Treasure Haul': 'Able' ... } @@ -216,7 +238,7 @@ u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A- skl.msg('puzzle "%s" multiple standings %s' % (puzzle, `sl`)) continue - if not len(sl): + if not sl: skl.msg('puzzle "%s" no standing found' % puzzle) continue standing = sl[0] @@ -251,6 +273,8 @@ u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A- def __str__(self): return `(self.crew, self.flag, self.standings, self.msgs)` +#---------- scraper for crew pages ---------- + class CrewInfo(SomethingSoupInfo): # Public data members: # ci.crew = [ ('Captain', ['Pirate', ...]), @@ -280,7 +304,7 @@ class CrewInfo(SomethingSoupInfo): crew_rank_re = regexp.compile('/yoweb/images/crew') for row in tbl.contents: # findAll(recurse=False) - if isinstance(row, unicode): + if isinstance(row,basestring): continue is_rank = row.find('img', attrs={'src': crew_rank_re}) @@ -299,6 +323,8 @@ class CrewInfo(SomethingSoupInfo): def __str__(self): return `(self.crew, self.msgs)` +#---------- pretty-printer for tables of pirate puzzle standings ---------- + class StandingsTable: def __init__(self, use_puzzles=None, col_width=6): if use_puzzles is None: @@ -317,10 +343,12 @@ class StandingsTable: self.s = '' self._cw = col_width-1 - def _pline(self, pirate, puzstrs): + def _pline(self, pirate, puzstrs, extra): self.s += ' %-*s' % (max(max_pirate_namelen, 14), pirate) for v in puzstrs: self.s += ' %-*.*s' % (self._cw,self._cw, v) + if extra: + self.s += ' ' + extra self.s += '\n' def _puzstr(self, pi, puzzle): @@ -348,16 +376,296 @@ class StandingsTable: spc = name.find(' ') if spc < 0: return name return name[0:min(4,spc)] + name[spc+1:] - self._pline('', map(puzn_redact, self._puzzles)) + self._pline('', map(puzn_redact, self._puzzles), None) def literalline(self, line): self.s += line + '\n' - def pirate(self, pi): + def pirate_dummy(self, name, standingstring, extra=None): + self._pline(name, standingstring * len(self._puzzles), extra) + def pirate(self, pi, extra=None): puzstrs = [self._puzstr(pi,puz) for puz in self._puzzles] - self._pline(pi.name, puzstrs) + self._pline(pi.name, puzstrs, extra) def results(self): return self.s +#---------- chat log parser ---------- + +class PirateAboard: + # This is essentially a transparent, dumb, data class. + # pa.v + # pa.name + # pa.last_time + # pa.last_event + # pa.gunner + # pa.last_chat_time + # pa.last_chat_chan + # pa.pi + + def __init__(pa, pn, v, time, event): + pa.name = pn + pa.v = v + pa.last_time = time + pa.last_event = event + pa.last_chat_time = None + pa.last_chat_chan = None + pa.gunner = False + pa.pi = None + + def pirate_info(pa): + if not pa.pi and not fetcher.need_wait(time.time()): + pa.pi = PirateInfo(pa.name, 3600) + return pa.pi + +class ChatLogTracker: + # This is quite complex so we make it opaque. Use the + # official invokers, accessors etc. + + def __init__(self, myself_pi, logfn): + self._pl = {} # self._pl['Pirate'] = + self._vl = {} # self._vl['Vessel']['Pirate'] = PirateAboard + # self._vl['Vessel']['#lastaboard'] + self._v = None # self._v = + self._vessel = None # self._vl[self._vessel] + self._date = None + self._myself = myself_pi + self._need_redisplay = False + self._f = file(logfn) + self._lbuf = '' + self._progress = [0, os.fstat(self._f.fileno()).st_size] + + def force_redisplay(self): + self._need_redisplay = True + + def _onboard_event(self,timestamp,pirate,event): + try: pa = self._pl[pirate] + except KeyError: pa = None + if pa is not None and pa.v is self._v: + pa.last_time = timestamp + pa.last_event = event + else: + if pa is not None: del pa.v[pirate] + pa = PirateAboard(pirate, self._v, timestamp, event) + self._pl[pirate] = pa + self._v[pirate] = pa + self._v['#lastaboard'] = timestamp + self.force_redisplay() + return pa + + def _trash_vessel(self, v): + for pn in v: + if pn.startswith('#'): continue + del self._pl[pn] + self.force_redisplay() + + def expire_garbage(self, timestamp): + for (vn,v) in list(self._vl.iteritems()): + la = v['#lastaboard'] + if timestamp - la > opts.ship_reboard_clearout: + self._debug_line_disposition(timestamp,'', + 'stale reset '+vn) + self._trash_vessel(v) + del self._vl[vn] + + def clear_vessel(self, timestamp): + if self._v is not None: + self._trash_vessel(self._v) + self._v = {'#lastaboard': timestamp} + self._vl[self._vessel] = self._v + + def _debug_line_disposition(self,timestamp,l,m): + debug('CLT %13s %-30s %s' % (timestamp,m,l)) + + def chatline(self,l): + rm = lambda re: regexp.match(re,l) + d = lambda m: self._debug_line_disposition(timestamp,l,m) + timestamp = None + + m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$') + if m: + self._date = [int(x) for x in m.groups()] + self._previous_timestamp = None + return d('date '+`self._date`) + + if self._date is None: + return d('date unset') + + m = rm('\\[(\d\d):(\d\d):(\d\d)\\] ') + if not m: + return d('no timestamp') + + while True: + time_tuple = (self._date + + [int(x) for x in m.groups()] + + [-1,-1,-1]) + timestamp = time.mktime(time_tuple) + if timestamp >= self._previous_timestamp: break + self._date[2] += 1 + self._debug_line_disposition(timestamp,'', + 'new date '+`self._date`) + + self._previous_timestamp = timestamp + + l = l[l.find(' ')+1:] + + def ob_x(who,event): + return self._onboard_event(timestamp, who, event) + def ob1(did): ob_x(m.group(1), did); return d(did) + def oba(did): return ob1('%s %s' % (did, m.group(2))) + + m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$') + if m: + pn = self._myself.name + self._vessel = m.group(1) + dm = 'boarding' + + try: self._v = self._vl[self._vessel] + except KeyError: self._v = None; dm += ' new' + + if self._v is not None: la = self._v['#lastaboard'] + else: la = 0; dm += ' ?la' + + if timestamp - la > opts.ship_reboard_clearout: + self.clear_vessel(timestamp) + dm += ' stale' + + ob_x(pn, 'we boarded') + self.expire_garbage(timestamp) + return d(dm) + + if self._v is None: + return d('no vessel') + + m = rm('(\\w+) has come aboard\\.$') + if m: return ob1('boarded'); + + m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$') + if m: + (who,what) = m.groups() + pa = ob_x(who,'ord '+what) + if what == 'Gunning': + pa.gunner = True + return d('duty order') + + m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$') + if m: oba('stopped'); return d("end") + + def chat(what): + who = m.group(1) + try: pa = self._pl[who] + except KeyError: return d('chat mystery') + if pa.v is self._v: + pa.last_chat_time = timestamp + pa.last_chat_chan = what + self.force_redisplay() + return d(what+' chat') + + m = rm('(\\w+) (?:issued an order|ordered everyone) "') + if m: return ob1('general order'); + + m = rm('(\\w+) says, "') + if m: return chat('public') + + m = rm('(\\w+) tells ye, "') + if m: return chat('private') + + m = rm('(\\w+) flag officer chats, "') + if m: return chat('flag officer') + + m = rm('(\\w+) officer chats, "') + if m: return chat('officer') + + m = rm('Game over\\. Winners: ([A-Za-z, ]+)\\.$') + if m: + pl = m.group(1).split(', ') + if not self._myself.name in pl: + return d('lost boarding battle') + for pn in pl: + if ' ' in pn: continue + ob_x(pn,'won boarding battle') + return d('won boarding battle') + + m = rm('(\\w+) is eliminated\\!') + if m: return ob1('eliminated in fray'); + + m = rm('(\\w+) has left the vessel\.') + if m: + who = m.group(1) + ob_x(who, 'disembarked') + del self._v[who] + del self._pl[who] + return d('disembarked') + + return d('not matched') + + def _str_vessel(self, vn, v): + s = ' vessel %s\n' % vn + s += ' '*20 + "%-*s %13s\n" % ( + max_pirate_namelen, '#lastaboard', + v['#lastaboard']) + for pn in sorted(v.keys()): + if pn.startswith('#'): continue + pa = v[pn] + assert pa.v == v + assert self._pl[pn] == pa + s += ' '*20 + "%s %-*s %13s %-30s %13s %s\n" % ( + (' ','G')[pa.gunner], + max_pirate_namelen, pn, + pa.last_time, pa.last_event, + pa.last_chat_time, pa.last_chat_chan) + return s + + def __str__(self): + s = '''>self._f, 'Starting up, %s on the %s ocean' % ( + pirate, ocean) + def caughtup(self): + self._f.write(' \r') + self._f.flush() + +#----- modes which use the chat log parser are quite complex ----- + +def prep_chat_log(args, bu, + progress=ProgressPrintPercentage(), + max_myself_age=3600): + if len(args) != 1: bu('this action takes only chat log filename') + logfn = args[0] + logfn_re = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_' + match = regexp.match(logfn_re, logfn) + if not match: bu('chat log filename is not in expected format') + (pirate, ocean) = match.groups() + fetcher.default_ocean(ocean) + + myself = PirateInfo(pirate,max_myself_age) + progress.show_init(pirate, fetcher.ocean) + track = ChatLogTracker(myself, logfn) + + opts.debug -= 1 + track.catchup(progress) + opts.debug += 1 + + track.force_redisplay() + + return (myself, track) + +def do_track_chat_log(args, bu): + (myself, track) = prep_chat_log(args, bu) + while True: + track.catchup() + if track.changed(): + print track + time.sleep(1) + +#----- ship management aid ----- + +class Display_dumb(ProgressPrintPercentage): + def __init__(self): + ProgressPrintPercentage.__init__(self) + def show(self, s): + print '\n\n', s; + def realstart(self): + pass + +class Display_overwrite(ProgressPrintPercentage): + def __init__(self): + ProgressPrintPercentage.__init__(self) + + null = file('/dev/null','w') + curses.setupterm(fd=null.fileno()) + + self._clear = curses.tigetstr('clear') + if not self._clear: + self._debug('missing clear!') + self.show = Display_dumb.show + return + + self._t = {'el':'', 'ed':''} + if not self._init_sophisticated(): + for k in self._t.keys(): self._t[k] = '' + self._t['ho'] = self._clear + + def _debug(self,m): debug('display overwrite: '+m) + + def _init_sophisticated(self): + for k in self._t.keys(): + s = curses.tigetstr(k) + self._t[k] = s + self._t['ho'] = curses.tigetstr('ho') + if not self._t['ho']: + cup = curses.tigetstr('cup') + self._t['ho'] = curses.tparm(cup,0,0) + missing = [k for k in self._t.keys() if not self._t[k]] + if missing: + self.debug('missing '+(' '.join(missing))) + return 0 + return 1 + + def show(self, s): + w = sys.stdout.write + def wti(k): w(self._t[k]) + + wti('ho') + nl = '' + for l in s.rstrip().split('\n'): + w(nl) + w(l) + wti('el') + nl = '\r\n' + wti('ed') + w(' ') + sys.stdout.flush() + + def realstart(self): + sys.stdout.write(self._clear) + sys.stdout.flush() + + +def do_ship_aid(args, bu): + if opts.ship_duty is None: opts.ship_duty = True + + displayer = globals()['Display_'+opts.display]() + rotate_nya = '/-\\' + + (myself, track) = prep_chat_log(args, bu, progress=displayer) + + def timeevent(t,e): + if t is None: return ' ' * 22 + return " %-4s %-16s" % (format_time_interval(now - t),e) + + displayer.realstart() + + while True: + track.catchup() + now = time.time() + + s = "%s" % track.myname() + + vn = track.vessel() + if vn is None: s += " not on a vessel?!" + else: s += " on board the %s" % vn + s += " at %s\n" % time.strftime("%Y-%m-%d %H:%M:%S") + + tbl = StandingsTable() + tbl.headings() + + for pa in track.aboard(): + pi = pa.pirate_info() + + xs = '' + if pa.gunner: xs += 'G ' + else: xs += ' ' + xs += timeevent(pa.last_time, pa.last_event) + xs += timeevent(pa.last_chat_time, pa.last_chat_chan) + + if pi is None: + tbl.pirate_dummy(pa.name, rotate_nya[0], xs) + else: + tbl.pirate(pi, xs) + + s += tbl.results() + + displayer.show(s) + time.sleep(1) + rotate_nya = rotate_nya[1:2] + rotate_nya[0] + +#---------- main program ---------- + def main(): global opts, fetcher @@ -395,7 +866,12 @@ actions: yoweb-scrape [--ocean OCEAN ...] pirate PIRATE yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE - yoweb-scrape [--ocean OCEAN ...] ship-aid CHAT-LOG + yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG + yoweb-scrape [options] ship-aid CHAT-LOG (must be .../PIRATE_OCEAN_chat-log*) + +display modes (for --display) apply to ship-aid: + --display=dumb just print new information, scrolling the screen + --display=overwrite use cursor motion, selective clear, etc. to redraw at top ''') ao = pa.add_option ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None, @@ -403,13 +879,20 @@ actions: ao('--cache-dir', dest='cache_dir', metavar='DIR', default='~/.yoweb-scrape-cache', help='cache yoweb pages in DIR') - ao('-D','--debug', action='store_true', dest='debug', default=False, + ao('-D','--debug', action='count', dest='debug', default=0, help='enable debugging output') + ao('--debug-fd', action='count', dest='debug_fd', + help='write any debugging output to specified fd') ao('-q','--quiet', action='store_true', dest='quiet', help='suppress warning output') + ao('--display', action='store', dest='display', + type='choice', choices=['dumb','overwrite'], + help='how to display ship aid') ao('--ship-duty', action='store_true', dest='ship_duty', help='show ship duty station puzzles') + ao('--all-puzzles', action='store_false', dest='ship_duty', + help='show all puzzles, not just ship duty stations') (opts,args) = pa.parse_args() random.seed() @@ -417,6 +900,9 @@ actions: if len(args) < 1: pa.error('need a mode argument') + if opts.debug_fd is not None: + opts.debug_file = fdopen(opts.debug_fd, 'w') + mode = args[0] mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_') try: mode_fn = globals()[mode_fn_name] @@ -425,10 +911,18 @@ actions: # fixed parameters opts.min_max_age = 60 opts.expire_age = 3600 + opts.ship_reboard_clearout = 3600 if opts.cache_dir.startswith('~/'): opts.cache_dir = os.getenv('HOME') + opts.cache_dir[1:] + if opts.display is None: + if ((opts.debug > 0 and opts.debug_fd is None) + or not os.isatty(sys.stdout.fileno())): + opts.display = 'dumb' + else: + opts.display = 'overwrite' + fetcher = Fetcher(opts.ocean, opts.cache_dir) mode_fn(args[1:], pa.error)