X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.db-test.git;a=blobdiff_plain;f=yoweb-scrape;h=04e392d7d53e79a25d382275f575030e420d18c2;hp=9d8f69dd0f8cd9bb96cbccc5ec7faa35437e1bd5;hb=8ebb23fcb201abe4afc25a4f28f510e435443bc2;hpb=82fe9647121ac3d86f84208152a65177f4f8b335 diff --git a/yoweb-scrape b/yoweb-scrape index 9d8f69d..04e392d 100755 --- a/yoweb-scrape +++ b/yoweb-scrape @@ -1,5 +1,7 @@ #!/usr/bin/python +#---------- setup ---------- + import signal signal.signal(signal.SIGINT, signal.SIG_DFL) @@ -11,12 +13,14 @@ import errno import sys import re as regexp import random +import curses from optparse import OptionParser from BeautifulSoup import BeautifulSoup opts = None +#---------- YPP parameters and arrays ---------- puzzles = ('Swordfighting/Bilging/Sailing/Rigging/Navigating'+ '/Battle Navigation/Gunning/Carpentry/Rumble/Treasure Haul'+ @@ -31,10 +35,20 @@ pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm') max_pirate_namelen = 12 +#---------- general utilities ---------- + def debug(m): - if opts.debug: + if opts.debug > 0: print m +def format_time_interval(ti): + if ti < 120: return '%d:%02d' % (ti / 60, ti % 60) + if ti < 7200: return '%2dm' % (ti / 60) + if ti < 86400: return '%dh' % (ti / 3600) + return '%dd' % (ti / 86400) + +#---------- caching and rate-limiting data fetcher ---------- + class Fetcher: def __init__(self, ocean, cachedir): debug('Fetcher init %s' % cachedir) @@ -45,9 +59,9 @@ class Fetcher: if oe.errno != errno.EEXIST: raise self._cache_scan(time.time()) - def _default_ocean(self): + def default_ocean(self, ocean='ice'): if self.ocean is None: - self.ocean = 'ice' + self.ocean = ocean def _cache_scan(self, now): # returns list of ages, unsorted @@ -70,7 +84,7 @@ class Fetcher: ages.append(age) return ages - def _rate_limit_cache_clean(self, now): + def need_wait(self, now): ages = self._cache_scan(now) ages.sort() debug('Fetcher ages ' + `ages`) @@ -83,6 +97,10 @@ class Fetcher: need_wait = max(need_wait, min_age - age) min_age += 3 min_age *= 1.25 + return need_wait + + def _rate_limit_cache_clean(self, now): + need_wait = self.need_wait(now) if need_wait > 0: debug('Fetcher wait %d' % need_wait) time.sleep(need_wait) @@ -124,11 +142,13 @@ class Fetcher: return data def yoweb(self, kind, tail, max_age): - self._default_ocean() + self.default_ocean() url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % ( self.ocean, kind, tail) return self.fetch(url, max_age) +#---------- logging assistance for troubled screenscrapers ---------- + class SoupLog: def __init__(self): self.msgs = [ ] @@ -152,6 +172,8 @@ class SomethingSoupInfo(SoupLog): convertEntities=BeautifulSoup.HTML_ENTITIES ) +#---------- scraper for pirate pages ---------- + class PirateInfo(SomethingSoupInfo): # Public data members: # pi.standings = { 'Treasure Haul': 'Able' ... } @@ -251,6 +273,8 @@ u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A- def __str__(self): return `(self.crew, self.flag, self.standings, self.msgs)` +#---------- scraper for crew pages ---------- + class CrewInfo(SomethingSoupInfo): # Public data members: # ci.crew = [ ('Captain', ['Pirate', ...]), @@ -280,7 +304,7 @@ class CrewInfo(SomethingSoupInfo): crew_rank_re = regexp.compile('/yoweb/images/crew') for row in tbl.contents: # findAll(recurse=False) - if isinstance(row, unicode): + if isinstance(row,basestring): continue is_rank = row.find('img', attrs={'src': crew_rank_re}) @@ -299,6 +323,8 @@ class CrewInfo(SomethingSoupInfo): def __str__(self): return `(self.crew, self.msgs)` +#---------- pretty-printer for tables of pirate puzzle standings ---------- + class StandingsTable: def __init__(self, use_puzzles=None, col_width=6): if use_puzzles is None: @@ -317,10 +343,12 @@ class StandingsTable: self.s = '' self._cw = col_width-1 - def _pline(self, pirate, puzstrs): + def _pline(self, pirate, puzstrs, extra): self.s += ' %-*s' % (max(max_pirate_namelen, 14), pirate) for v in puzstrs: self.s += ' %-*.*s' % (self._cw,self._cw, v) + if extra: + self.s += ' ' + extra self.s += '\n' def _puzstr(self, pi, puzzle): @@ -348,61 +376,51 @@ class StandingsTable: spc = name.find(' ') if spc < 0: return name return name[0:min(4,spc)] + name[spc+1:] - self._pline('', map(puzn_redact, self._puzzles)) + self._pline('', map(puzn_redact, self._puzzles), None) def literalline(self, line): self.s += line + '\n' - def pirate(self, pi): + def pirate_dummy(self, name, standingstring, extra=None): + self._pline(name, standingstring * len(self._puzzles), extra) + def pirate(self, pi, extra=None): puzstrs = [self._puzstr(pi,puz) for puz in self._puzzles] - self._pline(pi.name, puzstrs) + self._pline(pi.name, puzstrs, extra) def results(self): return self.s -def do_pirate(pirates, bu): - print '{' - for pirate in pirates: - info = PirateInfo(pirate) - print '%s: %s,' % (`pirate`, info) - print '}' - -def prep_crew_of(args, bu, max_age=300): - if len(args) != 1: bu('crew-of takes one pirate name') - pi = PirateInfo(args[0], max_age) - return CrewInfo(pi.crew[0], max_age) - -def do_crew_of(args, bu): - ci = prep_crew_of(args, bu) - print ci - -def do_standings_crew_of(args, bu): - ci = prep_crew_of(args, bu, 60) - tab = StandingsTable() - tab.headings() - for (rank, members) in ci.crew: - if not members: continue - tab.literalline('%s:' % rank) - for p in members: - pi = PirateInfo(p, random.randint(900,1800)) - tab.pirate(pi) - print tab.results() +#---------- chat log parser ---------- class PirateAboard: - # pa.v - # pa.last_time - # pa.last_event - # pa.gunner - # pa.last_chat_time - # pa.last_chat_chan - def __init__(pa, v, time, event): + # This is essentially a transparent, dumb, data class. + # pa.v + # pa.name + # pa.last_time + # pa.last_event + # pa.gunner + # pa.last_chat_time + # pa.last_chat_chan + # pa.pi + + def __init__(pa, pn, v, time, event): + pa.name = pn pa.v = v pa.last_time = time pa.last_event = event pa.last_chat_time = None pa.last_chat_chan = None pa.gunner = False + pa.pi = None + + def pirate_info(pa): + if not pa.pi and not fetcher.need_wait(time.time()): + pa.pi = PirateInfo(pa.name, 3600) + return pa.pi class ChatLogTracker: - def __init__(self, myself_pi): + # This is quite complex so we make it opaque. Use the + # official invokers, accessors etc. + + def __init__(self, myself_pi, logfn): self._pl = {} # self._pl['Pirate'] = self._vl = {} # self._vl['Vessel']['Pirate'] = PirateAboard # self._vl['Vessel']['#lastaboard'] @@ -411,8 +429,11 @@ class ChatLogTracker: self._date = None self._myself = myself_pi self._need_redisplay = False + self._f = file(logfn) + self._lbuf = '' + self._progress = [0, os.fstat(self._f.fileno()).st_size] - def _refresh(self): + def force_redisplay(self): self._need_redisplay = True def _onboard_event(self,timestamp,pirate,event): @@ -423,17 +444,18 @@ class ChatLogTracker: pa.last_event = event else: if pa is not None: del pa.v[pirate] - pa = PirateAboard(self._v, timestamp, event) + pa = PirateAboard(pirate, self._v, timestamp, event) self._pl[pirate] = pa self._v[pirate] = pa self._v['#lastaboard'] = timestamp - self._refresh() + self.force_redisplay() return pa def _trash_vessel(self, v): for pn in v: if pn.startswith('#'): continue del self._pl[pn] + self.force_redisplay() def expire_garbage(self, timestamp): for (vn,v) in list(self._vl.iteritems()): @@ -460,7 +482,8 @@ class ChatLogTracker: m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$') if m: - self._date = m.groups() + self._date = [int(x) for x in m.groups()] + self._previous_timestamp = None return d('date '+`self._date`) if self._date is None: @@ -470,9 +493,18 @@ class ChatLogTracker: if not m: return d('no timestamp') - time_tuple = [int(x) for x in self._date + m.groups()] - time_tuple += (-1,-1,-1) - timestamp = time.mktime(time_tuple) + while True: + time_tuple = (self._date + + [int(x) for x in m.groups()] + + [-1,-1,-1]) + timestamp = time.mktime(time_tuple) + if timestamp >= self._previous_timestamp: break + self._date[2] += 1 + self._debug_line_disposition(timestamp,'', + 'new date '+`self._date`) + + self._previous_timestamp = timestamp + l = l[l.find(' ')+1:] def ob_x(who,event): @@ -480,6 +512,11 @@ class ChatLogTracker: def ob1(did): ob_x(m.group(1), did); return d(did) def oba(did): return ob1('%s %s' % (did, m.group(2))) + def disembark(who, how): + ob_x(who, 'leaving '+how) + del self._v[who] + del self._pl[who] + m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$') if m: pn = self._myself.name @@ -509,13 +546,13 @@ class ChatLogTracker: m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$') if m: (who,what) = m.groups() - pa = ob_x(who,'ordered '+what) + pa = ob_x(who,'ord '+what) if what == 'Gunning': pa.gunner = True return d('duty order') m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$') - if m: oba('stopped'); return d('stopped') + if m: oba('stopped'); return d("end") def chat(what): who = m.group(1) @@ -524,8 +561,35 @@ class ChatLogTracker: if pa.v is self._v: pa.last_chat_time = timestamp pa.last_chat_chan = what - self._refresh() - return d(what+' chat') + self.force_redisplay() + return d('chat '+what) + + def chat_metacmd(what): + (cmdr, metacmd) = m.groups() + metacmd = regexp.sub('\\s+', ' ', metacmd).strip() + m2 = regexp.match( + '/([ad]) (?:([A-Za-z ]+)\\s*:)?([A-Za-z ]+)$', + metacmd) + if not m2: return chat(what) + + (cmd, vn, targets) = m2.groups() + + if cmdr == self._myself.name: how = 'manual: /%s' % cmd + else: how = '/%s %s' % (cmd,cmdr) + if cmd == 'a': each = ob_x + else: each = disembark + + if vn is not None: + vn = vn.title() + if not regexp.match( + '(?:.* )?' + vn + '$', + self._vessel): + return chat('/%s %s:' % (cmd,vn)) + + for target in targets.split(' '): + if not target: continue + each(target.title(), how) + return d('/%s' % cmd) m = rm('(\\w+) (?:issued an order|ordered everyone) "') if m: return ob1('general order'); @@ -536,11 +600,14 @@ class ChatLogTracker: m = rm('(\\w+) tells ye, "') if m: return chat('private') + m = rm('Ye told (\\w+), "(.*)"$') + if m: return chat_metacmd('private') + m = rm('(\\w+) flag officer chats, "') if m: return chat('flag officer') - m = rm('(\\w+) officer chats, "') - if m: return chat('officer') + m = rm('(\\w+) officer chats, "(.*)"$') + if m: return chat_metacmd('officer') m = rm('Game over\\. Winners: ([A-Za-z, ]+)\\.$') if m: @@ -557,10 +624,7 @@ class ChatLogTracker: m = rm('(\\w+) has left the vessel\.') if m: - who = m.group(1) - ob_x(who, 'disembarked') - del self._v[who] - del self._pl[who] + disembark(m.group(1), 'disembarked') return d('disembarked') return d('not matched') @@ -601,28 +665,231 @@ class ChatLogTracker: s += '>\n' return s -def do_ship_aid(args, bu): - if len(args) != 1: bu('ship-aid takes only chat log filename') + def catchup(self, progress=None): + while True: + more = self._f.readline() + if not more: break + + self._progress[0] += len(more) + if progress: progress.progress(*self._progress) + + self._lbuf += more + if self._lbuf.endswith('\n'): + self.chatline(self._lbuf.rstrip()) + self._lbuf = '' + if opts.debug >= 2: + debug(self.__str__()) + if progress: progress.caughtup() + + def changed(self): + rv = self._need_redisplay + self._need_redisplay = False + return rv + def myname(self): + # returns our pirate name + return self._myself.name + def vessel(self): + # returns the vessel we're aboard or None + return self._vessel + def aboard(self): + # returns a list of PirateAboard sorted by name + if self._v is None: return [] + return [ self._v[pn] + for pn in sorted(self._v.keys()) + if not pn.startswith('#') ] + +#---------- implementations of actual operation modes ---------- + +def do_pirate(pirates, bu): + print '{' + for pirate in pirates: + info = PirateInfo(pirate) + print '%s: %s,' % (`pirate`, info) + print '}' + +def prep_crew_of(args, bu, max_age=300): + if len(args) != 1: bu('crew-of takes one pirate name') + pi = PirateInfo(args[0], max_age) + if pi.crew is None: return None + return CrewInfo(pi.crew[0], max_age) + +def do_crew_of(args, bu): + ci = prep_crew_of(args, bu) + print ci + +def do_standings_crew_of(args, bu): + ci = prep_crew_of(args, bu, 60) + tab = StandingsTable() + tab.headings() + for (rank, members) in ci.crew: + if not members: continue + tab.literalline('%s:' % rank) + for p in members: + pi = PirateInfo(p, random.randint(900,1800)) + tab.pirate(pi) + print tab.results() + +class ProgressPrintPercentage: + def __init__(self, f=sys.stdout): + self._f = f + def progress_string(self,done,total): + return "scan chat logs %3d%%\r" % ((done*100) / total) + def progress(self,*a): + self._f.write(self.progress_string(*a)) + self._f.flush() + def show_init(self, pirate, ocean): + print >>self._f, 'Starting up, %s on the %s ocean' % ( + pirate, ocean) + def caughtup(self): + self._f.write(' \r') + self._f.flush() + +#----- modes which use the chat log parser are quite complex ----- + +def prep_chat_log(args, bu, + progress=ProgressPrintPercentage(), + max_myself_age=3600): + if len(args) != 1: bu('this action takes only chat log filename') logfn = args[0] - logfn_re = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_chat-log-\\w+$' + logfn_re = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_' match = regexp.match(logfn_re, logfn) - if not match: bu('ship-aid chat log filename is not in default format') - (pirate, fetcher.ocean) = match.groups() - myself_pi = PirateInfo(pirate,3600) - track = ChatLogTracker(myself_pi) - f = file(logfn) - l = '' + if not match: bu('chat log filename is not in expected format') + (pirate, ocean) = match.groups() + fetcher.default_ocean(ocean) + + myself = PirateInfo(pirate,max_myself_age) + progress.show_init(pirate, fetcher.ocean) + track = ChatLogTracker(myself, logfn) + + opts.debug -= 2 + track.catchup(progress) + opts.debug += 2 + + track.force_redisplay() + + return (myself, track) + +def do_track_chat_log(args, bu): + (myself, track) = prep_chat_log(args, bu) while True: - l += f.readline() - if l.endswith('\n'): - track.chatline(l.rstrip()) - l = '' + track.catchup() + if track.changed(): print track - continue - if l: - continue - print track - os.sleep(1) + time.sleep(1) + +#----- ship management aid ----- + +class Display_dumb(ProgressPrintPercentage): + def __init__(self): + ProgressPrintPercentage.__init__(self) + def show(self, s): + print '\n\n', s; + def realstart(self): + pass + +class Display_overwrite(ProgressPrintPercentage): + def __init__(self): + ProgressPrintPercentage.__init__(self) + + null = file('/dev/null','w') + curses.setupterm(fd=null.fileno()) + + self._clear = curses.tigetstr('clear') + if not self._clear: + self._debug('missing clear!') + self.show = Display_dumb.show + return + + self._t = {'el':'', 'ed':''} + if not self._init_sophisticated(): + for k in self._t.keys(): self._t[k] = '' + self._t['ho'] = self._clear + + def _debug(self,m): debug('display overwrite: '+m) + + def _init_sophisticated(self): + for k in self._t.keys(): + s = curses.tigetstr(k) + self._t[k] = s + self._t['ho'] = curses.tigetstr('ho') + if not self._t['ho']: + cup = curses.tigetstr('cup') + self._t['ho'] = curses.tparm(cup,0,0) + missing = [k for k in self._t.keys() if not self._t[k]] + if missing: + self.debug('missing '+(' '.join(missing))) + return 0 + return 1 + + def show(self, s): + w = sys.stdout.write + def wti(k): w(self._t[k]) + + wti('ho') + nl = '' + for l in s.rstrip().split('\n'): + w(nl) + w(l) + wti('el') + nl = '\r\n' + wti('ed') + w(' ') + sys.stdout.flush() + + def realstart(self): + sys.stdout.write(self._clear) + sys.stdout.flush() + + +def do_ship_aid(args, bu): + if opts.ship_duty is None: opts.ship_duty = True + + displayer = globals()['Display_'+opts.display]() + rotate_nya = '/-\\' + + (myself, track) = prep_chat_log(args, bu, progress=displayer) + + def timeevent(t,e): + if t is None: return ' ' * 22 + return " %-4s %-16s" % (format_time_interval(now - t),e) + + displayer.realstart() + + while True: + track.catchup() + now = time.time() + + s = "%s" % track.myname() + + vn = track.vessel() + if vn is None: s += " not on a vessel?!" + else: s += " on board the %s" % vn + s += " at %s\n" % time.strftime("%Y-%m-%d %H:%M:%S") + + tbl = StandingsTable() + tbl.headings() + + for pa in track.aboard(): + pi = pa.pirate_info() + + xs = '' + if pa.gunner: xs += 'G ' + else: xs += ' ' + xs += timeevent(pa.last_time, pa.last_event) + xs += timeevent(pa.last_chat_time, pa.last_chat_chan) + + if pi is None: + tbl.pirate_dummy(pa.name, rotate_nya[0], xs) + else: + tbl.pirate(pi, xs) + + s += tbl.results() + + displayer.show(s) + time.sleep(1) + rotate_nya = rotate_nya[1:3] + rotate_nya[0] + +#---------- main program ---------- def main(): global opts, fetcher @@ -633,7 +900,12 @@ actions: yoweb-scrape [--ocean OCEAN ...] pirate PIRATE yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE - yoweb-scrape [--ocean OCEAN ...] ship-aid CHAT-LOG + yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG + yoweb-scrape [options] ship-aid CHAT-LOG (must be .../PIRATE_OCEAN_chat-log*) + +display modes (for --display) apply to ship-aid: + --display=dumb just print new information, scrolling the screen + --display=overwrite use cursor motion, selective clear, etc. to redraw at top ''') ao = pa.add_option ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None, @@ -641,13 +913,20 @@ actions: ao('--cache-dir', dest='cache_dir', metavar='DIR', default='~/.yoweb-scrape-cache', help='cache yoweb pages in DIR') - ao('-D','--debug', action='store_true', dest='debug', default=False, + ao('-D','--debug', action='count', dest='debug', default=0, help='enable debugging output') + ao('--debug-fd', action='count', dest='debug_fd', + help='write any debugging output to specified fd') ao('-q','--quiet', action='store_true', dest='quiet', help='suppress warning output') + ao('--display', action='store', dest='display', + type='choice', choices=['dumb','overwrite'], + help='how to display ship aid') ao('--ship-duty', action='store_true', dest='ship_duty', help='show ship duty station puzzles') + ao('--all-puzzles', action='store_false', dest='ship_duty', + help='show all puzzles, not just ship duty stations') (opts,args) = pa.parse_args() random.seed() @@ -655,6 +934,9 @@ actions: if len(args) < 1: pa.error('need a mode argument') + if opts.debug_fd is not None: + opts.debug_file = fdopen(opts.debug_fd, 'w') + mode = args[0] mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_') try: mode_fn = globals()[mode_fn_name] @@ -668,6 +950,13 @@ actions: if opts.cache_dir.startswith('~/'): opts.cache_dir = os.getenv('HOME') + opts.cache_dir[1:] + if opts.display is None: + if ((opts.debug > 0 and opts.debug_fd is None) + or not os.isatty(sys.stdout.fileno())): + opts.display = 'dumb' + else: + opts.display = 'overwrite' + fetcher = Fetcher(opts.ocean, opts.cache_dir) mode_fn(args[1:], pa.error)