X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.main.git;a=blobdiff_plain;f=yoweb-scrape;h=c3e6d73513adc3d5516252d8bd5c06ef706a26bd;hp=1450d2029d861a31e48c8050ad0974b2a79baf3c;hb=7b1ebd0829f7e41fd3299cc1400c83a0c89a7552;hpb=17901a7fd29680cdcaa62551b8ac93c53680e20a diff --git a/yoweb-scrape b/yoweb-scrape index 1450d20..c3e6d73 100755 --- a/yoweb-scrape +++ b/yoweb-scrape @@ -13,6 +13,7 @@ import errno import sys import re as regexp import random +import curses from optparse import OptionParser from BeautifulSoup import BeautifulSoup @@ -38,7 +39,12 @@ max_pirate_namelen = 12 def debug(m): if opts.debug > 0: - print m + print >>opts.debug_file, m + +def sleep(seconds): + if opts.debug > 0: + opts.debug_file.flush() + time.sleep(seconds) def format_time_interval(ti): if ti < 120: return '%d:%02d' % (ti / 60, ti % 60) @@ -58,9 +64,9 @@ class Fetcher: if oe.errno != errno.EEXIST: raise self._cache_scan(time.time()) - def _default_ocean(self): + def default_ocean(self, ocean='ice'): if self.ocean is None: - self.ocean = 'ice' + self.ocean = ocean def _cache_scan(self, now): # returns list of ages, unsorted @@ -102,7 +108,7 @@ class Fetcher: need_wait = self.need_wait(now) if need_wait > 0: debug('Fetcher wait %d' % need_wait) - time.sleep(need_wait) + sleep(need_wait) def fetch(self, url, max_age): debug('Fetcher fetch %s' % url) @@ -141,7 +147,7 @@ class Fetcher: return data def yoweb(self, kind, tail, max_age): - self._default_ocean() + self.default_ocean() url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % ( self.ocean, kind, tail) return self.fetch(url, max_age) @@ -422,57 +428,143 @@ class ChatLogTracker: def __init__(self, myself_pi, logfn): self._pl = {} # self._pl['Pirate'] = self._vl = {} # self._vl['Vessel']['Pirate'] = PirateAboard - # self._vl['Vessel']['#lastaboard'] - self._v = None # self._v = - self._vessel = None # self._vl[self._vessel] + # self._vl['Vessel']['#lastinfo'] + # self._vl['Vessel']['#name'] + # self._v = self._vl[self._vessel] self._date = None self._myself = myself_pi - self._need_redisplay = False self._f = file(logfn) self._lbuf = '' self._progress = [0, os.fstat(self._f.fileno()).st_size] + self._disembark_myself() + self._need_redisplay = False - def _refresh(self): + def _disembark_myself(self): + self._v = None + self._vessel = None + self.force_redisplay() + + def force_redisplay(self): self._need_redisplay = True - def _onboard_event(self,timestamp,pirate,event): - try: pa = self._pl[pirate] - except KeyError: pa = None - if pa is not None and pa.v is self._v: + def _vessel_updated(self, v, timestamp): + v['#lastinfo'] = timestamp + self.force_redisplay() + + def _onboard_event(self,v,timestamp,pirate,event): + pa = self._pl.get(pirate, None) + if pa is not None and pa.v is v: pa.last_time = timestamp pa.last_event = event else: if pa is not None: del pa.v[pirate] - pa = PirateAboard(pirate, self._v, timestamp, event) + pa = PirateAboard(pirate, v, timestamp, event) self._pl[pirate] = pa - self._v[pirate] = pa - self._v['#lastaboard'] = timestamp - self._refresh() + v[pirate] = pa + self._vessel_updated(v, timestamp) return pa def _trash_vessel(self, v): for pn in v: if pn.startswith('#'): continue del self._pl[pn] - self._refresh() + vn = v['#name'] + del self._vl[vn] + if v is self._v: self._disembark_myself() + self.force_redisplay() + + def _vessel_stale(self, v, timestamp): + return timestamp - v['#lastinfo'] > opts.ship_reboard_clearout + + def _vessel_check_expire(self, v, timestamp): + if not self._vessel_stale(v, timestamp): + return v + self._debug_line_disposition(timestamp,'', + 'stale-reset ' + v['#name']) + self._trash_vessel(v) + return None def expire_garbage(self, timestamp): - for (vn,v) in list(self._vl.iteritems()): - la = v['#lastaboard'] - if timestamp - la > opts.ship_reboard_clearout: - self._debug_line_disposition(timestamp,'', - 'stale reset '+vn) - self._trash_vessel(v) - del self._vl[vn] - - def clear_vessel(self, timestamp): - if self._v is not None: - self._trash_vessel(self._v) - self._v = {'#lastaboard': timestamp} - self._vl[self._vessel] = self._v + for v in self._vl.values(): + self._vessel_check_expire(v, timestamp) + + def _vessel_lookup(self, vn, timestamp, dml=[], create=False): + v = self._vl.get(vn, None) + if v is not None: + v = self._vessel_check_expire(v, timestamp) + if v is not None: + dml.append('found') + return v + if not create: + dml.append('no') + dml.append('new') + self._vl[vn] = v = { '#name': vn } + self._vessel_updated(v, timestamp) + return v + + def _find_matching_vessel(self, pattern, timestamp, cmdr, + dml=[], create=False): + # use when a commander pirate `cmdr' specified a vessel + # by name `pattern' (either may be None) + # if create is true, will create the vessel + # record if an exact name is specified + + if (pattern is not None and + not '*' in pattern + and len(pattern.split(' ')) == 2): + vn = pattern.title() + dml.append('exact') + return self._vessel_lookup( + vn, timestamp, dml=dml, create=create) + + if pattern is None: + pattern_check = lambda vn: True + else: + re = '(?:.* )?%s$' % pattern.lower().replace('*','.*') + pattern_check = regexp.compile(re, regexp.I).match + + tries = [] + + cmdr_pa = self._pl.get(cmdr, None) + if cmdr_pa: tries.append((cmdr_pa.v, 'cmdr')) + + tries.append((self._v, 'here')) + tried_vns = [] + + for (v, dm) in tries: + if v is None: dml.append(dm+'?'); continue + + vn = v['#name'] + if not pattern_check(vn): + tried_vns.append(vn) + dml.append(dm+'#') + continue + + dml.append(dm+'!') + return v + + if pattern is not None and '*' in pattern: + search = [ + (vn,v) + for (vn,v) in self._vl.iteritems() + if not self._vessel_stale(v, timestamp) + if pattern_check(vn) + ] + #debug('CLT-RE /%s/ wanted (%s) searched (%s)' % ( + # re, + # '/'.join(tried_vns), + # '/'.join([vn for (vn,v) in search]))) + + if len(search)==1: + dml.append('one') + return search[0][1] + elif search: + dml.append('many') + else: + dml.append('none') def _debug_line_disposition(self,timestamp,l,m): - debug('CLT %13s %-30s %s' % (timestamp,m,l)) + debug('CLT %13s %-40s %s' % (timestamp,m,l)) def chatline(self,l): rm = lambda re: regexp.match(re,l) @@ -481,7 +573,8 @@ class ChatLogTracker: m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$') if m: - self._date = m.groups() + self._date = [int(x) for x in m.groups()] + self._previous_timestamp = None return d('date '+`self._date`) if self._date is None: @@ -491,35 +584,43 @@ class ChatLogTracker: if not m: return d('no timestamp') - time_tuple = [int(x) for x in self._date + m.groups()] - time_tuple += (-1,-1,-1) - timestamp = time.mktime(time_tuple) + while True: + time_tuple = (self._date + + [int(x) for x in m.groups()] + + [-1,-1,-1]) + timestamp = time.mktime(time_tuple) + if timestamp >= self._previous_timestamp: break + self._date[2] += 1 + self._debug_line_disposition(timestamp,'', + 'new date '+`self._date`) + + self._previous_timestamp = timestamp + l = l[l.find(' ')+1:] - def ob_x(who,event): - return self._onboard_event(timestamp, who, event) + def ob_x(pirate,event): + return self._onboard_event( + self._v, timestamp, pirate, event) def ob1(did): ob_x(m.group(1), did); return d(did) def oba(did): return ob1('%s %s' % (did, m.group(2))) + def disembark(v, timestamp, pirate, event): + self._onboard_event( + v, timestamp, pirate, 'leaving '+event) + del v[pirate] + del self._pl[pirate] + m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$') if m: + dm = ['boarding'] pn = self._myself.name - self._vessel = m.group(1) - dm = 'boarding' - - try: self._v = self._vl[self._vessel] - except KeyError: self._v = None; dm += ' new' - - if self._v is not None: la = self._v['#lastaboard'] - else: la = 0; dm += ' ?la' - - if timestamp - la > opts.ship_reboard_clearout: - self.clear_vessel(timestamp) - dm += ' stale' - + vn = m.group(1) + v = self._vessel_lookup(vn, timestamp, dm, create=True) + self._vessel = vn + self._v = v ob_x(pn, 'we boarded') self.expire_garbage(timestamp) - return d(dm) + return d(' '.join(dm)) if self._v is None: return d('no vessel') @@ -538,15 +639,54 @@ class ChatLogTracker: m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$') if m: oba('stopped'); return d("end") - def chat(what): - who = m.group(1) - try: pa = self._pl[who] - except KeyError: return d('chat mystery') - if pa.v is self._v: - pa.last_chat_time = timestamp - pa.last_chat_chan = what - self._refresh() - return d(what+' chat') + def chat_core(speaker, chan): + try: pa = self._pl[speaker] + except KeyError: return 'mystery' + if pa.v is not self._v: return 'elsewhere' + pa.last_chat_time = timestamp + pa.last_chat_chan = chan + self.force_redisplay() + return 'here' + + def chat(chan): + speaker = m.group(1) + dm = chat_core(speaker, chan) + return d('chat %s %s' % (chan, dm)) + + def chat_metacmd(chan): + (cmdr, metacmd) = m.groups() + metacmd = regexp.sub('\\s+', ' ', metacmd).strip() + m2 = regexp.match( + '/([ad]) (?:([A-Za-z* ]+)\\s*:)?([A-Za-z ]+)$', + metacmd) + if not m2: return chat(chan) + + (cmd, pattern, targets) = m2.groups() + dml = ['cmd', chan, cmd] + + if cmd == 'a': each = self._onboard_event + else: each = disembark + + if cmdr == self._myself.name: + dml.append('self') + how = 'cmd: %s' % cmd + else: + dml.append('other') + how = 'cmd: %s %s' % (cmd,cmdr) + + v = self._find_matching_vessel( + pattern, timestamp, cmdr, dml, create=True) + + if v is not None: + targets = targets.strip().split(' ') + dml.append(`len(targets)`) + for target in targets: + each(v, timestamp, target.title(), how) + self._vessel_updated(v, timestamp) + + dm = ' '.join(dml) + chat_core(cmdr, 'cmd '+chan) + return d(dm) m = rm('(\\w+) (?:issued an order|ordered everyone) "') if m: return ob1('general order'); @@ -557,11 +697,14 @@ class ChatLogTracker: m = rm('(\\w+) tells ye, "') if m: return chat('private') + m = rm('Ye told (\\w+), "(.*)"$') + if m: return chat_metacmd('private') + m = rm('(\\w+) flag officer chats, "') if m: return chat('flag officer') - m = rm('(\\w+) officer chats, "') - if m: return chat('officer') + m = rm('(\\w+) officer chats, "(.*)"$') + if m: return chat_metacmd('officer') m = rm('Game over\\. Winners: ([A-Za-z, ]+)\\.$') if m: @@ -578,19 +721,18 @@ class ChatLogTracker: m = rm('(\\w+) has left the vessel\.') if m: - who = m.group(1) - ob_x(who, 'disembarked') - del self._v[who] - del self._pl[who] + pirate = m.group(1) + disembark(self._v, timestamp, pirate, 'disembarked') return d('disembarked') - return d('not matched') + return d('not-matched') def _str_vessel(self, vn, v): s = ' vessel %s\n' % vn s += ' '*20 + "%-*s %13s\n" % ( - max_pirate_namelen, '#lastaboard', - v['#lastaboard']) + max_pirate_namelen, '#lastinfo', + v['#lastinfo']) + assert v['#name'] == vn for pn in sorted(v.keys()): if pn.startswith('#'): continue pa = v[pn] @@ -634,6 +776,8 @@ class ChatLogTracker: if self._lbuf.endswith('\n'): self.chatline(self._lbuf.rstrip()) self._lbuf = '' + if opts.debug >= 2: + debug(self.__str__()) if progress: progress.caughtup() def changed(self): @@ -648,6 +792,7 @@ class ChatLogTracker: return self._vessel def aboard(self): # returns a list of PirateAboard sorted by name + if self._v is None: return [] return [ self._v[pn] for pn in sorted(self._v.keys()) if not pn.startswith('#') ] @@ -684,10 +829,16 @@ def do_standings_crew_of(args, bu): print tab.results() class ProgressPrintPercentage: - def __init__(self, f=sys.stdout): self._f = f - def progress(self,done,total): - self._f.write("scan chat logs %3d%%\r" % ((done*100) / total)) + def __init__(self, f=sys.stdout): + self._f = f + def progress_string(self,done,total): + return "scan chat logs %3d%%\r" % ((done*100) / total) + def progress(self,*a): + self._f.write(self.progress_string(*a)) self._f.flush() + def show_init(self, pirate, ocean): + print >>self._f, 'Starting up, %s on the %s ocean' % ( + pirate, ocean) def caughtup(self): self._f.write(' \r') self._f.flush() @@ -699,17 +850,21 @@ def prep_chat_log(args, bu, max_myself_age=3600): if len(args) != 1: bu('this action takes only chat log filename') logfn = args[0] - logfn_re = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_chat-log-\\w+$' + logfn_re = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_' match = regexp.match(logfn_re, logfn) - if not match: bu('chat log filename is not in default format') - (pirate, fetcher.ocean) = match.groups() + if not match: bu('chat log filename is not in expected format') + (pirate, ocean) = match.groups() + fetcher.default_ocean(ocean) myself = PirateInfo(pirate,max_myself_age) + progress.show_init(pirate, fetcher.ocean) track = ChatLogTracker(myself, logfn) - opts.debug -= 1 + opts.debug -= 2 track.catchup(progress) - opts.debug += 1 + opts.debug += 2 + + track.force_redisplay() return (myself, track) @@ -719,19 +874,86 @@ def do_track_chat_log(args, bu): track.catchup() if track.changed(): print track - time.sleep(1) + sleep(1) + +#----- ship management aid ----- + +class Display_dumb(ProgressPrintPercentage): + def __init__(self): + ProgressPrintPercentage.__init__(self) + def show(self, s): + print '\n\n', s; + def realstart(self): + pass + +class Display_overwrite(ProgressPrintPercentage): + def __init__(self): + ProgressPrintPercentage.__init__(self) + + null = file('/dev/null','w') + curses.setupterm(fd=null.fileno()) + + self._clear = curses.tigetstr('clear') + if not self._clear: + self._debug('missing clear!') + self.show = Display_dumb.show + return + + self._t = {'el':'', 'ed':''} + if not self._init_sophisticated(): + for k in self._t.keys(): self._t[k] = '' + self._t['ho'] = self._clear + + def _debug(self,m): debug('display overwrite: '+m) + + def _init_sophisticated(self): + for k in self._t.keys(): + s = curses.tigetstr(k) + self._t[k] = s + self._t['ho'] = curses.tigetstr('ho') + if not self._t['ho']: + cup = curses.tigetstr('cup') + self._t['ho'] = curses.tparm(cup,0,0) + missing = [k for k in self._t.keys() if not self._t[k]] + if missing: + self.debug('missing '+(' '.join(missing))) + return 0 + return 1 + + def show(self, s): + w = sys.stdout.write + def wti(k): w(self._t[k]) + + wti('ho') + nl = '' + for l in s.rstrip().split('\n'): + w(nl) + w(l) + wti('el') + nl = '\r\n' + wti('ed') + w(' ') + sys.stdout.flush() + + def realstart(self): + sys.stdout.write(self._clear) + sys.stdout.flush() + def do_ship_aid(args, bu): if opts.ship_duty is None: opts.ship_duty = True - (myself, track) = prep_chat_log(args, bu) - + displayer = globals()['Display_'+opts.display]() rotate_nya = '/-\\' + (myself, track) = prep_chat_log(args, bu, progress=displayer) + def timeevent(t,e): if t is None: return ' ' * 22 return " %-4s %-16s" % (format_time_interval(now - t),e) + displayer.realstart() + while True: track.catchup() now = time.time() @@ -739,10 +961,9 @@ def do_ship_aid(args, bu): s = "%s" % track.myname() vn = track.vessel() - if vn is None: print s + " ...?"; return - - s += " on board the %s at %s\n" % ( - vn, time.strftime("%Y-%m-%d %H:%M:%S")) + if vn is None: s += " not on a vessel?!" + else: s += " on board the %s" % vn + s += " at %s\n" % time.strftime("%Y-%m-%d %H:%M:%S") tbl = StandingsTable() tbl.headings() @@ -763,10 +984,9 @@ def do_ship_aid(args, bu): s += tbl.results() - print '\n\n', s; - - time.sleep(1) - rotate_nya = rotate_nya[1:2] + rotate_nya[0] + displayer.show(s) + sleep(1) + rotate_nya = rotate_nya[1:3] + rotate_nya[0] #---------- main program ---------- @@ -780,7 +1000,11 @@ actions: yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG - yoweb-scrape [--ocean OCEAN ...] ship-aid CHAT-LOG + yoweb-scrape [options] ship-aid CHAT-LOG (must be .../PIRATE_OCEAN_chat-log*) + +display modes (for --display) apply to ship-aid: + --display=dumb just print new information, scrolling the screen + --display=overwrite use cursor motion, selective clear, etc. to redraw at top ''') ao = pa.add_option ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None, @@ -790,8 +1014,13 @@ actions: help='cache yoweb pages in DIR') ao('-D','--debug', action='count', dest='debug', default=0, help='enable debugging output') + ao('--debug-fd', type='int', dest='debug_fd', + help='write any debugging output to specified fd') ao('-q','--quiet', action='store_true', dest='quiet', help='suppress warning output') + ao('--display', action='store', dest='display', + type='choice', choices=['dumb','overwrite'], + help='how to display ship aid') ao('--ship-duty', action='store_true', dest='ship_duty', help='show ship duty station puzzles') @@ -804,6 +1033,11 @@ actions: if len(args) < 1: pa.error('need a mode argument') + if opts.debug_fd is not None: + opts.debug_file = os.fdopen(opts.debug_fd, 'w') + else: + opts.debug_file = sys.stdout + mode = args[0] mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_') try: mode_fn = globals()[mode_fn_name] @@ -817,6 +1051,13 @@ actions: if opts.cache_dir.startswith('~/'): opts.cache_dir = os.getenv('HOME') + opts.cache_dir[1:] + if opts.display is None: + if ((opts.debug > 0 and opts.debug_fd is None) + or not os.isatty(sys.stdout.fileno())): + opts.display = 'dumb' + else: + opts.display = 'overwrite' + fetcher = Fetcher(opts.ocean, opts.cache_dir) mode_fn(args[1:], pa.error)