X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.db-test.git;a=blobdiff_plain;f=yoweb-scrape;h=3e77ffdec575e95e48368ffbbb195ca6711d6bbe;hp=ca174e27647efdba100a326e8559e9f6fd62d5c4;hb=11373a544443dbaf6ffccf5559df07b959548922;hpb=f6ea97a00f4890926e35de02645891f93c0d65a4 diff --git a/yoweb-scrape b/yoweb-scrape index ca174e2..3e77ffd 100755 --- a/yoweb-scrape +++ b/yoweb-scrape @@ -32,8 +32,8 @@ max_pirate_namelen = 12 def debug(m): - if opts.debug: - print >>sys.stderr, m + if opts.debug > 0: + print m class Fetcher: def __init__(self, ocean, cachedir): @@ -70,7 +70,7 @@ class Fetcher: ages.append(age) return ages - def _rate_limit_cache_clean(self, now): + def need_wait(self, now): ages = self._cache_scan(now) ages.sort() debug('Fetcher ages ' + `ages`) @@ -83,6 +83,10 @@ class Fetcher: need_wait = max(need_wait, min_age - age) min_age += 3 min_age *= 1.25 + return need_wait + + def _rate_limit_cache_clean(self, now): + need_wait = self.need_wait(now) if need_wait > 0: debug('Fetcher wait %d' % need_wait) time.sleep(need_wait) @@ -280,7 +284,7 @@ class CrewInfo(SomethingSoupInfo): crew_rank_re = regexp.compile('/yoweb/images/crew') for row in tbl.contents: # findAll(recurse=False) - if isinstance(row, unicode): + if isinstance(row,basestring): continue is_rank = row.find('img', attrs={'src': crew_rank_re}) @@ -317,10 +321,12 @@ class StandingsTable: self.s = '' self._cw = col_width-1 - def _pline(self, pirate, puzstrs): + def _pline(self, pirate, puzstrs, extra): self.s += ' %-*s' % (max(max_pirate_namelen, 14), pirate) for v in puzstrs: self.s += ' %-*.*s' % (self._cw,self._cw, v) + if extra: + self.s += ' ' + extra self.s += '\n' def _puzstr(self, pi, puzzle): @@ -348,12 +354,14 @@ class StandingsTable: spc = name.find(' ') if spc < 0: return name return name[0:min(4,spc)] + name[spc+1:] - self._pline('', map(puzn_redact, self._puzzles)) + self._pline('', map(puzn_redact, self._puzzles), None) def literalline(self, line): self.s += line + '\n' - def pirate(self, pi): + def pirate_dummy(self, name, standingstring, extra=None): + self._pline(name, standingstring * len(self._puzzles), extra) + def pirate(self, pi, extra=None): puzstrs = [self._puzstr(pi,puz) for puz in self._puzzles] - self._pline(pi.name, puzstrs) + self._pline(pi.name, puzstrs, extra) def results(self): return self.s @@ -388,17 +396,31 @@ def do_standings_crew_of(args, bu): class PirateAboard: # pa.v + # pa.name # pa.last_time # pa.last_event # pa.gunner - def __init__(pa, v, time, event): + # pa.last_chat_time + # pa.last_chat_chan + # pa.pi + + def __init__(pa, pn, v, time, event): + pa.name = pn pa.v = v pa.last_time = time pa.last_event = event + pa.last_chat_time = None + pa.last_chat_chan = None pa.gunner = False + pa.pi = None -class ShipCrewTracker: - def __init__(self, myself_pi): + def pirate_info(pa): + if not pa.pi and not fetcher.need_wait(time.time()): + pa.pi = PirateInfo(pa.name, 3600) + return pa.pi + +class ChatLogTracker: + def __init__(self, myself_pi, logfn): self._pl = {} # self._pl['Pirate'] = self._vl = {} # self._vl['Vessel']['Pirate'] = PirateAboard # self._vl['Vessel']['#lastaboard'] @@ -407,6 +429,9 @@ class ShipCrewTracker: self._date = None self._myself = myself_pi self._need_redisplay = False + self._f = file(logfn) + self._lbuf = '' + self._progress = [0, os.fstat(self._f.fileno()).st_size] def _refresh(self): self._need_redisplay = True @@ -419,109 +444,301 @@ class ShipCrewTracker: pa.last_event = event else: if pa is not None: del pa.v[pirate] - pa = PirateAboard(self._v, timestamp, event) + pa = PirateAboard(pirate, self._v, timestamp, event) self._pl[pirate] = pa self._v[pirate] = pa self._v['#lastaboard'] = timestamp self._refresh() return pa + def _trash_vessel(self, v): + for pn in v: + if pn.startswith('#'): continue + del self._pl[pn] + self._refresh() + + def expire_garbage(self, timestamp): + for (vn,v) in list(self._vl.iteritems()): + la = v['#lastaboard'] + if timestamp - la > opts.ship_reboard_clearout: + self._debug_line_disposition(timestamp,'', + 'stale reset '+vn) + self._trash_vessel(v) + del self._vl[vn] + def clear_vessel(self, timestamp): if self._v is not None: - for p in self._v: - if p.startswith('#'): continue - del self._pl[p] + self._trash_vessel(self._v) self._v = {'#lastaboard': timestamp} + self._vl[self._vessel] = self._v - def _warn(self, m): - pass + def _debug_line_disposition(self,timestamp,l,m): + debug('CLT %13s %-30s %s' % (timestamp,m,l)) def chatline(self,l): rm = lambda re: regexp.match(re,l) + d = lambda m: self._debug_line_disposition(timestamp,l,m) + timestamp = None + m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$') if m: self._date = m.groups() - return + return d('date '+`self._date`) + if self._date is None: - return + return d('date unset') + m = rm('\\[(\d\d):(\d\d):(\d\d)\\] ') if not m: - self._warn('undated? '+l) - return + return d('no timestamp') time_tuple = [int(x) for x in self._date + m.groups()] time_tuple += (-1,-1,-1) - print `time_tuple` timestamp = time.mktime(time_tuple) l = l[l.find(' ')+1:] - ob = lambda who, event: self._onboard_event( - timestamp, who, event) - oba = lambda m, did: ob( - m.group(1), '%s %s' % (did, m.group(2))) + def ob_x(who,event): + return self._onboard_event(timestamp, who, event) + def ob1(did): ob_x(m.group(1), did); return d(did) + def oba(did): return ob1('%s %s' % (did, m.group(2))) m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$') if m: + pn = self._myself.name self._vessel = m.group(1) + dm = 'boarding' + try: self._v = self._vl[self._vessel] - except KeyError: self._v = None + except KeyError: self._v = None; dm += ' new' + if self._v is not None: la = self._v['#lastaboard'] - else: la = 0 - if timestamp - la > 3600: + else: la = 0; dm += ' ?la' + + if timestamp - la > opts.ship_reboard_clearout: self.clear_vessel(timestamp) - ob(self._myself.name, 'we boarded') + dm += ' stale' + + ob_x(pn, 'we boarded') + self.expire_garbage(timestamp) + return d(dm) - if self._v is None: return + if self._v is None: + return d('no vessel') + + m = rm('(\\w+) has come aboard\\.$') + if m: return ob1('boarded'); m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$') if m: - pa = oba(m, 'ordered') - if m.group(2) == 'Gunning': + (who,what) = m.groups() + pa = ob_x(who,'ord '+what) + if what == 'Gunning': pa.gunner = True - return + return d('duty order') m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$') - if m: oba(m,'abandoned'); return + if m: oba('stopped'); return d("end") + + def chat(what): + who = m.group(1) + try: pa = self._pl[who] + except KeyError: return d('chat mystery') + if pa.v is self._v: + pa.last_chat_time = timestamp + pa.last_chat_chan = what + self._refresh() + return d(what+' chat') + + m = rm('(\\w+) (?:issued an order|ordered everyone) "') + if m: return ob1('general order'); m = rm('(\\w+) says, "') - if m: ob(m.group(1), 'talked'); return + if m: return chat('public') + + m = rm('(\\w+) tells ye, "') + if m: return chat('private') + + m = rm('(\\w+) flag officer chats, "') + if m: return chat('flag officer') + + m = rm('(\\w+) officer chats, "') + if m: return chat('officer') + + m = rm('Game over\\. Winners: ([A-Za-z, ]+)\\.$') + if m: + pl = m.group(1).split(', ') + if not self._myself.name in pl: + return d('lost boarding battle') + for pn in pl: + if ' ' in pn: continue + ob_x(pn,'won boarding battle') + return d('won boarding battle') + + m = rm('(\\w+) is eliminated\\!') + if m: return ob1('eliminated in fray'); m = rm('(\\w+) has left the vessel\.') if m: who = m.group(1) - ob(who, 'disembarked') + ob_x(who, 'disembarked') del self._v[who] del self._pl[who] - return + return d('disembarked') + + return d('not matched') + + def _str_vessel(self, vn, v): + s = ' vessel %s\n' % vn + s += ' '*20 + "%-*s %13s\n" % ( + max_pirate_namelen, '#lastaboard', + v['#lastaboard']) + for pn in sorted(v.keys()): + if pn.startswith('#'): continue + pa = v[pn] + assert pa.v == v + assert self._pl[pn] == pa + s += ' '*20 + "%s %-*s %13s %-30s %13s %s\n" % ( + (' ','G')[pa.gunner], + max_pirate_namelen, pn, + pa.last_time, pa.last_event, + pa.last_chat_time, pa.last_chat_chan) + return s - return + def __str__(self): + s = '''"+l+"<" - l += f.readline() - if l.endswith('\n'): - l.rstrip() - print "2>"+l+"<" - track.chatline(l) -# print `track.__dict__` - l = '' - continue - if l: - continue - print "3>EOF<" - print `track.__dict__` - os.sleep(1) + track.catchup() + now = time.time() + + s = "%s" % track.myname() + + vn = track.vessel() + if vn is None: print s + " ...?"; return + + s += " on board the %s at %s\n" % ( + vn, time.strftime("%Y-%m-%d %H:%M:%S")) + + tbl = StandingsTable() + tbl.headings() + + for pa in track.aboard(): + pi = pa.pirate_info() + + xs = '' + xs += timeevent(pa.last_time, pa.last_event) + xs += timeevent(pa.last_chat_time, pa.last_chat_chan) + + if pi is None: + tbl.pirate_dummy(pa.name, rotate_nya[0], xs) + else: + tbl.pirate(pi, xs) + + s += tbl.results() + + print '\n\n', s; + + time.sleep(1) + rotate_nya = rotate_nya[1:2] + rotate_nya[0] def main(): global opts, fetcher @@ -532,6 +749,7 @@ actions: yoweb-scrape [--ocean OCEAN ...] pirate PIRATE yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE + yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG yoweb-scrape [--ocean OCEAN ...] ship-aid CHAT-LOG ''') ao = pa.add_option @@ -540,13 +758,15 @@ actions: ao('--cache-dir', dest='cache_dir', metavar='DIR', default='~/.yoweb-scrape-cache', help='cache yoweb pages in DIR') - ao('-D','--debug', action='store_true', dest='debug', default=False, + ao('-D','--debug', action='count', dest='debug', default=0, help='enable debugging output') ao('-q','--quiet', action='store_true', dest='quiet', help='suppress warning output') ao('--ship-duty', action='store_true', dest='ship_duty', help='show ship duty station puzzles') + ao('--all-puzzles', action='store_false', dest='ship_duty', + help='show all puzzles, not just ship duty stations') (opts,args) = pa.parse_args() random.seed() @@ -562,6 +782,7 @@ actions: # fixed parameters opts.min_max_age = 60 opts.expire_age = 3600 + opts.ship_reboard_clearout = 3600 if opts.cache_dir.startswith('~/'): opts.cache_dir = os.getenv('HOME') + opts.cache_dir[1:]