chiark / gitweb /
prettify the code a bit
[ypp-sc-tools.web-live.git] / yoweb-scrape
1 #!/usr/bin/python
2
3 #---------- setup ----------
4
5 import signal
6 signal.signal(signal.SIGINT, signal.SIG_DFL)
7
8 import os
9 import time
10 import urllib
11 import urllib2
12 import errno
13 import sys
14 import re as regexp
15 import random
16 from optparse import OptionParser
17
18 from BeautifulSoup import BeautifulSoup
19
20 opts = None
21
22 #---------- YPP parameters and arrays ----------
23
# Puzzle names; _find_standings compares these against the 'alt' text of
# the standings images on a pirate page.
puzzles = [
    'Swordfighting', 'Bilging', 'Sailing', 'Rigging', 'Navigating',
    'Battle Navigation', 'Gunning', 'Carpentry', 'Rumble', 'Treasure Haul',
    'Drinking', 'Spades', 'Hearts', 'Treasure Drop', 'Poker', 'Distilling',
    'Alchemistry', 'Shipwrightery', 'Blacksmithing', 'Foraging',
]

# Standing names; a standing is stored elsewhere as an index into this list.
standingvals = [
    'Able', 'Distinguished', 'Respected', 'Master',
    'Renowned', 'Grand-Master', 'Legendary', 'Ultimate',
]

# Matches hrefs that start with the yoweb pirate page path.
pirate_ref_re = regexp.compile(r'^/yoweb/pirate\.wm')

# Column width used when laying out pirate names.
max_pirate_namelen = 12
35
36
37 #---------- general utilities ----------
38
def debug(m):
    """Print m on stdout if at least one -D/--debug flag was given."""
    if opts.debug <= 0:
        return
    print(m)
42
43 #---------- caching and rate-limiting data fetcher ----------
44
class Fetcher:
    """Caching, rate-limiting fetcher for yoweb pages.

    Each fetched URL is stored in cachedir as a file named
    '#<url-quoted URL>#'.  The modification times of those files are
    also what the rate limiter looks at: the more recent fetches there
    have been, the longer need_wait() asks us to wait.
    """

    def __init__(self, ocean, cachedir):
        """Create cachedir if necessary and expire old cache entries.

        ocean may be None, in which case a default is chosen on the
        first call to yoweb().
        """
        debug('Fetcher init %s' % cachedir)
        self.ocean = ocean
        self.cachedir = cachedir
        try:
            os.mkdir(cachedir)
        except (OSError, IOError) as oe:
            # An already-existing cache directory is the normal case.
            if oe.errno != errno.EEXIST:
                raise
        self._cache_scan(time.time())

    def _default_ocean(self):
        """Fall back to the 'ice' ocean if none has been selected."""
        if self.ocean is None:
            self.ocean = 'ice'

    def _cache_scan(self, now):
        """Expire old cache entries; return the ages of the rest.

        Entries older than opts.expire_age are deleted.  The returned
        list of ages (seconds, relative to now) is unsorted.
        """
        ages = []
        debug('Fetcher   scan_cache')
        for leaf in os.listdir(self.cachedir):
            if not leaf.startswith('#'):
                continue
            path = self.cachedir + '/' + leaf
            try:
                s = os.stat(path)
            except (OSError, IOError) as oe:
                # Entry vanished between listdir and stat; ignore it.
                if oe.errno != errno.ENOENT:
                    raise
                continue
            age = now - s.st_mtime
            if age > opts.expire_age:
                debug('Fetcher    expire %d %s' % (age, path))
                try:
                    os.remove(path)
                except (OSError, IOError) as oe:
                    if oe.errno != errno.ENOENT:
                        raise
                continue
            ages.append(age)
        return ages

    def need_wait(self, now):
        """Return how many seconds to wait before the next real fetch.

        The cache entry ages are sorted youngest first and compared
        with a threshold which starts at 1s and grows (add 3, then
        multiply by 1.25) for each entry.  Any entry younger than its
        threshold (and younger than 300s) contributes the shortfall;
        the largest shortfall is returned.  Zero means no wait.
        """
        ages = self._cache_scan(now)
        ages.sort()
        debug('Fetcher   ages ' + repr(ages))
        min_age = 1
        need_wait = 0
        for age in ages:
            if age < min_age and age < 300:
                debug('Fetcher   morewait min=%d age=%d' %
                      (min_age, age))
                need_wait = max(need_wait, min_age - age)
            min_age += 3
            min_age *= 1.25
        return need_wait

    def _rate_limit_cache_clean(self, now):
        """Sleep for as long as need_wait() says (which also expires)."""
        need_wait = self.need_wait(now)
        if need_wait > 0:
            debug('Fetcher   wait %d' % need_wait)
            time.sleep(need_wait)

    def fetch(self, url, max_age):
        """Return the content of url, from the cache if fresh enough.

        max_age is clamped to [opts.min_max_age, opts.expire_age].  A
        cached copy no older than that is returned directly; otherwise
        the URL is fetched (after any rate-limiting delay), stored in
        the cache via a temporary file and rename, and returned.
        """
        debug('Fetcher fetch %s' % url)
        cache_corename = urllib.quote_plus(url)
        cache_item = "%s/#%s#" % (self.cachedir, cache_corename)
        try:
            f = open(cache_item, 'r')
        except (OSError, IOError) as oe:
            if oe.errno != errno.ENOENT:
                raise
            f = None
        now = time.time()
        max_age = max(opts.min_max_age, min(max_age, opts.expire_age))
        if f is not None:
            # Always close the cache file, including when it turns out
            # to be stale (the handle used to be dropped unclosed).
            try:
                age = now - os.fstat(f.fileno()).st_mtime
                if age <= max_age:
                    data = f.read()
                    debug('Fetcher  cached %d > %d' % (max_age, age))
                    return data
                debug('Fetcher  stale %d < %d' % (max_age, age))
            finally:
                f.close()

        debug('Fetcher  fetch')
        self._rate_limit_cache_clean(now)

        stream = urllib2.urlopen(url)
        try:
            data = stream.read()
        finally:
            stream.close()
        # Write to a per-process temporary name and rename into place so
        # that a reader never sees a partially-written cache entry.
        cache_tmp = "%s/#%s~%d#" % (
            self.cachedir, cache_corename, os.getpid())
        f = open(cache_tmp, 'w')
        try:
            f.write(data)
        finally:
            f.close()
        os.rename(cache_tmp, cache_item)
        debug('Fetcher  stored')
        return data

    def yoweb(self, kind, tail, max_age):
        """Fetch http://<ocean>.puzzlepirates.com/yoweb/<kind><tail>."""
        self._default_ocean()
        url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
            self.ocean, kind, tail)
        return self.fetch(url, max_age)
142
143 #---------- logging assistance for troubled screenscrapers ----------
144
class SoupLog:
    """Accumulates messages describing problems found while scraping."""

    def __init__(self):
        self.msgs = []

    def msg(self, m):
        """Record the message m."""
        self.msgs.append(m)

    def soupm(self, obj, m):
        """Record m, annotated with the repr of the offending object."""
        self.msg(m + '; in ' + repr(obj))

    def needs_msgs(self, child_souplog):
        """Move all of child_souplog's messages onto the end of ours."""
        self.msgs.extend(child_souplog.msgs)
        child_souplog.msgs = []
155
def soup_text(obj):
    """Return all text found under obj, concatenated and stripped."""
    pieces = obj.findAll(text=True)
    return ''.join(pieces).strip()
159
class SomethingSoupInfo(SoupLog):
    """Base class for scrapers: fetches a yoweb page and parses it.

    The parsed page is left in self._soup for the subclass to examine.
    """

    def __init__(self, kind, tail, max_age):
        SoupLog.__init__(self)
        page = fetcher.yoweb(kind, tail, max_age)
        self._soup = BeautifulSoup(
            page, convertEntities=BeautifulSoup.HTML_ENTITIES)
167
168 #---------- scraper for pirate pages ----------
169
class PirateInfo(SomethingSoupInfo):
    """Scraper for a pirate's yoweb page.

    Public data members:
      pi.standings = { 'Treasure Haul': 0, ... }
                     maps puzzle name to an index into standingvals;
                     puzzles whose standing could not be determined
                     are absent
      pi.name = name
      pi.crew = (id, name)   or None if it could not be determined
      pi.flag = (id, name)   or None if it could not be determined
      pi.msgs = [ 'message describing problem with scrape' ]
    """

    def __init__(self, pirate, max_age=300):
        SomethingSoupInfo.__init__(self,
                                   'pirate.wm?target=', pirate, max_age)
        self.name = pirate
        self._find_standings()
        self.crew = self._find_crewflag('crew',
                                        '^/yoweb/crew/info\\.wm')
        self.flag = self._find_crewflag('flag',
                                        '^/yoweb/flag/info\\.wm')

    def _find_standings(self):
        """Fill in self.standings from the standings images on the page.

        Problems are collected in a private log and only copied into
        self.msgs if at least one puzzle ends up without a standing.
        """
        imgs = self._soup.findAll(
            'img', src=regexp.compile('/yoweb/images/stat.*'))
        # Either '<something>/<Standing>' or
        # '<something>/<something> (ocean-wide <Standing>)'; whichever
        # alternative matches, its group is match.lastindex.
        standing_re = regexp.compile(
u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
            )

        found = {}
        for skill in puzzles:
            found[skill] = []

        skl = SoupLog()

        for img in imgs:
            try:
                puzzle = img['alt']
            except KeyError:
                continue

            if puzzle not in puzzles:
                skl.soupm(img, 'unknown puzzle: "%s"' % puzzle)
                continue
            key = img.findParent('td')
            if key is None:
                skl.soupm(img, 'puzzle at root! "%s"' % puzzle)
                continue
            valelem = key.findNextSibling('td')
            if valelem is None:
                skl.soupm(key, 'puzzle missing sibling "%s"'
                          % puzzle)
                continue
            valstr = soup_text(valelem)
            match = standing_re.match(valstr)
            if match is None:
                skl.soupm(key, ('puzzle "%s" unparseable'+
                                ' standing "%s"') % (puzzle, valstr))
                continue
            found[puzzle].append(match.group(match.lastindex))

        self.standings = {}

        for puzzle in puzzles:
            sl = found[puzzle]
            if len(sl) > 1:
                skl.msg('puzzle "%s" multiple standings %s' %
                        (puzzle, repr(sl)))
                continue
            if not sl:
                skl.msg('puzzle "%s" no standing found' % puzzle)
                continue
            standing = sl[0]
            # Search the whole of standingvals: the old loop stopped one
            # short and so never recognised the last entry ('Ultimate').
            try:
                self.standings[puzzle] = standingvals.index(standing)
            except ValueError:
                skl.msg('puzzle "%s" unknown standing "%s"' %
                        (puzzle, standing))

        # Only report the collected problems if something is missing.
        if any(puzzle not in self.standings for puzzle in puzzles):
            self.needs_msgs(skl)

    def _find_crewflag(self, cf, yoweb_re):
        """Return (id, name) for the page's single crew or flag link.

        cf is 'crew' or 'flag'; yoweb_re matches the link's href.
        Returns None (after recording a message) unless exactly one
        such link exists and its href ends in '<cf>id=<id>'.
        """
        things = self._soup.findAll('a', href=regexp.compile(yoweb_re))
        if len(things) != 1:
            self.msg('zero or several %s id references found' % cf)
            return None
        thing = things[0]
        id_re = '\\b%sid\\=(\\w+)$' % cf
        id_haystack = thing['href']
        match = regexp.compile(id_re).search(id_haystack)
        if match is None:
            self.soupm(thing, ('incomprehensible %s id ref'+
                               ' (%s in %s)') % (cf, id_re, id_haystack))
            return None
        name = soup_text(thing)
        return (match.group(1), name)

    def __str__(self):
        return repr((self.crew, self.flag, self.standings, self.msgs))
268
269 #---------- scraper for crew pages ----------
270
class CrewInfo(SomethingSoupInfo):
    """Scraper for a crew's yoweb page.

    Public data members:
      ci.crew = [ ('Captain',        ['Pirate', ...]),
                  ('Senior Officer', [...]),
                   ... ]
      ci.msgs = [ 'message describing problem with scrape' ]
    """

    def __init__(self, crewid, max_age=300):
        SomethingSoupInfo.__init__(self,
                                   'crew/info.wm?crewid=', crewid, max_age)
        self._find_crew()

    def _find_crew(self):
        """Fill in self.crew from the members table of the page."""
        self.crew = []
        captain_imgs = self._soup.findAll(
            'img', src='/yoweb/images/crew-captain.png')
        if len(captain_imgs) != 1:
            self.msg('crew members: no. of captain images != 1')
            return

        # Walk outwards from the captain image to the first enclosing
        # table which contains a link to a pirate page.
        table = captain_imgs[0]
        while not table.find('a', href=pirate_ref_re):
            table = table.findParent('table')
            if not table:
                self.msg('crew members: cannot find table')
                return

        crew_rank_re = regexp.compile('/yoweb/images/crew')
        members = None
        for row in table.contents:
            # Bare strings between the rows are not of interest.
            if isinstance(row, basestring):
                continue

            if row.find('img', attrs={'src': crew_rank_re}):
                # A rank heading row starts a new (rank, members) entry.
                members = []
                self.crew.append((soup_text(row), members))
                continue

            for cell in row.findAll('a', href=pirate_ref_re):
                if members is None:
                    self.soupm(cell, 'crew members: crew before rank')
                    continue
                members.append(soup_text(cell))

    def __str__(self):
        return repr((self.crew, self.msgs))
318
319 #---------- pretty-printer for tables of pirate puzzle standings ----------
320
class StandingsTable:
    """Pretty-printer for a table of pirates' puzzle standings.

    Lines are accumulated by headings(), literalline(), pirate() and
    pirate_dummy(); results() returns the text built so far.
    """

    def __init__(self, use_puzzles=None, col_width=6):
        """use_puzzles is a list of columns; each is a puzzle name or a
        list of puzzle names sharing one column.  If None, the ship
        duty stations are used when opts.ship_duty is set and all
        puzzles otherwise.  col_width includes the separating space.
        """
        if use_puzzles is None:
            if opts.ship_duty:
                use_puzzles = [
                    'Navigating', 'Battle Navigation',
                    'Gunning',
                    ['Sailing', 'Rigging'],
                    'Bilging',
                    'Carpentry',
                    'Treasure Haul',
                ]
            else:
                use_puzzles = puzzles
        self._puzzles = use_puzzles
        self.s = ''
        self._cw = col_width - 1

    def _pline(self, pirate, puzstrs, extra):
        """Append one table line: name column, one cell per entry of
        puzstrs (padded and truncated to the column width), then extra
        if it is non-empty."""
        self.s += ' %-*s' % (max(max_pirate_namelen, 14), pirate)
        for v in puzstrs:
            self.s += ' %-*.*s' % (self._cw, self._cw, v)
        if extra:
            self.s += ' ' + extra
        self.s += '\n'

    def _puzstr(self, pi, puzzle):
        """Return the cell text for pi's standing in puzzle.

        puzzle may be a list, in which case the best standing among
        them is shown.  A missing standing gives '?'; standing 0 gives
        an empty cell.
        """
        if not isinstance(puzzle, list):
            puzzle = [puzzle]
        try:
            standing = max([pi.standings[p] for p in puzzle])
        except KeyError:
            return '?'
        if not standing:
            return ''
        s = ''
        if self._cw > 4:
            # NOTE(review): the original also computed the standing's
            # initial letter here (lower-cased below Master) but never
            # used it; the numeric index is what has always been shown,
            # so that output is kept.
            s += repr(standing)
        if self._cw > 5:
            s += ' '
        # One '*' per two levels, plus a '+' for an odd level.
        s += '*' * (standing // 2)
        s += '+' * (standing % 2)
        return s

    def headings(self):
        """Append the line of (abbreviated) column headings."""
        def puzn_redact(name):
            if isinstance(name, list):
                # Shared column: each name gets half the width.
                return '/'.join(
                    ["%.*s" % (self._cw // 2, puzn_redact(n))
                     for n in name])
            spc = name.find(' ')
            if spc < 0:
                return name
            # Two words: at most 4 characters of the first, then the
            # second word run on directly.
            return name[0:min(4, spc)] + name[spc+1:]
        self._pline('', [puzn_redact(p) for p in self._puzzles], None)

    def literalline(self, line):
        """Append line verbatim, followed by a newline."""
        self.s += line + '\n'

    def pirate_dummy(self, name, standingstring, extra=None):
        """Append a line for name with standingstring in every cell."""
        # Build a real list of cells.  Multiplying the string itself
        # only gave one cell per puzzle when it was a single character.
        self._pline(name, [standingstring] * len(self._puzzles), extra)

    def pirate(self, pi, extra=None):
        """Append a line showing the standings of the PirateInfo pi."""
        puzstrs = [self._puzstr(pi, puz) for puz in self._puzzles]
        self._pline(pi.name, puzstrs, extra)

    def results(self):
        """Return the table text accumulated so far."""
        return self.s
383
384 #---------- chat log parser ----------
385
class PirateAboard:
    """Plain data record for one pirate seen aboard a vessel.

      pa.v               the vessel dict this pirate is recorded in
      pa.name            pirate name
      pa.last_time       timestamp of the last event
      pa.last_event      description of the last event
      pa.gunner          True once the pirate has been ordered to gun
      pa.last_chat_time  timestamp of the last chat, or None
      pa.last_chat_chan  channel of the last chat, or None
      pa.pi              PirateInfo, or None if not fetched yet
    """

    def __init__(self, pn, v, time, event):
        self.name = pn
        self.v = v
        self.last_time = time
        self.last_event = event
        self.last_chat_time = None
        self.last_chat_chan = None
        self.gunner = False
        self.pi = None

    def pirate_info(self):
        """Return the pirate's PirateInfo, fetching it if we may.

        Nothing is fetched while the fetcher says a rate-limiting wait
        would be needed; in that case the result may still be None.
        """
        if self.pi:
            return self.pi
        if not fetcher.need_wait(time.time()):
            self.pi = PirateInfo(self.name, 3600)
        return self.pi
411
class ChatLogTracker:
    """Follows a chat log and tracks which pirates are aboard what.

    This is quite complex so it is opaque.  Use the official invokers
    and accessors: catchup(), changed(), myname(), vessel(), aboard().
    """

    def __init__(self, myself_pi, logfn):
        # self._pl['Pirate'] = PirateAboard
        self._pl = {}
        # self._vl['Vessel']['Pirate'] = PirateAboard
        # self._vl['Vessel']['#lastaboard'] = timestamp
        self._vl = {}
        # self._v is self._vl[self._vessel] once we have boarded
        self._v = None
        self._vessel = None
        self._date = None
        self._myself = myself_pi
        self._need_redisplay = False
        self._f = open(logfn)
        self._lbuf = ''
        # [bytes consumed so far, size of the log when we opened it]
        self._progress = [0, os.fstat(self._f.fileno()).st_size]

    def _refresh(self):
        self._need_redisplay = True

    def _onboard_event(self, timestamp, pirate, event):
        """Record that pirate did event aboard the current vessel.

        A pirate previously recorded on a different vessel is moved.
        Returns the pirate's PirateAboard.
        """
        pa = self._pl.get(pirate)
        if pa is not None and pa.v is self._v:
            pa.last_time = timestamp
            pa.last_event = event
        else:
            if pa is not None:
                del pa.v[pirate]
            pa = PirateAboard(pirate, self._v, timestamp, event)
            self._pl[pirate] = pa
            self._v[pirate] = pa
        self._v['#lastaboard'] = timestamp
        self._refresh()
        return pa

    def _trash_vessel(self, v):
        """Forget every pirate recorded in the vessel dict v."""
        for pn in v:
            if pn.startswith('#'):
                continue
            del self._pl[pn]
        self._refresh()

    def expire_garbage(self, timestamp):
        """Drop vessels not boarded within opts.ship_reboard_clearout."""
        for (vn, v) in list(self._vl.items()):
            if timestamp - v['#lastaboard'] > opts.ship_reboard_clearout:
                self._debug_line_disposition(timestamp, '',
                                             'stale reset ' + vn)
                self._trash_vessel(v)
                del self._vl[vn]

    def clear_vessel(self, timestamp):
        """Replace the current vessel's record with an empty one."""
        if self._v is not None:
            self._trash_vessel(self._v)
        self._v = {'#lastaboard': timestamp}
        self._vl[self._vessel] = self._v

    def _debug_line_disposition(self, timestamp, l, m):
        debug('CLT %13s %-30s %s' % (timestamp, m, l))

    def chatline(self, l):
        """Process one complete line of the chat log."""
        line = l
        timestamp = None

        # These helpers deliberately look at line, timestamp and m as
        # they are at the time of the call; all three are rebound below.
        def rm(pattern):
            return regexp.match(pattern, line)

        def d(disposition):
            return self._debug_line_disposition(timestamp, line,
                                                disposition)

        m = rm(r'=+ (\d+)/(\d+)/(\d+) =+$')
        if m:
            self._date = m.groups()
            return d('date ' + repr(self._date))

        if self._date is None:
            return d('date unset')

        m = rm(r'\[(\d\d):(\d\d):(\d\d)\] ')
        if not m:
            return d('no timestamp')

        # The three date fields followed by the three time fields, then
        # -1 for each of the remaining three fields.
        fields = tuple([int(x) for x in self._date + m.groups()])
        timestamp = time.mktime(fields + (-1, -1, -1))
        # Strip the '[hh:mm:ss] ' prefix.
        line = line[line.find(' ') + 1:]

        def ob_x(who, event):
            return self._onboard_event(timestamp, who, event)

        def ob1(did):
            ob_x(m.group(1), did)
            return d(did)

        def oba(did):
            return ob1('%s %s' % (did, m.group(2)))

        m = rm(r'Going aboard the (\S.*\S)\.\.\.$')
        if m:
            pn = self._myself.name
            self._vessel = m.group(1)
            dm = 'boarding'

            self._v = self._vl.get(self._vessel)
            if self._v is None:
                dm += ' new'
                la = 0
                dm += ' ?la'
            else:
                la = self._v['#lastaboard']

            if timestamp - la > opts.ship_reboard_clearout:
                self.clear_vessel(timestamp)
                dm += ' stale'

            ob_x(pn, 'we boarded')
            self.expire_garbage(timestamp)
            return d(dm)

        if self._v is None:
            return d('no vessel')

        m = rm(r'(\w+) has come aboard\.$')
        if m:
            return ob1('boarded')

        m = rm(r'You have ordered (\w+) to do some (\S.*\S)\.$')
        if m:
            (who, what) = m.groups()
            pa = ob_x(who, 'ord ' + what)
            if what == 'Gunning':
                pa.gunner = True
            return d('duty order')

        m = rm(r'(\w+) abandoned a (\S.*\S) station\.$')
        if m:
            oba('stopped')
            return d("end")

        def chat(what):
            who = m.group(1)
            pa = self._pl.get(who)
            if pa is None:
                return d('chat mystery')
            if pa.v is self._v:
                pa.last_chat_time = timestamp
                pa.last_chat_chan = what
                self._refresh()
                return d(what + ' chat')
            # Known pirate, but not on our vessel: nothing to record.
            return None

        m = rm(r'(\w+) (?:issued an order|ordered everyone) "')
        if m:
            return ob1('general order')

        for (pattern, channel) in (
                (r'(\w+) says, "', 'public'),
                (r'(\w+) tells ye, "', 'private'),
                (r'(\w+) flag officer chats, "', 'flag officer'),
                (r'(\w+) officer chats, "', 'officer')):
            m = rm(pattern)
            if m:
                return chat(channel)

        m = rm(r'Game over\.  Winners: ([A-Za-z, ]+)\.$')
        if m:
            winners = m.group(1).split(', ')
            if self._myself.name not in winners:
                return d('lost boarding battle')
            for pn in winners:
                if ' ' in pn:
                    continue
                ob_x(pn, 'won boarding battle')
            return d('won boarding battle')

        m = rm(r'(\w+) is eliminated\!')
        if m:
            return ob1('eliminated in fray')

        m = rm(r'(\w+) has left the vessel\.')
        if m:
            who = m.group(1)
            ob_x(who, 'disembarked')
            del self._v[who]
            del self._pl[who]
            return d('disembarked')

        return d('not matched')

    def _str_vessel(self, vn, v):
        """Return a debugging dump of the vessel named vn."""
        indent = ' ' * 20
        s = ' vessel %s\n' % vn
        s += indent + "%-*s   %13s\n" % (
            max_pirate_namelen, '#lastaboard',
            v['#lastaboard'])
        for pn in sorted(v.keys()):
            if pn.startswith('#'):
                continue
            pa = v[pn]
            assert pa.v == v
            assert self._pl[pn] == pa
            s += indent + "%s %-*s %13s %-30s %13s %s\n" % (
                (' ', 'G')[pa.gunner],
                max_pirate_namelen, pn,
                pa.last_time, pa.last_event,
                pa.last_chat_time, pa.last_chat_chan)
        return s

    def __str__(self):
        s = ('<ChatLogTracker\n'
             ' myself %s\n'
             ' vessel %s\n') % (self._myself.name, self._vessel)
        assert ((self._v is None and self._vessel is None) or
                (self._v is self._vl[self._vessel]))
        if self._vessel is not None:
            s += self._str_vessel(self._vessel, self._v)
        for vn in sorted(self._vl.keys()):
            if vn == self._vessel:
                continue
            s += self._str_vessel(vn, self._vl[vn])
        # Consistency checks only; these contribute nothing to s.
        for p in self._pl:
            pa = self._pl[p]
            assert pa.v[p] is pa
            assert pa.v in self._vl.values()
        s += '>\n'
        return s

    def catchup(self, progress=None):
        """Process everything newly appended to the log file.

        An incomplete last line is kept until the rest of it arrives.
        progress, if given, is told how far we have got and when we
        have caught up.
        """
        while True:
            more = self._f.readline()
            if not more:
                break

            self._progress[0] += len(more)
            if progress:
                progress.progress(*self._progress)

            self._lbuf += more
            if self._lbuf.endswith('\n'):
                self.chatline(self._lbuf.rstrip())
                self._lbuf = ''
        if progress:
            progress.caughtup()

    def changed(self):
        """Return whether anything changed since the last call."""
        rv = self._need_redisplay
        self._need_redisplay = False
        return rv

    def myname(self):
        """Return our pirate name."""
        return self._myself.name

    def vessel(self):
        """Return the vessel we're aboard, or None."""
        return self._vessel

    def aboard(self):
        """Return a list of PirateAboard, sorted by name."""
        return [self._v[pn]
                for pn in sorted(self._v.keys())
                if not pn.startswith('#')]
648
649 #---------- implementations of actual operation modes ----------
650
def do_pirate(pirates, bu):
    """'pirate' mode: print the scraped info for each named pirate."""
    print('{')
    for name in pirates:
        scraped = PirateInfo(name)
        print('%s: %s,' % (repr(name), scraped))
    print('}')
657
def prep_crew_of(args, bu, max_age=300):
    """Return the CrewInfo for the crew of the single pirate in args.

    bu is called with a message if args is not exactly one name.
    Returns None if the pirate's crew could not be determined.
    """
    if len(args) != 1:
        bu('crew-of takes one pirate name')
    pirate = PirateInfo(args[0], max_age)
    if pirate.crew is None:
        return None
    crewid = pirate.crew[0]
    return CrewInfo(crewid, max_age)
663
def do_crew_of(args, bu):
    """'crew-of' mode: print the crew of the named pirate."""
    print(prep_crew_of(args, bu))
667
def do_standings_crew_of(args, bu):
    """'standings-crew-of' mode: print a standings table for every
    member of the named pirate's crew, grouped by rank."""
    ci = prep_crew_of(args, bu, 60)
    if ci is None:
        # prep_crew_of returns None when the pirate page did not yield
        # exactly one crew reference; say so rather than crashing on
        # ci.crew below.
        sys.exit('cannot determine the crew of pirate "%s"' % args[0])
    tab = StandingsTable()
    tab.headings()
    for (rank, members) in ci.crew:
        if not members:
            continue
        tab.literalline('%s:' % rank)
        for p in members:
            # Randomise the acceptable cache age so that the members'
            # pages do not all go stale at the same moment.
            pi = PirateInfo(p, random.randint(900, 1800))
            tab.pirate(pi)
    print(tab.results())
679
class ProgressPrintPercentage:
    """Progress reporter which overwrites a percentage line on a file."""

    def __init__(self, f=sys.stdout):
        self._f = f

    def _emit(self, text):
        # Write and flush at once so the line is visible immediately.
        self._f.write(text)
        self._f.flush()

    def progress(self, done, total):
        """Show done as a percentage of total."""
        percent = (done*100) / total
        self._emit("scan chat logs %3d%%\r" % percent)

    def caughtup(self):
        """Blank out the progress line."""
        self._emit('                   \r')
688
689 #----- modes which use the chat log parser are quite complex -----
690
def prep_chat_log(args, bu,
                  progress=ProgressPrintPercentage(),
                  max_myself_age=3600):
    """Open the chat log named by args and read it up to date.

    The pirate name and the ocean are taken from the log's filename,
    which must be in the default '<Pirate>_<ocean>_chat-log-<x>' form;
    the ocean is stored in fetcher.ocean.  bu is called with a message
    on bad arguments.  Returns (PirateInfo for ourselves, tracker).
    """
    if len(args) != 1:
        bu('this action takes only chat log filename')
    logfn = args[0]
    found = regexp.match(
        '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_chat-log-\\w+$', logfn)
    if not found:
        bu('chat log filename is not in default format')
    (pirate, fetcher.ocean) = found.groups()

    myself = PirateInfo(pirate, max_myself_age)
    track = ChatLogTracker(myself, logfn)

    # The initial scan runs with the debug level reduced by one.
    opts.debug -= 1
    track.catchup(progress)
    opts.debug += 1

    return (myself, track)
709
def do_track_chat_log(args, bu):
    """'track-chat-log' mode: follow the log, printing the tracker
    state each time it changes.  Runs until interrupted."""
    tracker = prep_chat_log(args, bu)[1]
    while True:
        tracker.catchup()
        if tracker.changed():
            print(tracker)
        time.sleep(1)
717
def format_time_interval(ti):
    """Format an interval of ti seconds compactly.

    Under two minutes gives 'm:ss'; under two hours, minutes ('NNm');
    under a day, hours ('Nh'); otherwise days ('Nd').
    """
    if ti < 120:
        return '%d:%02d' % (ti / 60, ti % 60)
    # (exclusive upper limit, format, seconds per unit)
    for (limit, fmt, unit) in ((7200, '%2dm', 60), (86400, '%dh', 3600)):
        if ti < limit:
            return fmt % (ti / unit)
    return '%dd' % (ti / 86400)
723
def do_ship_aid(args, bu):
    """'ship-aid' mode: once a second, print a table of the pirates
    aboard our vessel with their standings and latest activity.

    Returns once the chat log shows no vessel for us; otherwise runs
    until interrupted.
    """
    if opts.ship_duty is None:
        opts.ship_duty = True

    (myself, track) = prep_chat_log(args, bu)

    # Spinner characters; the first is shown for pirates whose info
    # has not been fetched yet.
    rotate_nya = '/-\\'

    def timeevent(t, e):
        # Uses the value of now from the current loop iteration.
        if t is None:
            return ' ' * 22
        return " %-4s %-16s" % (format_time_interval(now - t), e)

    while True:
        track.catchup()
        now = time.time()

        s = "%s" % track.myname()

        vn = track.vessel()
        if vn is None:
            print(s + " ...?")
            return

        s += " on board the %s at %s\n" % (
            vn, time.strftime("%Y-%m-%d %H:%M:%S"))

        tbl = StandingsTable()
        tbl.headings()

        for pa in track.aboard():
            pi = pa.pirate_info()

            if pa.gunner:
                xs = 'G '
            else:
                xs = '  '
            xs += timeevent(pa.last_time, pa.last_event)
            xs += timeevent(pa.last_chat_time, pa.last_chat_chan)

            if pi is None:
                tbl.pirate_dummy(pa.name, rotate_nya[0], xs)
            else:
                tbl.pirate(pi, xs)

        s += tbl.results()

        print('\n\n' + s)

        time.sleep(1)
        # Rotate the whole spinner.  The old slice [1:2] dropped the
        # third character on the first rotation.
        rotate_nya = rotate_nya[1:] + rotate_nya[0]
770
771 #---------- main program ----------
772
def main():
    """Parse the command line, set up the fetcher and run the mode.

    The mode named by the first argument is dispatched to the function
    do_<mode>, with '-' in the mode name mapped to '_'.
    """
    global opts, fetcher

    pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
actions:
 yoweb-scrape [--ocean OCEAN ...] pirate PIRATE
 yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
 yoweb-scrape [--ocean OCEAN ...] ship-aid CHAT-LOG
''')
    ao = pa.add_option
    ao('-O', '--ocean', dest='ocean', metavar='OCEAN', default=None,
       help='select ocean OCEAN')
    ao('--cache-dir', dest='cache_dir', metavar='DIR',
       default='~/.yoweb-scrape-cache',
       help='cache yoweb pages in DIR')
    ao('-D', '--debug', action='count', dest='debug', default=0,
       help='enable debugging output')
    ao('-q', '--quiet', action='store_true', dest='quiet',
       help='suppress warning output')

    ao('--ship-duty', action='store_true', dest='ship_duty',
       help='show ship duty station puzzles')
    ao('--all-puzzles', action='store_false', dest='ship_duty',
       help='show all puzzles, not just ship duty stations')

    (opts, args) = pa.parse_args()
    random.seed()

    if len(args) < 1:
        pa.error('need a mode argument')

    mode = args[0]
    # A literal '_' in the mode is turned into '#' first, so that only
    # the documented '-' spelling can name a do_* function.
    mode_fn_name = 'do_' + mode.replace('_', '#').replace('-', '_')
    try:
        mode_fn = globals()[mode_fn_name]
    except KeyError:
        pa.error('unknown mode "%s"' % mode)

    # fixed parameters
    opts.min_max_age = 60
    opts.expire_age = 3600
    opts.ship_reboard_clearout = 3600

    if opts.cache_dir.startswith('~/'):
        # expanduser copes with HOME being unset, where the old
        # os.getenv('HOME') + ... raised TypeError.
        opts.cache_dir = os.path.expanduser(opts.cache_dir)

    fetcher = Fetcher(opts.ocean, opts.cache_dir)

    mode_fn(args[1:], pa.error)


if __name__ == '__main__':
    main()