chiark / gitweb /
Move format_time_interval to top
[ypp-sc-tools.db-live.git] / yoweb-scrape
1 #!/usr/bin/python
2
3 #---------- setup ----------
4
5 import signal
# Restore the operating system's default SIGINT action, replacing the
# handler Python installs at startup (the one that raises
# KeyboardInterrupt).
signal.signal(signal.SIGINT, signal.SIG_DFL)
7
8 import os
9 import time
10 import urllib
11 import urllib2
12 import errno
13 import sys
14 import re as regexp
15 import random
16 from optparse import OptionParser
17
18 from BeautifulSoup import BeautifulSoup
19
# Parsed command line options.  main() replaces this with the optparse
# result (plus a few fixed parameters) before any other code reads it.
opts = None
21
22 #---------- YPP parameters and arrays ----------
23
# Every puzzle name we expect to find on a pirate page.  This is also
# the column order of a standings table showing all puzzles.
puzzles = [
    'Swordfighting', 'Bilging', 'Sailing', 'Rigging', 'Navigating',
    'Battle Navigation', 'Gunning', 'Carpentry', 'Rumble', 'Treasure Haul',
    'Drinking', 'Spades', 'Hearts', 'Treasure Drop', 'Poker', 'Distilling',
    'Alchemistry', 'Shipwrightery', 'Blacksmithing', 'Foraging',
]

# Standing names; a standing is stored as its index in this list.
standingvals = [
    'Able', 'Distinguished', 'Respected', 'Master',
    'Renowned', 'Grand-Master', 'Legendary', 'Ultimate',
]

# Matches the href of a link to a pirate's yoweb page.
pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm')

# Column width reserved for pirate names in tabular output.
max_pirate_namelen = 12
35
36
37 #---------- general utilities ----------
38
def debug(m):
    """Print the diagnostic message m if debugging output is enabled.

    Debugging is enabled when opts.debug is greater than zero.
    """
    if not opts.debug > 0:
        return
    print(m)
42
def format_time_interval(ti):
    """Render an interval of ti seconds as a short string.

    Under two minutes:  'M:SS'
    Under two hours:    'NNm'  (right-aligned in two columns)
    Under one day:      'Nh'
    Otherwise:          'Nd'
    """
    if ti < 120:
        minutes = ti / 60
        seconds = ti % 60
        return '%d:%02d' % (minutes, seconds)
    elif ti < 7200:
        text = '%2dm' % (ti / 60)
    elif ti < 86400:
        text = '%dh' % (ti / 3600)
    else:
        text = '%dd' % (ti / 86400)
    return text
48
49 #---------- caching and rate-limiting data fetcher ----------
50
class Fetcher:
    """Caching and rate-limiting fetcher for yoweb pages.

    Each fetched page is stored in cachedir in a file named
    '#<url-quoted URL>#'.  The modification times of those cache files
    are also what the rate limiter looks at when deciding how long to
    sleep before making another network request.
    """

    def __init__(self, ocean, cachedir):
        """Remember ocean and cachedir, creating cachedir if necessary.

        The cache is scanned once immediately, which removes any
        entries older than opts.expire_age.
        """
        debug('Fetcher init %s' % cachedir)
        self.ocean = ocean
        self.cachedir = cachedir
        try:
            os.mkdir(cachedir)
        except (OSError, IOError) as oe:
            # An already existing cache directory is fine.
            if oe.errno != errno.EEXIST:
                raise
        self._cache_scan(time.time())

    def _default_ocean(self):
        """Select the 'ice' ocean if no ocean has been chosen yet."""
        if self.ocean is None:
            self.ocean = 'ice'

    def _cache_scan(self, now):
        """Expire old cache files and return the ages of the rest.

        Only directory entries whose names start with '#' are looked
        at.  Files older than opts.expire_age are deleted.  The return
        value is an unsorted list of ages (now - mtime, in seconds) of
        the surviving files.
        """
        ages = []
        debug('Fetcher   scan_cache')
        for leaf in os.listdir(self.cachedir):
            if not leaf.startswith('#'):
                continue
            path = self.cachedir + '/' + leaf
            try:
                s = os.stat(path)
            except (OSError, IOError) as oe:
                # The file may vanish between listdir and stat.
                if oe.errno != errno.ENOENT:
                    raise
                continue
            age = now - s.st_mtime
            if age > opts.expire_age:
                debug('Fetcher    expire %d %s' % (age, path))
                try:
                    os.remove(path)
                except (OSError, IOError) as oe:
                    if oe.errno != errno.ENOENT:
                        raise
                continue
            ages.append(age)
        return ages

    def need_wait(self, now):
        """Return how many seconds to wait before the next fetch.

        The cache file ages are considered youngest first.  The
        youngest must be at least 1 second old; for each subsequent
        file the required age becomes (previous + 3) * 1.25.  Files
        300 seconds old or older never cause a wait.  The result is
        the largest shortfall found, or 0 if there is none.
        """
        ages = self._cache_scan(now)
        ages.sort()
        debug('Fetcher   ages ' + repr(ages))
        min_age = 1
        need_wait = 0
        for age in ages:
            if age < min_age and age < 300:
                debug('Fetcher   morewait min=%d age=%d' %
                      (min_age, age))
                need_wait = max(need_wait, min_age - age)
            min_age += 3
            min_age *= 1.25
        return need_wait

    def _rate_limit_cache_clean(self, now):
        """Sleep for as long as need_wait() says is necessary."""
        need_wait = self.need_wait(now)
        if need_wait > 0:
            debug('Fetcher   wait %d' % need_wait)
            time.sleep(need_wait)

    def fetch(self, url, max_age):
        """Return the content of url, using the cache where possible.

        max_age (seconds) is clamped to the range
        [opts.min_max_age, opts.expire_age].  A cached copy no older
        than that is returned directly.  Otherwise the URL is fetched
        (after any rate-limiting sleep), written to a temporary file in
        the cache directory and renamed into place, and returned.
        """
        debug('Fetcher fetch %s' % url)
        cache_corename = urllib.quote_plus(url)
        cache_item = "%s/#%s#" % (self.cachedir, cache_corename)
        try:
            f = open(cache_item, 'r')
        except (OSError, IOError) as oe:
            if oe.errno != errno.ENOENT:
                raise
            f = None
        now = time.time()
        max_age = max(opts.min_max_age, min(max_age, opts.expire_age))

        if f is not None:
            # Always close the cache file, whether or not we use it.
            try:
                age = now - os.fstat(f.fileno()).st_mtime
                if age > max_age:
                    debug('Fetcher  stale %d < %d' % (max_age, age))
                else:
                    data = f.read()
                    debug('Fetcher  cached %d > %d' % (max_age, age))
                    return data
            finally:
                f.close()

        debug('Fetcher  fetch')
        self._rate_limit_cache_clean(now)

        stream = urllib2.urlopen(url)
        try:
            data = stream.read()
        finally:
            stream.close()

        # Write to a per-process temporary name and rename, so that the
        # final cache file name only ever refers to a complete file.
        cache_tmp = "%s/#%s~%d#" % (
            self.cachedir, cache_corename, os.getpid())
        f = open(cache_tmp, 'w')
        try:
            f.write(data)
        finally:
            f.close()
        os.rename(cache_tmp, cache_item)
        debug('Fetcher  stored')
        return data

    def yoweb(self, kind, tail, max_age):
        """Fetch a yoweb page from the selected ocean's server.

        The URL is http://<ocean>.puzzlepirates.com/yoweb/<kind><tail>.
        """
        self._default_ocean()
        url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
            self.ocean, kind, tail)
        return self.fetch(url, max_age)
148
149 #---------- logging assistance for troubled screenscrapers ----------
150
class SoupLog:
    """Collects messages describing problems found while scraping."""

    def __init__(self):
        self.msgs = []

    def msg(self, m):
        """Record the message m."""
        self.msgs.append(m)

    def soupm(self, obj, m):
        """Record m, annotated with the repr of the offending obj."""
        annotated = m + '; in ' + repr(obj)
        self.msg(annotated)

    def needs_msgs(self, child_souplog):
        """Take over all of child_souplog's messages, leaving it empty."""
        self.msgs.extend(child_souplog.msgs)
        child_souplog.msgs = []
161
def soup_text(obj):
    """Return all the text found within obj, joined and stripped."""
    text_nodes = obj.findAll(text=True)
    return ''.join(text_nodes).strip()
165
class SomethingSoupInfo(SoupLog):
    """Base for scrapers: fetches a yoweb page and parses it.

    The parsed page is left in self._soup for subclasses to examine.
    """

    def __init__(self, kind, tail, max_age):
        SoupLog.__init__(self)
        page = fetcher.yoweb(kind, tail, max_age)
        entity_mode = BeautifulSoup.HTML_ENTITIES
        self._soup = BeautifulSoup(page, convertEntities=entity_mode)
173
174 #---------- scraper for pirate pages ----------
175
class PirateInfo(SomethingSoupInfo):
    """Scraper for a pirate's yoweb page."""
    # Public data members:
    #  pi.standings = { 'Treasure Haul': 0, ... }
    #                       (values are indexes into standingvals)
    #  pi.name = name
    #  pi.crew = (id, name)   or None if it could not be determined
    #  pi.flag = (id, name)   or None if it could not be determined
    #  pi.msgs = [ 'message describing problem with scrape' ]

    def __init__(self, pirate, max_age=300):
        """Fetch and scrape the page for the pirate named pirate."""
        SomethingSoupInfo.__init__(self,
            'pirate.wm?target=', pirate, max_age)
        self.name = pirate
        self._find_standings()
        self.crew = self._find_crewflag('crew',
            '^/yoweb/crew/info\\.wm')
        self.flag = self._find_crewflag('flag',
            '^/yoweb/flag/info\\.wm')

    def _find_standings(self):
        """Fill in self.standings from the puzzle images on the page.

        Problems are collected in a separate log, which is merged into
        self.msgs only if at least one puzzle ends up with no standing.
        """
        imgs = self._soup.findAll('img',
            src=regexp.compile('/yoweb/images/stat.*'))
        # Group 1 is a plain standing; group 2 is the standing given
        # inside an '(ocean-wide ...)' annotation.
        standing_re = regexp.compile(
u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
            )
        standings = {}

        for skill in puzzles:
            standings[skill] = []

        skl = SoupLog()

        for img in imgs:
            try:
                puzzle = img['alt']
            except KeyError:
                continue

            if puzzle not in puzzles:
                skl.soupm(img, 'unknown puzzle: "%s"' % puzzle)
                continue
            key = img.findParent('td')
            if key is None:
                skl.soupm(img, 'puzzle at root! "%s"' % puzzle)
                continue
            valelem = key.findNextSibling('td')
            if valelem is None:
                skl.soupm(key, 'puzzle missing sibling "%s"'
                    % puzzle)
                continue
            valstr = soup_text(valelem)
            match = standing_re.match(valstr)
            if match is None:
                skl.soupm(key, ('puzzle "%s" unparseable'+
                    ' standing "%s"') % (puzzle, valstr))
                continue
            # lastindex selects whichever alternative matched.
            standing = match.group(match.lastindex)
            standings[puzzle].append(standing)

        self.standings = {}

        for puzzle in puzzles:
            sl = standings[puzzle]
            if len(sl) > 1:
                skl.msg('puzzle "%s" multiple standings %s' %
                        (puzzle, repr(sl)))
                continue
            if not sl:
                skl.msg('puzzle "%s" no standing found' % puzzle)
                continue
            standing = sl[0]
            # Look the name up in the whole of standingvals, so that the
            # final entry ('Ultimate') is recognised too.
            try:
                self.standings[puzzle] = standingvals.index(standing)
            except ValueError:
                skl.msg('puzzle "%s" unknown standing "%s"' %
                    (puzzle, standing))

        # Only report the collected problems if something is missing.
        for puzzle in puzzles:
            if puzzle not in self.standings:
                self.needs_msgs(skl)
                break

    def _find_crewflag(self, cf, yoweb_re):
        """Return (id, name) for the pirate's crew or flag, or None.

        cf is 'crew' or 'flag'; yoweb_re matches the href of the link
        to the corresponding info page.  Exactly one such link must be
        present and its href must end in '<cf>id=<id>'; otherwise a
        message is logged and None is returned.
        """
        things = self._soup.findAll('a', href=regexp.compile(yoweb_re))
        if len(things) != 1:
            self.msg('zero or several %s id references found' % cf)
            return None
        thing = things[0]
        id_re = '\\b%sid\\=(\\w+)$' % cf
        id_haystack = thing['href']
        match = regexp.compile(id_re).search(id_haystack)
        if match is None:
            self.soupm(thing, ('incomprehensible %s id ref'+
                ' (%s in %s)') % (cf, id_re, id_haystack))
            return None
        name = soup_text(thing)
        return (match.group(1), name)

    def __str__(self):
        return repr((self.crew, self.flag, self.standings, self.msgs))
274
275 #---------- scraper for crew pages ----------
276
class CrewInfo(SomethingSoupInfo):
    """Scraper for a crew's yoweb info page."""
    # Public data members:
    #  ci.crew = [ ('Captain',        ['Pirate', ...]),
    #              ('Senior Officer', [...]),
    #               ... ]
    #  ci.msgs = [ 'message describing problem with scrape' ]

    def __init__(self, crewid, max_age=300):
        SomethingSoupInfo.__init__(self,
            'crew/info.wm?crewid=', crewid, max_age)
        self._find_crew()

    def _find_crew(self):
        """Fill in self.crew from the crew member table on the page."""
        self.crew = []
        captain_imgs = self._soup.findAll('img',
            src='/yoweb/images/crew-captain.png')
        if len(captain_imgs) != 1:
            self.msg('crew members: no. of captain images != 1')
            return

        # Starting from the captain image, walk up through enclosing
        # tables until we reach something containing pirate links.
        node = captain_imgs[0]
        while not node.find('a', href=pirate_ref_re):
            node = node.findParent('table')
            if not node:
                self.msg('crew members: cannot find table')
                return

        rank_img_re = regexp.compile('/yoweb/images/crew')
        members = None
        for row in node.contents:
            # Skip bare strings between the child elements.
            if isinstance(row, basestring):
                continue

            if row.find('img', attrs={'src': rank_img_re}):
                # A rank heading: start a new member list for it.
                rank = soup_text(row)
                members = []
                self.crew.append((rank, members))
                continue

            for link in row.findAll('a', href=pirate_ref_re):
                if members is not None:
                    members.append(soup_text(link))
                else:
                    self.soupm(link, 'crew members: crew before rank')

    def __str__(self):
        return repr((self.crew, self.msgs))
324
325 #---------- pretty-printer for tables of pirate puzzle standings ----------
326
class StandingsTable:
    """Builds a text table of pirates' puzzle standings.

    Rows are accumulated in self.s by headings(), literalline(),
    pirate() and pirate_dummy(); results() returns the whole text.
    """

    def __init__(self, use_puzzles=None, col_width=6):
        """Set up an empty table.

        use_puzzles is the list of columns; each entry is a puzzle name
        or a list of puzzle names sharing one column.  If it is None the
        ship duty puzzles are used when opts.ship_duty is set, and all
        puzzles otherwise.  col_width includes the separating space.
        """
        if use_puzzles is None:
            if opts.ship_duty:
                use_puzzles = [
                    'Navigating', 'Battle Navigation',
                    'Gunning',
                    ['Sailing', 'Rigging'],
                    'Bilging',
                    'Carpentry',
                    'Treasure Haul',
                ]
            else:
                use_puzzles = puzzles
        self._puzzles = use_puzzles
        self.s = ''
        self._cw = col_width - 1

    def _pline(self, pirate, puzstrs, extra):
        """Append one row: name column, one column per item, then extra."""
        self.s += ' %-*s' % (max(max_pirate_namelen, 14), pirate)
        for v in puzstrs:
            # Pad and truncate every cell to exactly self._cw characters.
            self.s += ' %-*.*s' % (self._cw, self._cw, v)
        if extra:
            self.s += ' ' + extra
        self.s += '\n'

    def _puzstr(self, pi, puzzle):
        """Return the cell text for pi's standing in puzzle.

        puzzle may be a list of names, in which case the best of them is
        shown.  The result is '?' if any standing is unknown and '' for
        the lowest standing (index 0).  Otherwise it is the standing's
        index (if the column is wide enough) followed by one '*' per two
        levels and a '+' for an odd level.
        """
        if not isinstance(puzzle, list):
            puzzle = [puzzle]
        try:
            standing = max([pi.standings[p] for p in puzzle])
        except KeyError:
            return '?'
        if not standing:
            return ''
        s = ''
        if self._cw > 4:
            s += repr(standing)
        if self._cw > 5:
            s += ' '
        s += '*' * (standing // 2)
        s += '+' * (standing % 2)
        return s

    def headings(self):
        """Append the row of (abbreviated) puzzle names."""
        def puzn_redact(name):
            if isinstance(name, list):
                # Shared column: each name gets half the width.
                return '/'.join(
                    ["%.*s" % (self._cw // 2, puzn_redact(n))
                     for n in name])
            spc = name.find(' ')
            if spc < 0:
                return name
            # Two-word name: up to 4 letters of the first word, then
            # the rest without the space.
            return name[0:min(4, spc)] + name[spc+1:]
        self._pline('', [puzn_redact(p) for p in self._puzzles], None)

    def literalline(self, line):
        """Append line verbatim as a row of its own."""
        self.s += line + '\n'

    def pirate_dummy(self, name, standingstring, extra=None):
        """Append a row for name with standingstring in every column."""
        # One list element per column, so that a placeholder of any
        # length occupies exactly one cell.
        cells = [standingstring] * len(self._puzzles)
        self._pline(name, cells, extra)

    def pirate(self, pi, extra=None):
        """Append a row showing the standings of the PirateInfo pi."""
        puzstrs = [self._puzstr(pi, puz) for puz in self._puzzles]
        self._pline(pi.name, puzstrs, extra)

    def results(self):
        """Return the text of the table built so far."""
        return self.s
389
390 #---------- chat log parser ----------
391
class PirateAboard:
    """Record of one pirate believed to be aboard a vessel."""
    # This is essentially a transparent, dumb, data class.
    #  pa.v                 vessel dictionary this record belongs to
    #  pa.name              pirate name
    #  pa.last_time         timestamp of the most recent event
    #  pa.last_event        description of the most recent event
    #  pa.gunner            True once the pirate was ordered to gun
    #  pa.last_chat_time    timestamp of the most recent chat, or None
    #  pa.last_chat_chan    channel of the most recent chat, or None
    #  pa.pi                PirateInfo, or None if not fetched yet

    def __init__(self, pn, v, time, event):
        self.name = pn
        self.v = v
        self.last_time = time
        self.last_event = event
        self.last_chat_time = None
        self.last_chat_chan = None
        self.gunner = False
        self.pi = None

    def pirate_info(self):
        """Return this pirate's PirateInfo, or None if not available yet.

        The info is fetched on first use, but only when the fetcher
        says no rate-limiting wait is currently needed.
        """
        if not self.pi:
            if not fetcher.need_wait(time.time()):
                self.pi = PirateInfo(self.name, 3600)
        return self.pi
417
class ChatLogTracker:
    """Follows a chat log and tracks which pirates are aboard which vessel."""
    # This is quite complex so we make it opaque.  Use the
    # official invokers, accessors etc.

    def __init__(self, myself_pi, logfn):
        """Open the chat log logfn; myself_pi is our own PirateInfo."""
        self._pl = {}           # self._pl['Pirate'] = PirateAboard
        self._vl = {}           # self._vl['Vessel']['Pirate'] = PirateAboard
                                # self._vl['Vessel']['#lastaboard'] = timestamp
        self._v = None          # self._v is self._vl[self._vessel]
        self._vessel = None     # name of the vessel we are aboard
        self._date = None       # groups of the most recent date header
        self._myself = myself_pi
        self._need_redisplay = False
        self._f = open(logfn)
        self._lbuf = ''         # partial line read so far
        # [amount read so far, size of the log when it was opened]
        self._progress = [0, os.fstat(self._f.fileno()).st_size]

    def _refresh(self):
        self._need_redisplay = True

    def _onboard_event(self, timestamp, pirate, event):
        """Note that pirate did event aboard the current vessel.

        Creates the pirate's PirateAboard if necessary, moving the
        pirate from any other vessel, and returns it.
        """
        try:
            pa = self._pl[pirate]
        except KeyError:
            pa = None
        if pa is not None and pa.v is self._v:
            pa.last_time = timestamp
            pa.last_event = event
        else:
            # Unknown pirate, or last seen on some other vessel.
            if pa is not None:
                del pa.v[pirate]
            pa = PirateAboard(pirate, self._v, timestamp, event)
            self._pl[pirate] = pa
            self._v[pirate] = pa
        self._v['#lastaboard'] = timestamp
        self._refresh()
        return pa

    def _trash_vessel(self, v):
        """Forget every pirate recorded in the vessel dictionary v."""
        for pn in v:
            if pn.startswith('#'):
                continue
            del self._pl[pn]
        self._refresh()

    def expire_garbage(self, timestamp):
        """Drop vessels last boarded too long before timestamp."""
        # Iterate over a copy since entries are deleted as we go.
        for (vn, v) in list(self._vl.items()):
            la = v['#lastaboard']
            if timestamp - la > opts.ship_reboard_clearout:
                self._debug_line_disposition(timestamp, '',
                    'stale reset ' + vn)
                self._trash_vessel(v)
                del self._vl[vn]

    def clear_vessel(self, timestamp):
        """Start a fresh, empty record for the current vessel."""
        if self._v is not None:
            self._trash_vessel(self._v)
        self._v = {'#lastaboard': timestamp}
        self._vl[self._vessel] = self._v

    def _debug_line_disposition(self, timestamp, l, m):
        debug('CLT %13s %-30s %s' % (timestamp, m, l))

    def chatline(self, l):
        """Process one chat log line l (without its trailing newline).

        Always returns None; what was done with the line is reported
        through debug().
        """
        # rm, d and the nested functions below deliberately refer to
        # the local variables l, m and timestamp, which are reassigned
        # as parsing proceeds.
        rm = lambda re: regexp.match(re, l)
        d = lambda m: self._debug_line_disposition(timestamp, l, m)
        timestamp = None

        m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$')
        if m:
            self._date = m.groups()
            return d('date ' + repr(self._date))

        if self._date is None:
            return d('date unset')

        m = rm('\\[(\\d\\d):(\\d\\d):(\\d\\d)\\] ')
        if not m:
            return d('no timestamp')

        # The date groups are used as (year, month, day) and the line's
        # own groups as (hour, minute, second); -1 leaves the remaining
        # fields (weekday, yearday, DST) unspecified.
        time_tuple = [int(x) for x in self._date + m.groups()]
        time_tuple += (-1, -1, -1)
        timestamp = time.mktime(tuple(time_tuple))
        l = l[l.find(' ')+1:]

        def ob_x(who, event):
            return self._onboard_event(timestamp, who, event)
        def ob1(did):
            ob_x(m.group(1), did)
            return d(did)
        def oba(did):
            return ob1('%s %s' % (did, m.group(2)))

        m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$')
        if m:
            pn = self._myself.name
            self._vessel = m.group(1)
            dm = 'boarding'

            try:
                self._v = self._vl[self._vessel]
            except KeyError:
                self._v = None
                dm += ' new'

            if self._v is not None:
                la = self._v['#lastaboard']
            else:
                la = 0
                dm += ' ?la'

            if timestamp - la > opts.ship_reboard_clearout:
                self.clear_vessel(timestamp)
                dm += ' stale'

            ob_x(pn, 'we boarded')
            self.expire_garbage(timestamp)
            return d(dm)

        if self._v is None:
            return d('no vessel')

        m = rm('(\\w+) has come aboard\\.$')
        if m:
            return ob1('boarded')

        m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$')
        if m:
            (who, what) = m.groups()
            pa = ob_x(who, 'ord ' + what)
            if what == 'Gunning':
                pa.gunner = True
            return d('duty order')

        m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$')
        if m:
            oba('stopped')
            return d("end")

        def chat(what):
            who = m.group(1)
            try:
                pa = self._pl[who]
            except KeyError:
                return d('chat mystery')
            if pa.v is not self._v:
                # Known pirate, but recorded on another vessel: leave
                # the record alone but still report the disposition.
                return d('chat elsewhere')
            pa.last_chat_time = timestamp
            pa.last_chat_chan = what
            self._refresh()
            return d(what + ' chat')

        m = rm('(\\w+) (?:issued an order|ordered everyone) "')
        if m:
            return ob1('general order')

        m = rm('(\\w+) says, "')
        if m:
            return chat('public')

        m = rm('(\\w+) tells ye, "')
        if m:
            return chat('private')

        m = rm('(\\w+) flag officer chats, "')
        if m:
            return chat('flag officer')

        m = rm('(\\w+) officer chats, "')
        if m:
            return chat('officer')

        m = rm('Game over\\.  Winners: ([A-Za-z, ]+)\\.$')
        if m:
            pl = m.group(1).split(', ')
            if self._myself.name not in pl:
                return d('lost boarding battle')
            for pn in pl:
                if ' ' in pn:
                    continue
                ob_x(pn, 'won boarding battle')
            return d('won boarding battle')

        m = rm('(\\w+) is eliminated\\!')
        if m:
            return ob1('eliminated in fray')

        m = rm('(\\w+) has left the vessel\\.')
        if m:
            who = m.group(1)
            # ob_x makes sure the pirate is recorded on this vessel, so
            # that both deletions below are certain to succeed.
            ob_x(who, 'disembarked')
            del self._v[who]
            del self._pl[who]
            return d('disembarked')

        return d('not matched')

    def _str_vessel(self, vn, v):
        """Return a multi-line dump of the vessel named vn (dictionary v)."""
        s = ' vessel %s\n' % vn
        s += ' '*20 + "%-*s   %13s\n" % (
                max_pirate_namelen, '#lastaboard',
                v['#lastaboard'])
        for pn in sorted(v.keys()):
            if pn.startswith('#'):
                continue
            pa = v[pn]
            assert pa.v == v
            assert self._pl[pn] == pa
            s += ' '*20 + "%s %-*s %13s %-30s %13s %s\n" % (
                (' ', 'G')[pa.gunner],
                max_pirate_namelen, pn,
                pa.last_time, pa.last_event,
                pa.last_chat_time, pa.last_chat_chan)
        return s

    def __str__(self):
        """Dump the whole tracker state, checking internal consistency."""
        s = '<ChatLogTracker\n myself %s\n vessel %s\n' % (
            self._myself.name, self._vessel)
        assert ((self._v is None and self._vessel is None) or
                (self._v is self._vl[self._vessel]))
        if self._vessel is not None:
            s += self._str_vessel(self._vessel, self._v)
        for vn in sorted(self._vl.keys()):
            if vn == self._vessel:
                continue
            s += self._str_vessel(vn, self._vl[vn])
        for p in self._pl:
            pa = self._pl[p]
            assert pa.v[p] is pa
            assert pa.v in self._vl.values()
        s += '>\n'
        return s

    def catchup(self, progress=None):
        """Read and process everything newly appended to the log.

        An incomplete final line is buffered until the rest arrives.
        If progress is given, its progress(done, total) method is
        called after each read and caughtup() at the end.
        """
        while True:
            more = self._f.readline()
            if not more:
                break

            self._progress[0] += len(more)
            if progress:
                progress.progress(*self._progress)

            self._lbuf += more
            if self._lbuf.endswith('\n'):
                self.chatline(self._lbuf.rstrip())
                self._lbuf = ''
        if progress:
            progress.caughtup()

    def changed(self):
        """Return whether anything changed since the previous call."""
        rv = self._need_redisplay
        self._need_redisplay = False
        return rv

    def myname(self):
        # returns our pirate name
        return self._myself.name

    def vessel(self):
        # returns the vessel we're aboard or None
        return self._vessel

    def aboard(self):
        # returns a list of PirateAboard sorted by name
        # (empty if we have not boarded any vessel yet)
        if self._v is None:
            return []
        return [self._v[pn]
                for pn in sorted(self._v.keys())
                if not pn.startswith('#')]
654
655 #---------- implementations of actual operation modes ----------
656
def do_pirate(pirates, bu):
    """Print the scraped information for each named pirate.

    The output has the shape of a dictionary display: one
    'name: info,' line per pirate between braces.  bu is unused.
    """
    print('{')
    for name in pirates:
        print('%s: %s,' % (repr(name), PirateInfo(name)))
    print('}')
663
def prep_crew_of(args, bu, max_age=300):
    """Return the CrewInfo for the crew of the pirate named in args.

    args must contain exactly one pirate name, otherwise bu is called
    with a complaint.  Returns None if the pirate's crew could not be
    determined.
    """
    if len(args) != 1:
        bu('crew-of takes one pirate name')
    pirate = PirateInfo(args[0], max_age)
    crew = pirate.crew
    if crew is None:
        return None
    crew_id = crew[0]
    return CrewInfo(crew_id, max_age)
669
def do_crew_of(args, bu):
    """Print the crew roster of the crew of the pirate named in args."""
    print(prep_crew_of(args, bu))
673
def do_standings_crew_of(args, bu):
    """Print a standings table for the crew of the pirate named in args.

    Crew members are grouped under a heading line for each rank.  If
    the pirate's crew cannot be determined, bu is called to report
    that instead of failing on the missing crew information.
    """
    ci = prep_crew_of(args, bu, 60)
    if ci is None:
        bu('cannot determine the crew of that pirate')
        return
    tab = StandingsTable()
    tab.headings()
    for (rank, members) in ci.crew:
        if not members:
            continue
        tab.literalline('%s:' % rank)
        for p in members:
            # Randomise the acceptable cache age of each member's page
            # between 900 and 1800 seconds.
            pi = PirateInfo(p, random.randint(900, 1800))
            tab.pirate(pi)
    print(tab.results())
685
class ProgressPrintPercentage:
    """Progress reporter which shows a percentage on a single line.

    Each report ends with a carriage return rather than a newline, so
    successive reports overwrite one another on a terminal.
    """

    def __init__(self, f=sys.stdout):
        self._f = f

    def progress(self, done, total):
        """Report that done out of total has been processed."""
        if total > 0:
            percent = (done * 100) / total
        else:
            # Nothing was expected (for example the log was empty when
            # it was opened), so any progress at all counts as complete.
            percent = 100
        self._f.write("scan chat logs %3d%%\r" % percent)
        self._f.flush()

    def caughtup(self):
        """Blank out the progress report."""
        self._f.write('                   \r')
        self._f.flush()
694
695 #----- modes which use the chat log parser are quite complex -----
696
def prep_chat_log(args, bu,
        progress=ProgressPrintPercentage(),
        max_myself_age=3600):
    """Open the chat log named in args and scan what it contains so far.

    The leaf name must have the form <Pirate>_<ocean>_chat-log-<suffix>;
    the pirate is taken to be ourselves and the ocean is installed in
    the fetcher.  Debug verbosity is reduced by one level during the
    initial scan.  Returns (our PirateInfo, the ChatLogTracker).
    """
    if len(args) != 1:
        bu('this action takes only chat log filename')
    logfn = args[0]

    parsed = regexp.match(
        '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_chat-log-\\w+$', logfn)
    if not parsed:
        bu('chat log filename is not in default format')
    pirate, ocean = parsed.groups()
    fetcher.ocean = ocean

    myself = PirateInfo(pirate, max_myself_age)
    track = ChatLogTracker(myself, logfn)

    opts.debug -= 1
    track.catchup(progress)
    opts.debug += 1

    return (myself, track)
715
def do_track_chat_log(args, bu):
    """Follow the chat log forever, dumping the tracker when it changes.

    The log is polled for new lines once per second.
    """
    _myself, tracker = prep_chat_log(args, bu)
    while True:
        tracker.catchup()
        if tracker.changed():
            print(tracker)
        time.sleep(1)
723
def do_ship_aid(args, bu):
    """Repeatedly display the standings of everyone aboard our vessel.

    Once per second the chat log is re-read and a table is printed
    with one row per pirate aboard: puzzle standings, a gunner marker,
    and the age and nature of their last event and last chat.  Pirates
    whose standings have not been fetched yet get a spinner character
    in every column.  Returns as soon as we are found not to be aboard
    any vessel.
    """
    if opts.ship_duty is None:
        opts.ship_duty = True

    (myself, track) = prep_chat_log(args, bu)

    # Spinner frames; the first character is the one currently shown.
    rotate_nya = '/-\\'

    def timeevent(t, e):
        # Uses `now' from the enclosing loop iteration.
        if t is None:
            return ' ' * 22
        return " %-4s %-16s" % (format_time_interval(now - t), e)

    while True:
        track.catchup()
        now = time.time()

        s = "%s" % track.myname()

        vn = track.vessel()
        if vn is None:
            print(s + " ...?")
            return

        s += " on board the %s at %s\n" % (
            vn, time.strftime("%Y-%m-%d %H:%M:%S"))

        tbl = StandingsTable()
        tbl.headings()

        for pa in track.aboard():
            pi = pa.pirate_info()

            if pa.gunner:
                xs = 'G '
            else:
                xs = '  '
            xs += timeevent(pa.last_time, pa.last_event)
            xs += timeevent(pa.last_chat_time, pa.last_chat_chan)

            if pi is None:
                tbl.pirate_dummy(pa.name, rotate_nya[0], xs)
            else:
                tbl.pirate(pi, xs)

        s += tbl.results()

        print('\n\n' + s)

        time.sleep(1)
        # Rotate the whole string by one place so that every frame,
        # including the last, comes round in turn.
        rotate_nya = rotate_nya[1:] + rotate_nya[0]
770
771 #---------- main program ----------
772
def main():
    """Parse the command line and run the requested action.

    The action name selects a module-level function: 'do_' followed by
    the action with each '-' replaced by '_'.  That function is called
    with the remaining arguments and the option parser's error method.
    """
    global opts, fetcher

    pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
actions:
 yoweb-scrape [--ocean OCEAN ...] pirate PIRATE
 yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
 yoweb-scrape [--ocean OCEAN ...] ship-aid CHAT-LOG
''')
    ao = pa.add_option
    ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None,
        help='select ocean OCEAN')
    ao('--cache-dir', dest='cache_dir', metavar='DIR',
        default='~/.yoweb-scrape-cache',
        help='cache yoweb pages in DIR')
    ao('-D','--debug', action='count', dest='debug', default=0,
        help='enable debugging output')
    ao('-q','--quiet', action='store_true', dest='quiet',
        help='suppress warning output')

    ao('--ship-duty', action='store_true', dest='ship_duty',
        help='show ship duty station puzzles')
    ao('--all-puzzles', action='store_false', dest='ship_duty',
        help='show all puzzles, not just ship duty stations')

    (opts,args) = pa.parse_args()
    random.seed()

    if len(args) < 1:
        pa.error('need a mode argument')

    mode = args[0]
    # A literal '_' in the mode is mapped to '#', which cannot occur in
    # a function name, so only the hyphenated spelling is accepted.
    mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_')
    try:
        mode_fn = globals()[mode_fn_name]
    except KeyError:
        pa.error('unknown mode "%s"' % mode)

    # fixed parameters
    opts.min_max_age = 60
    opts.expire_age = 3600
    opts.ship_reboard_clearout = 3600

    if opts.cache_dir.startswith('~/'):
        # expanduser copes with HOME being unset, by falling back to
        # the password database.
        opts.cache_dir = os.path.expanduser(opts.cache_dir)

    fetcher = Fetcher(opts.ocean, opts.cache_dir)

    mode_fn(args[1:], pa.error)
823
# Run the requested action as soon as all the definitions are in place.
main()