chiark / gitweb /
Revamp machinery for looking up ships in chat log tracker
[ypp-sc-tools.db-live.git] / yoweb-scrape
1 #!/usr/bin/python
2
3 #---------- setup ----------
4
5 import signal
6 signal.signal(signal.SIGINT, signal.SIG_DFL)
7
8 import os
9 import time
10 import urllib
11 import urllib2
12 import errno
13 import sys
14 import re as regexp
15 import random
16 import curses
17 from optparse import OptionParser
18
19 from BeautifulSoup import BeautifulSoup
20
# Global options object.  It starts out as None; code in this file
# reads attributes from it (e.g. opts.debug, opts.expire_age), so it
# must be assigned before that code runs.  Presumably it is filled in
# from the OptionParser imported above, in a part of the file not
# visible here -- TODO confirm.
opts = None
22
23 #---------- YPP parameters and arrays ----------
24
# Names of the puzzles, as they appear on yoweb pirate pages.
puzzles = [
        'Swordfighting', 'Bilging', 'Sailing', 'Rigging', 'Navigating',
        'Battle Navigation', 'Gunning', 'Carpentry', 'Rumble',
        'Treasure Haul',
        'Drinking', 'Spades', 'Hearts', 'Treasure Drop', 'Poker',
        'Distilling',
        'Alchemistry', 'Shipwrightery', 'Blacksmithing', 'Foraging',
]

# Standing names, lowest first; a standing is stored as an index
# into this list.
standingvals = [
        'Able', 'Distinguished', 'Respected', 'Master',
        'Renowned', 'Grand-Master', 'Legendary', 'Ultimate',
]

# Matches the start of an href which refers to a pirate page.
pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm')

# Width used when formatting pirate names in columns.
max_pirate_namelen = 12
36
37
38 #---------- general utilities ----------
39
def debug(m):
        # Write diagnostic message m (followed by a newline) to
        # opts.debug_file; does nothing unless opts.debug is positive.
        if not (opts.debug > 0):
                return
        print >>opts.debug_file, m
43
def format_time_interval(ti):
        # Render a duration ti (in seconds) compactly:
        #   under 2 minutes   'M:SS'
        #   under 2 hours     'NNm'  (minutes, padded to width 2)
        #   under 1 day       'NNh'
        #   otherwise         'NNd'
        if ti < 120:
                return '%d:%02d' % (ti / 60, ti % 60)
        # (exclusive upper bound, format, seconds per unit)
        coarser = (
                (7200,  '%2dm', 60),
                (86400, '%dh',  3600),
        )
        for (bound, fmt, unit) in coarser:
                if ti < bound:
                        return fmt % (ti / unit)
        return '%dd' % (ti / 86400)
49
50 #---------- caching and rate-limiting data fetcher ----------
51
52 class Fetcher:
53         def __init__(self, ocean, cachedir):
54                 debug('Fetcher init %s' % cachedir)
55                 self.ocean = ocean
56                 self.cachedir = cachedir
57                 try: os.mkdir(cachedir)
58                 except (OSError,IOError), oe:
59                         if oe.errno != errno.EEXIST: raise
60                 self._cache_scan(time.time())
61
62         def default_ocean(self, ocean='ice'):
63                 if self.ocean is None:
64                         self.ocean = ocean
65
66         def _cache_scan(self, now):
67                 # returns list of ages, unsorted
68                 ages = []
69                 debug('Fetcher   scan_cache')
70                 for leaf in os.listdir(self.cachedir):
71                         if not leaf.startswith('#'): continue
72                         path = self.cachedir + '/' + leaf
73                         try: s = os.stat(path)
74                         except (OSError,IOError), oe:
75                                 if oe.errno != errno.ENOENT: raise
76                                 continue
77                         age = now - s.st_mtime
78                         if age > opts.expire_age:
79                                 debug('Fetcher    expire %d %s' % (age, path))
80                                 try: os.remove(path)
81                                 except (OSError,IOError), oe:
82                                         if oe.errno != errno.ENOENT: raise
83                                 continue
84                         ages.append(age)
85                 return ages
86
87         def need_wait(self, now):
88                 ages = self._cache_scan(now)
89                 ages.sort()
90                 debug('Fetcher   ages ' + `ages`)
91                 min_age = 1
92                 need_wait = 0
93                 for age in ages:
94                         if age < min_age and age < 300:
95                                 debug('Fetcher   morewait min=%d age=%d' %
96                                         (min_age, age))
97                                 need_wait = max(need_wait, min_age - age)
98                         min_age += 3
99                         min_age *= 1.25
100                 return need_wait
101
102         def _rate_limit_cache_clean(self, now):
103                 need_wait = self.need_wait(now)
104                 if need_wait > 0:
105                         debug('Fetcher   wait %d' % need_wait)
106                         time.sleep(need_wait)
107
108         def fetch(self, url, max_age):
109                 debug('Fetcher fetch %s' % url)
110                 cache_corename = urllib.quote_plus(url)
111                 cache_item = "%s/#%s#" % (self.cachedir, cache_corename)
112                 try: f = file(cache_item, 'r')
113                 except (OSError,IOError), oe:
114                         if oe.errno != errno.ENOENT: raise
115                         f = None
116                 now = time.time()
117                 max_age = max(opts.min_max_age, min(max_age, opts.expire_age))
118                 if f is not None:
119                         s = os.fstat(f.fileno())
120                         age = now - s.st_mtime
121                         if age > max_age:
122                                 debug('Fetcher  stale %d < %d'% (max_age, age))
123                                 f = None
124                 if f is not None:
125                         data = f.read()
126                         f.close()
127                         debug('Fetcher  cached %d > %d' % (max_age, age))
128                         return data
129
130                 debug('Fetcher  fetch')
131                 self._rate_limit_cache_clean(now)
132
133                 stream = urllib2.urlopen(url)
134                 data = stream.read()
135                 cache_tmp = "%s/#%s~%d#" % (
136                         self.cachedir, cache_corename, os.getpid())
137                 f = file(cache_tmp, 'w')
138                 f.write(data)
139                 f.close()
140                 os.rename(cache_tmp, cache_item)
141                 debug('Fetcher  stored')
142                 return data
143
144         def yoweb(self, kind, tail, max_age):
145                 self.default_ocean()
146                 url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
147                         self.ocean, kind, tail)
148                 return self.fetch(url, max_age)
149
150 #---------- logging assistance for troubled screenscrapers ----------
151
class SoupLog:
        # Collects messages describing problems met while scraping.
        #  sl.msgs = [ 'message', ... ]

        def __init__(self):
                self.msgs = [ ]

        def msg(self, m):
                # Record message m.
                self.msgs.append(m)

        def soupm(self, obj, m):
                # Record message m, annotated with the repr of the
                # object obj to which it relates.
                where = repr(obj)
                self.msg(m + '; in ' + where)

        def needs_msgs(self, child_souplog):
                # Take over all of child_souplog's messages, leaving
                # child_souplog with a fresh empty list.
                self.msgs.extend(child_souplog.msgs)
                child_souplog.msgs = [ ]
162
def soup_text(obj):
        # Returns the concatenation of all the text nodes found by
        # obj.findAll(text=True), with leading and trailing
        # whitespace stripped.
        pieces = obj.findAll(text=True)
        return ''.join(pieces).strip()
166
class SomethingSoupInfo(SoupLog):
        # Common base for the page scrapers: fetches a yoweb page
        # through the global fetcher and parses it into self._soup.

        def __init__(self, kind, tail, max_age):
                # kind, tail, max_age are passed on to fetcher.yoweb()
                SoupLog.__init__(self)
                page = fetcher.yoweb(kind, tail, max_age)
                entity_mode = BeautifulSoup.HTML_ENTITIES
                self._soup = BeautifulSoup(page, convertEntities=entity_mode)
174
175 #---------- scraper for pirate pages ----------
176
class PirateInfo(SomethingSoupInfo):
        # Scraper for a pirate's yoweb page.
        #
        # Public data members:
        #  pi.standings = { 'Treasure Haul': 0, ... }
        #       maps puzzle name to an index into standingvals;
        #       puzzles whose standing could not be determined
        #       are absent
        #  pi.name = name
        #  pi.crew = (id, name)    or None if it could not be found
        #  pi.flag = (id, name)    or None if it could not be found
        #  pi.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, pirate, max_age=300):
                # pirate: pirate name, used as the target= parameter
                # max_age: passed to the fetcher as the maximum age
                #       of a cached page
                SomethingSoupInfo.__init__(self,
                        'pirate.wm?target=', pirate, max_age)
                self.name = pirate
                self._find_standings()
                self.crew = self._find_crewflag('crew',
                        '^/yoweb/crew/info\\.wm')
                self.flag = self._find_crewflag('flag',
                        '^/yoweb/flag/info\\.wm')

        def _find_standings(self):
                # Sets self.standings from the stat images on the
                # page.  Problems are collected in a private log,
                # which is merged into self.msgs only if at least
                # one puzzle ends up without a standing.
                imgs = self._soup.findAll('img',
                        src=regexp.compile('/yoweb/images/stat.*'))
                # Either 'xxx/Standing' or
                # 'xxx/yyy (ocean-wide Standing)'; whichever group
                # matched is picked out with match.lastindex.
                re = regexp.compile(
                        u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
                        )
                standings = { }

                for skill in puzzles:
                        standings[skill] = [ ]

                skl = SoupLog()

                for img in imgs:
                        try: puzzle = img['alt']
                        except KeyError: continue

                        if not puzzle in puzzles:
                                skl.soupm(img, 'unknown puzzle: "%s"' % puzzle)
                                continue
                        key = img.findParent('td')
                        if key is None:
                                skl.soupm(img, 'puzzle at root! "%s"' % puzzle)
                                continue
                        valelem = key.findNextSibling('td')
                        if valelem is None:
                                skl.soupm(key, 'puzzle missing sibling "%s"'
                                        % puzzle)
                                continue
                        valstr = soup_text(valelem)
                        match = re.match(valstr)
                        if match is None:
                                skl.soupm(key, ('puzzle "%s" unparseable'+
                                        ' standing "%s"') % (puzzle, valstr))
                                continue
                        standing = match.group(match.lastindex)
                        standings[puzzle].append(standing)

                self.standings = { }

                for puzzle in puzzles:
                        sl = standings[puzzle]
                        if len(sl) > 1:
                                skl.msg('puzzle "%s" multiple standings %s' %
                                                (puzzle, repr(sl)))
                                continue
                        if not sl:
                                skl.msg('puzzle "%s" no standing found' % puzzle)
                                continue
                        standing = sl[0]
                        # Search every entry of standingvals,
                        # including the last one ('Ultimate').
                        try:
                                self.standings[puzzle] = (
                                        standingvals.index(standing))
                        except ValueError:
                                skl.msg('puzzle "%s" unknown standing "%s"' %
                                        (puzzle, standing))

                for puzzle in puzzles:
                        if not puzzle in self.standings:
                                # one transfer moves all the messages
                                self.needs_msgs(skl)
                                break

        def _find_crewflag(self, cf, yoweb_re):
                # cf: 'crew' or 'flag'; yoweb_re: regexp matching the
                # href of the link to the crew or flag page.
                # Returns (id, name), or None (after logging a
                # message) unless exactly one such link with a
                # parseable id is found.
                things = self._soup.findAll('a', href=regexp.compile(yoweb_re))
                if len(things) != 1:
                        self.msg('zero or several %s id references found' % cf)
                        return None
                thing = things[0]
                id_re = '\\b%sid\\=(\\w+)$' % cf
                id_haystack = thing['href']
                match = regexp.compile(id_re).search(id_haystack)
                if match is None:
                        self.soupm(thing, ('incomprehensible %s id ref'+
                                ' (%s in %s)') % (cf, id_re, id_haystack))
                        return None
                name = soup_text(thing)
                return (match.group(1), name)

        def __str__(self):
                return repr((self.crew, self.flag, self.standings, self.msgs))
275
276 #---------- scraper for crew pages ----------
277
class CrewInfo(SomethingSoupInfo):
        # Scraper for a crew's yoweb page.
        #
        # Public data members:
        #  ci.crew = [ ('Captain',        ['Pirate', ...]),
        #              ('Senior Officer', [...]),
        #               ... ]
        #  ci.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, crewid, max_age=300):
                SomethingSoupInfo.__init__(self,
                        'crew/info.wm?crewid=', crewid, max_age)
                self._find_crew()

        def _find_crew(self):
                # Fills in self.crew.  On failure to locate the
                # members table a message is logged and self.crew
                # is left empty.
                self.crew = []
                captain_imgs = self._soup.findAll('img',
                        src='/yoweb/images/crew-captain.png')
                if len(captain_imgs) != 1:
                        self.msg('crew members: no. of captain images != 1')
                        return

                # Starting from the captain image, climb through the
                # enclosing tables until we reach something which
                # contains a link to a pirate page.
                container = captain_imgs[0]
                while not container.find('a', href=pirate_ref_re):
                        container = container.findParent('table')
                        if not container:
                                self.msg('crew members: cannot find table')
                                return

                rank_img_re = regexp.compile('/yoweb/images/crew')
                members = None  # member list of the rank being read
                for row in container.contents:
                        # findAll(recurse=False)
                        if isinstance(row,basestring):
                                continue

                        if row.find('img', attrs={'src': rank_img_re}):
                                # a rank heading starts a new group
                                members = []
                                self.crew.append((soup_text(row), members))
                                continue

                        for link in row.findAll('a', href=pirate_ref_re):
                                if members is None:
                                        self.soupm(link, 'crew members: crew'
                                                ' before rank')
                                else:
                                        members.append(soup_text(link))

        def __str__(self):
                return repr((self.crew, self.msgs))
325
326 #---------- pretty-printer for tables of pirate puzzle standings ----------
327
class StandingsTable:
        # Pretty-printer for tables of pirate puzzle standings.
        #
        # Lines are accumulated in self.s; call headings(),
        # literalline(), pirate_dummy() and pirate() to add lines
        # and results() to get the text.

        def __init__(self, use_puzzles=None, col_width=6):
                # use_puzzles: list of columns; each entry is either
                #       a puzzle name or a list of puzzle names which
                #       share a column.  If None, a ship duty
                #       selection is used when opts.ship_duty is
                #       set, otherwise all puzzles.
                # col_width: width of each column including the
                #       separating space
                if use_puzzles is None:
                        if opts.ship_duty:
                                use_puzzles=[
                                        'Navigating','Battle Navigation',
                                        'Gunning',
                                        ['Sailing','Rigging'],
                                        'Bilging',
                                        'Carpentry',
                                        'Treasure Haul'
                                ]
                        else:
                                use_puzzles=puzzles
                self._puzzles = use_puzzles
                self.s = ''
                self._cw = col_width-1

        def _pline(self, pirate, puzstrs, extra):
                # Appends one line: the name column, then each entry
                # of puzstrs padded and truncated to the column
                # width, then extra (if not empty).
                self.s += ' %-*s' % (max(max_pirate_namelen, 14), pirate)
                for v in puzstrs:
                        self.s += ' %-*.*s' % (self._cw,self._cw, v)
                if extra:
                        self.s += ' ' + extra
                self.s += '\n'

        def _puzstr(self, pi, puzzle):
                # Returns the cell text for pirate pi in the column
                # puzzle (a puzzle name or list of names; for a list
                # the best standing is shown).  '?' if any standing
                # is unknown, '' for standing 0; otherwise the
                # standing number (if the column is wide enough)
                # followed by one '*' per two levels and a '+' for
                # an odd level.
                if not isinstance(puzzle,list): puzzle = [puzzle]
                try: standing = max([pi.standings[p] for p in puzzle])
                except KeyError: return '?'
                if not standing: return ''
                s = ''
                if self._cw > 4:
                        s += repr(standing)
                if self._cw > 5:
                        s += ' '
                s += '*' * (standing // 2)
                s += '+' * (standing % 2)
                return s

        def headings(self):
                # Appends the heading line of abbreviated puzzle names.
                def puzn_redact(name):
                        # shared columns: abbreviate each name and
                        # give each half the column width
                        if isinstance(name,list):
                                return '/'.join(
                                        ["%.*s" % (self._cw//2, puzn_redact(n))
                                         for n in name])
                        # two-word names: up to 4 characters of the
                        # first word, then the rest after the space
                        spc = name.find(' ')
                        if spc < 0: return name
                        return name[0:min(4,spc)] + name[spc+1:]
                self._pline('', [puzn_redact(p) for p in self._puzzles], None)
        def literalline(self, line):
                # Appends line verbatim.
                self.s += line + '\n'
        def pirate_dummy(self, name, standingstring, extra=None):
                # Appends a line for name with placeholder cells.
                # NOTE(review): standingstring is repeated and then
                # iterated item by item, so a one-character string
                # gives one cell per column; a longer string would
                # give more cells than columns -- confirm callers
                # only pass a single character.
                self._pline(name, standingstring * len(self._puzzles), extra)
        def pirate(self, pi, extra=None):
                # Appends a line showing the standings of pirate pi.
                puzstrs = [self._puzstr(pi,puz) for puz in self._puzzles]
                self._pline(pi.name, puzstrs, extra)

        def results(self):
                # Returns all the text accumulated so far.
                return self.s
390
391 #---------- chat log parser ----------
392
class PirateAboard:
        # Plain record describing one pirate aboard a vessel; all
        # members are public and are updated directly by the chat
        # log tracker.
        #  pa.v                 vessel dict passed to the constructor
        #  pa.name              pirate name
        #  pa.last_time         time of the most recent event
        #  pa.last_event        description of that event
        #  pa.gunner            initially False
        #  pa.last_chat_time    initially None
        #  pa.last_chat_chan    initially None
        #  pa.pi                cached PirateInfo, or None

        def __init__(pa, pn, v, time, event):
                pa.name, pa.v = pn, v
                pa.last_time, pa.last_event = time, event
                pa.last_chat_time = pa.last_chat_chan = None
                pa.gunner = False
                pa.pi = None

        def pirate_info(pa):
                # Returns the PirateInfo for this pirate, or None if
                # we have none yet and fetching one now would
                # involve waiting for the rate limiter.
                if pa.pi:
                        return pa.pi
                if not fetcher.need_wait(time.time()):
                        pa.pi = PirateInfo(pa.name, 3600)
                return pa.pi
418
class ChatLogTracker:
        # Follows a chat log file and works out, from the messages in
        # it, which pirates are aboard which vessel.
        #
        # This is quite complex so we make it opaque.  Use the
        # official invokers, accessors etc.

        def __init__(self, myself_pi, logfn):
                # myself_pi: object whose .name is our own pirate name
                # logfn: name of the chat log file; it is opened here
                #  and read incrementally by catchup()
                self._pl = {}   # self._pl['Pirate'] =
                self._vl = {}   #   self._vl['Vessel']['Pirate'] = PirateAboard
                                # self._vl['Vessel']['#lastinfo']
                self._v = None          # self._v =
                self._vessel = None     #       self._vl[self._vessel]
                self._date = None       # [n,n,n] from the last date line
                self._myself = myself_pi
                self._need_redisplay = False
                self._f = file(logfn)
                self._lbuf = ''         # partial line not yet processed
                # [ bytes read so far, size of the file when opened ]
                self._progress = [0, os.fstat(self._f.fileno()).st_size]

        def force_redisplay(self):
                # Makes the next call to changed() return True.
                self._need_redisplay = True

        def _onboard_event(self,timestamp,pirate,event):
                # Records that pirate did event at timestamp aboard
                # the current vessel (self._v, which must be set).
                # Returns the pirate's PirateAboard.
                try: pa = self._pl[pirate]
                except KeyError: pa = None
                if pa is not None and pa.v is self._v:
                        pa.last_time = timestamp
                        pa.last_event = event
                else:
                        # unknown, or last seen on a different vessel:
                        # remove from the old vessel and start afresh
                        if pa is not None: del pa.v[pirate]
                        pa = PirateAboard(pirate, self._v, timestamp, event)
                        self._pl[pirate] = pa
                        self._v[pirate] = pa
                self._v['#lastinfo'] = timestamp
                self.force_redisplay()
                return pa

        def _trash_vessel(self, v):
                # Forgets all the pirates recorded in vessel dict v.
                # Does not remove v from self._vl; the callers deal
                # with that.
                for pn in v:
                        if pn.startswith('#'): continue
                        del self._pl[pn]
                self.force_redisplay()

        def expire_garbage(self, timestamp):
                # Discards every vessel whose '#lastinfo' is more than
                # opts.ship_reboard_clearout older than timestamp.
                # (Iterates over a copy since entries are deleted.)
                for (vn,v) in list(self._vl.iteritems()):
                        la = v['#lastinfo']
                        if timestamp - la > opts.ship_reboard_clearout:
                                self._debug_line_disposition(timestamp,'',
                                        'stale reset '+vn)
                                self._trash_vessel(v)
                                del self._vl[vn]

        def _create_vessel(self, vn, timestamp):
                # Installs and returns a new, empty vessel dict for vn.
                self._vl[vn] = v = { '#lastinfo': timestamp }
                return v

        def _update_vessel_lookup(self, vn, timestamp, dml):
                # Returns the vessel dict for vn, creating it if it is
                # unknown and replacing it if its information is
                # older than opts.ship_reboard_clearout.  Appends
                # 'new', 'stale' or 'current' to the list dml (used
                # for the debug message).
                v = self._vl.get(vn, None)
                if v is None:
                        dml.append('new')
                        v = self._create_vessel(vn, timestamp)
                elif timestamp - v['#lastinfo'] > opts.ship_reboard_clearout:
                        dml.append('stale')
                        self._trash_vessel(v)
                        v = self._create_vessel(vn, timestamp)
                else:
                        dml.append('current')
                return v

        def _debug_line_disposition(self,timestamp,l,m):
                debug('CLT %13s %-30s %s' % (timestamp,m,l))

        def chatline(self,l):
                # Processes one line l of the chat log (without its
                # newline), updating the tracked state.  Always
                # returns None; the `return d(...)' idiom just logs
                # what was done with the line and stops.
                #
                # NB the helper lambdas and nested functions below
                # refer to the local variables l, m and timestamp,
                # which are looked up when the helper is called, not
                # when it is defined.  So rm() matches against the
                # current value of l and ob1() etc. use whatever
                # match object m most recently received.
                rm = lambda re: regexp.match(re,l)
                d = lambda m: self._debug_line_disposition(timestamp,l,m)
                timestamp = None

                m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$')
                if m:
                        self._date = [int(x) for x in m.groups()]
                        self._previous_timestamp = None
                        return d('date '+`self._date`)

                if self._date is None:
                        return d('date unset')

                m = rm('\\[(\d\d):(\d\d):(\d\d)\\] ')
                if not m:
                        return d('no timestamp')

                # Combine the date with the time of day from this
                # line.  If the result is earlier than the previous
                # line's timestamp, advance self._date[2] by one and
                # try again.
                while True:
                        time_tuple = (self._date +
                                      [int(x) for x in m.groups()] +
                                      [-1,-1,-1])
                        timestamp = time.mktime(time_tuple)
                        if timestamp >= self._previous_timestamp: break
                        self._date[2] += 1
                        self._debug_line_disposition(timestamp,'',
                                'new date '+`self._date`)

                self._previous_timestamp = timestamp

                # strip the '[hh:mm:ss] ' prefix; from here on rm()
                # matches against the rest of the line
                l = l[l.find(' ')+1:]

                def ob_x(who,event):
                        return self._onboard_event(timestamp, who, event)
                def ob1(did): ob_x(m.group(1), did); return d(did)
                def oba(did): return ob1('%s %s' % (did, m.group(2)))

                def disembark(who, how):
                        # record the departure (which ensures that
                        # who has an entry on the current vessel)
                        # and then remove that entry
                        ob_x(who, 'leaving '+how)
                        del self._v[who]
                        del self._pl[who]

                m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$')
                if m:
                        dm = ['boarding']
                        pn = self._myself.name
                        self._vessel = vn = m.group(1)
                        self._v = self._update_vessel_lookup(vn, timestamp, dm)

                        ob_x(pn, 'we boarded')
                        self.expire_garbage(timestamp)
                        return d(' '.join(dm))

                # everything below needs a current vessel
                if self._v is None:
                        return d('no vessel')

                m = rm('(\\w+) has come aboard\\.$')
                if m: return ob1('boarded');

                m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$')
                if m:
                        (who,what) = m.groups()
                        pa = ob_x(who,'ord '+what)
                        if what == 'Gunning':
                                pa.gunner = True
                        return d('duty order')

                m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$')
                if m: oba('stopped'); return d("end")

                def chat(what):
                        # Notes that m.group(1) chatted on channel
                        # what, provided they are recorded as being
                        # aboard the current vessel.  If they are
                        # recorded on some other vessel nothing is
                        # done and no disposition is logged.
                        who = m.group(1)
                        try: pa = self._pl[who]
                        except KeyError: return d('chat mystery')
                        if pa.v is self._v:
                                pa.last_chat_time = timestamp
                                pa.last_chat_chan = what
                                self.force_redisplay()
                                return d('chat '+what)

                def chat_metacmd(what):
                        # Handles chat whose text is a command of the
                        # form  /a [VESSEL:] PIRATE...  or
                        # /d [VESSEL:] PIRATE...  which marks the
                        # named pirates as aboard (/a) or as having
                        # left (/d).  Anything else is treated as
                        # ordinary chat on channel what.
                        (cmdr, metacmd) = m.groups()
                        metacmd = regexp.sub('\\s+', ' ', metacmd).strip()
                        m2 = regexp.match(
                                '/([ad]) (?:([A-Za-z ]+)\\s*:)?([A-Za-z ]+)$',
                                metacmd)
                        if not m2: return chat(what)

                        (cmd, vn, targets) = m2.groups()

                        if cmdr == self._myself.name: how = 'manual: /%s' % cmd
                        else: how = '/%s %s' % (cmd,cmdr)
                        if cmd == 'a': each = ob_x
                        else: each = disembark

                        if vn is not None:
                                # a vessel was specified: it must be
                                # the current vessel's name, or the
                                # final word(s) of it
                                vn = vn.title()
                                if not regexp.match(
                                                '(?:.* )?' + vn + '$',
                                                self._vessel):
                                        return chat('/%s %s:' % (cmd,vn))

                        for target in targets.split(' '):
                                if not target: continue
                                each(target.title(), how)
                        return d('/%s' % cmd)

                m = rm('(\\w+) (?:issued an order|ordered everyone) "')
                if m: return ob1('general order');

                m = rm('(\\w+) says, "')
                if m: return chat('public')

                m = rm('(\\w+) tells ye, "')
                if m: return chat('private')

                m = rm('Ye told (\\w+), "(.*)"$')
                if m: return chat_metacmd('private')

                m = rm('(\\w+) flag officer chats, "')
                if m: return chat('flag officer')

                m = rm('(\\w+) officer chats, "(.*)"$')
                if m: return chat_metacmd('officer')

                m = rm('Game over\\.  Winners: ([A-Za-z, ]+)\\.$')
                if m:
                        pl = m.group(1).split(', ')
                        if not self._myself.name in pl:
                                return d('lost boarding battle')
                        for pn in pl:
                                # entries containing a space are skipped
                                if ' ' in pn: continue
                                ob_x(pn,'won boarding battle')
                        return d('won boarding battle')

                m = rm('(\\w+) is eliminated\\!')
                if m: return ob1('eliminated in fray');

                m = rm('(\\w+) has left the vessel\.')
                if m:
                        disembark(m.group(1), 'disembarked')
                        return d('disembarked')

                return d('not matched')

        def _str_vessel(self, vn, v):
                # Returns a multi-line dump of vessel vn (dict v),
                # checking its consistency with self._pl on the way.
                s = ' vessel %s\n' % vn
                s += ' '*20 + "%-*s   %13s\n" % (
                                max_pirate_namelen, '#lastinfo',
                                v['#lastinfo'])
                for pn in sorted(v.keys()):
                        if pn.startswith('#'): continue
                        pa = v[pn]
                        assert pa.v == v
                        assert self._pl[pn] == pa
                        s += ' '*20 + "%s %-*s %13s %-30s %13s %s\n" % (
                                (' ','G')[pa.gunner],
                                max_pirate_namelen, pn,
                                pa.last_time, pa.last_event,
                                pa.last_chat_time, pa.last_chat_chan)
                return s

        def __str__(self):
                # Dump of the whole state: the current vessel first,
                # then the others in name order.  Also asserts that
                # the internal data structures are consistent.
                s = '''<ChatLogTracker
 myself %s
 vessel %s
'''                     % (self._myself.name, self._vessel)
                assert ((self._v is None and self._vessel is None) or
                        (self._v is self._vl[self._vessel]))
                if self._vessel is not None:
                        s += self._str_vessel(self._vessel, self._v)
                for vn in sorted(self._vl.keys()):
                        if vn == self._vessel: continue
                        s += self._str_vessel(vn, self._vl[vn])
                for p in self._pl:
                        pa = self._pl[p]
                        assert pa.v[p] is pa
                        assert pa.v in self._vl.values()
                s += '>\n'
                return s

        def catchup(self, progress=None):
                # Reads and processes everything currently available
                # in the log file.  A final line which does not yet
                # end in a newline is kept in self._lbuf until the
                # rest of it arrives.
                #
                # progress, if given, has its progress(done, total)
                # method called after each read and its caughtup()
                # method called at the end.
                while True:
                        more = self._f.readline()
                        if not more: break

                        self._progress[0] += len(more)
                        if progress: progress.progress(*self._progress)

                        self._lbuf += more
                        if self._lbuf.endswith('\n'):
                                self.chatline(self._lbuf.rstrip())
                                self._lbuf = ''
                                if opts.debug >= 2:
                                        debug(self.__str__())
                if progress: progress.caughtup()

        def changed(self):
                # returns whether a redisplay is needed, and resets
                # the flag
                rv = self._need_redisplay
                self._need_redisplay = False
                return rv
        def myname(self):
                # returns our pirate name
                return self._myself.name
        def vessel(self):
                # returns the vessel we're aboard or None
                return self._vessel
        def aboard(self):
                # returns a list of PirateAboard sorted by name
                if self._v is None: return []
                return [ self._v[pn]
                         for pn in sorted(self._v.keys())
                         if not pn.startswith('#') ]
702
703 #---------- implementations of actual operation modes ----------
704
705 def do_pirate(pirates, bu):
706         print '{'
707         for pirate in pirates:
708                 info = PirateInfo(pirate)
709                 print '%s: %s,' % (`pirate`, info)
710         print '}'
711
def prep_crew_of(args, bu, max_age=300):
        # args: list which should contain just one pirate name
        # bu: called with a message if args is not of that form
        # max_age: cache age limit used for both page fetches
        # Returns the CrewInfo for the pirate's crew, or None if the
        # pirate's crew could not be determined.
        if len(args) != 1:
                bu('crew-of takes one pirate name')
        crew = PirateInfo(args[0], max_age).crew
        if crew is not None:
                return CrewInfo(crew[0], max_age)
        return None
717
718 def do_crew_of(args, bu):
719         ci = prep_crew_of(args, bu)
720         print ci
721
def do_standings_crew_of(args, bu):
        # 'standings-crew-of' mode: print a standings table covering
        # every member of the crew of the pirate named in args.
        #   args  remaining command line arguments (one pirate name)
        #   bu    usage-error callback taking a message
        ci = prep_crew_of(args, bu, 60)
        if ci is None:
                # prep_crew_of() returns None when the pirate's .crew is
                # None; previously this fell through to ci.crew and
                # died with an AttributeError.
                bu('pirate is not in a crew')
                return
        tab = StandingsTable()
        tab.headings()
        for (rank, members) in ci.crew:
                # ranks with no members get no heading line at all
                if not members: continue
                tab.literalline('%s:' % rank)
                for p in members:
                        # each member gets a randomly chosen maximum
                        # age between 900 and 1800
                        pi = PirateInfo(p, random.randint(900,1800))
                        tab.pirate(pi)
        print(tab.results())
733
class ProgressPrintPercentage:
        # Simple progress reporter which writes a percentage line to a
        # file object.  Each progress line ends with '\r' rather than a
        # newline, so on a terminal successive reports overwrite each
        # other; caughtup() blanks the line out again.
        def __init__(self, f=sys.stdout):
                # f: file-like object with write() and flush()
                self._f = f
        def progress_string(self,done,total):
                # Returns the text for "done out of total".  A total of
                # zero (for example a chat log that was empty when its
                # size was taken) is reported as 100% instead of raising
                # ZeroDivisionError.
                if total:
                        percent = (done*100) // total
                else:
                        percent = 100
                return "scan chat logs %3d%%\r" % percent
        def progress(self,*a):
                # a is (done, total), passed straight to progress_string
                self._f.write(self.progress_string(*a))
                self._f.flush()
        def show_init(self, pirate, ocean):
                # one-off startup banner, on a line of its own
                self._f.write('Starting up, %s on the %s ocean\n' % (
                        pirate, ocean))
        def caughtup(self):
                # wipe the progress text: spaces, then back to column 0
                self._f.write('                   \r')
                self._f.flush()
748
749 #----- modes which use the chat log parser are quite complex -----
750
def prep_chat_log(args, bu,
                progress=None,
                max_myself_age=3600):
        # Common setup for the modes which follow a chat log.
        #   args            remaining command line arguments; must be
        #                   exactly the chat log filename
        #   bu              usage-error callback taking a message
        #   progress        progress reporter (show_init, progress and
        #                   caughtup methods); when None a
        #                   ProgressPrintPercentage is created here
        #                   rather than once at definition time
        #   max_myself_age  passed to PirateInfo for our own pirate
        # Returns (myself, track): our PirateInfo and a ChatLogTracker
        # which has already been caught up with the existing log.
        if progress is None: progress = ProgressPrintPercentage()

        if len(args) != 1: bu('this action takes only chat log filename')
        logfn = args[0]
        # the basename must start PIRATE_OCEAN_ ; any directory part
        # is skipped by the optional non-capturing prefix
        logfn_re = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_'
        match = regexp.match(logfn_re, logfn)
        if not match: bu('chat log filename is not in expected format')
        (pirate, ocean) = match.groups()
        fetcher.default_ocean(ocean)

        myself = PirateInfo(pirate,max_myself_age)
        progress.show_init(pirate, fetcher.ocean)
        track = ChatLogTracker(myself, logfn)

        # Lower the debug level while reading the backlog, and make
        # sure it is put back even if catchup() raises.
        opts.debug -= 2
        try:
                track.catchup(progress)
        finally:
                opts.debug += 2

        track.force_redisplay()

        return (myself, track)
773
def do_track_chat_log(args, bu):
        # 'track-chat-log' mode: follow the chat log named in args and
        # print the tracker each time it reports a change, polling
        # once a second.  Loops forever.
        track = prep_chat_log(args, bu)[1]
        while 1:
                track.catchup()
                if track.changed(): print(track)
                time.sleep(1)
781
782 #----- ship management aid -----
783
class Display_dumb(ProgressPrintPercentage):
        # Displayer which just prints each new report after two
        # newlines, with no cursor control of any kind.
        def __init__(self):
                ProgressPrintPercentage.__init__(self)
        def realstart(self):
                # nothing needs doing before the first report
                pass
        def show(self, s):
                print('\n\n%s' % (s,))
791
class Display_overwrite(ProgressPrintPercentage):
        # Displayer which redraws each report starting at the top of
        # the screen, using terminfo capability strings:
        #   self._clear   the 'clear' string ('' if unavailable)
        #   self._t       strings for 'ho' (cursor home), 'el' and 'ed'
        # If 'el'/'ed'/home are not all available, 'ho' is replaced by
        # a full clear and the others by ''.  If even 'clear' is
        # missing, show() falls back to plain scrolling output.
        def __init__(self):
                ProgressPrintPercentage.__init__(self)

                # terminfo is initialised against /dev/null, presumably
                # so that setup never writes to the real terminal; only
                # the capability strings are used afterwards.
                null = open('/dev/null','w')
                curses.setupterm(fd=null.fileno())

                self._clear = curses.tigetstr('clear')
                if not self._clear:
                        self._debug('missing clear!')
                        # realstart() writes self._clear, so it must
                        # be a string even when the capability is absent
                        self._clear = ''
                        # bound method of our own; assigning
                        # Display_dumb.show here would store an unbound
                        # method which cannot be called on this object
                        self.show = self._show_scrolling
                        return

                self._t = {'el':'', 'ed':''}
                if not self._init_sophisticated():
                        for k in self._t.keys(): self._t[k] = ''
                        self._t['ho'] = self._clear

        def _debug(self,m): debug('display overwrite: '+m)

        def _init_sophisticated(self):
                # Fills self._t from terminfo.  Returns 1 if every
                # capability was found, 0 (after a debug message)
                # otherwise.
                for k in self._t.keys():
                        self._t[k] = curses.tigetstr(k)
                home = curses.tigetstr('ho')
                if not home:
                        # no home string: address row 0, column 0
                        # instead, provided 'cup' itself exists
                        cup = curses.tigetstr('cup')
                        if cup: home = curses.tparm(cup,0,0)
                self._t['ho'] = home
                missing = [k for k in self._t.keys() if not self._t[k]]
                if missing:
                        self._debug('missing '+(' '.join(missing)))
                        return 0
                return 1

        def _show_scrolling(self, s):
                # fallback used when the terminal cannot even clear:
                # same output as Display_dumb.show
                print('\n\n%s' % (s,))

        def show(self, s):
                w = sys.stdout.write
                def wti(k): w(self._t[k])

                wti('ho')
                nl = ''
                for l in s.rstrip().split('\n'):
                        w(nl)
                        w(l)
                        # erase whatever is left over on this line
                        wti('el')
                        nl = '\r\n'
                # erase everything below the new report
                wti('ed')
                w(' ')
                sys.stdout.flush()

        def realstart(self):
                sys.stdout.write(self._clear)
                sys.stdout.flush()
def do_ship_aid(args, bu):
        # 'ship-aid' mode: follow the chat log named in args and, once
        # a second, redisplay who is aboard our vessel together with
        # their standings and most recent event and chat.  Loops
        # forever.
        #   args  remaining command line arguments (chat log filename)
        #   bu    usage-error callback taking a message
        if opts.ship_duty is None: opts.ship_duty = True

        displayer = globals()['Display_'+opts.display]()
        spinner = '/-\\'

        (myself, track) = prep_chat_log(args, bu, progress=displayer)

        # width of one " %-4s %-16s" column when there is nothing to show
        blank_event = ' ' * 22

        def event_column(now, when, what):
                # one "age description" column, or blanks if never seen
                if when is not None:
                        age = format_time_interval(now - when)
                        return " %-4s %-16s" % (age, what)
                return blank_event

        displayer.realstart()

        while True:
                track.catchup()
                now = time.time()

                who = track.myname()
                vn = track.vessel()
                if vn is not None: where = " on board the %s" % vn
                else: where = " not on a vessel?!"
                stamp = time.strftime("%Y-%m-%d %H:%M:%S")
                report = "%s%s at %s\n" % (who, where, stamp)

                tbl = StandingsTable()
                tbl.headings()

                for pa in track.aboard():
                        pi = pa.pirate_info()

                        if pa.gunner: flag = 'G '
                        else: flag = '  '
                        xs = (flag
                                + event_column(now, pa.last_time,
                                        pa.last_event)
                                + event_column(now, pa.last_chat_time,
                                        pa.last_chat_chan))

                        # no info available: show a placeholder row
                        # carrying the current spinner character
                        if pi is not None:
                                tbl.pirate(pi, xs)
                        else:
                                tbl.pirate_dummy(pa.name, spinner[0], xs)

                displayer.show(report + tbl.results())
                time.sleep(1)
                # advance the spinner by one position
                spinner = spinner[1:] + spinner[:1]
893
894 #---------- main program ----------
895
def main():
        # Program entry point: parse the command line, fill in the
        # global opts and fetcher, then dispatch to the do_<mode>
        # function selected by the first non-option argument.
        global opts, fetcher

        pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
actions:
 yoweb-scrape [--ocean OCEAN ...] pirate PIRATE
 yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
 yoweb-scrape [options] ship-aid CHAT-LOG  (must be .../PIRATE_OCEAN_chat-log*)

display modes (for --display) apply to ship-aid:
 --display=dumb       just print new information, scrolling the screen
 --display=overwrite  use cursor motion, selective clear, etc. to redraw at top
''')
        ao = pa.add_option
        ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None,
                help='select ocean OCEAN')
        ao('--cache-dir', dest='cache_dir', metavar='DIR',
                default='~/.yoweb-scrape-cache',
                help='cache yoweb pages in DIR')
        ao('-D','--debug', action='count', dest='debug', default=0,
                help='enable debugging output')
        ao('--debug-fd', type='int', dest='debug_fd',
                help='write any debugging output to specified fd')
        ao('-q','--quiet', action='store_true', dest='quiet',
                help='suppress warning output')
        ao('--display', action='store', dest='display',
                type='choice', choices=['dumb','overwrite'],
                help='how to display ship aid')

        # both of these set ship_duty; it stays None if neither is given
        ao('--ship-duty', action='store_true', dest='ship_duty',
                help='show ship duty station puzzles')
        ao('--all-puzzles', action='store_false', dest='ship_duty',
                help='show all puzzles, not just ship duty stations')

        (opts,args) = pa.parse_args()
        random.seed()

        if len(args) < 1:
                pa.error('need a mode argument')

        if opts.debug_fd is not None:
                opts.debug_file = os.fdopen(opts.debug_fd, 'w')
        else:
                opts.debug_file = sys.stdout

        # Map e.g. "crew-of" to do_crew_of.  Underscores in the mode
        # are first turned into '#', so that only the hyphenated
        # spelling can ever match a function name.
        mode = args[0]
        mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_')
        try: mode_fn = globals()[mode_fn_name]
        except KeyError: pa.error('unknown mode "%s"' % mode)

        # fixed parameters
        opts.min_max_age = 60
        opts.expire_age = 3600
        opts.ship_reboard_clearout = 3600

        if opts.cache_dir.startswith('~/'):
                # expanduser copes with HOME being unset, where
                # os.getenv('HOME') + ... raised a TypeError
                opts.cache_dir = os.path.expanduser(opts.cache_dir)

        if opts.display is None:
                # in-place redrawing only makes sense on a terminal
                # which is not also receiving the debug output
                if ((opts.debug > 0 and opts.debug_fd is None)
                    or not os.isatty(sys.stdout.fileno())):
                        opts.display = 'dumb'
                else:
                        opts.display = 'overwrite'

        fetcher = Fetcher(opts.ocean, opts.cache_dir)

        mode_fn(args[1:], pa.error)
967
# Run the program only when executed as a script, so that loading this
# file as a module (e.g. for testing) does not start it.
if __name__ == '__main__':
        main()