chiark / gitweb /
Improve debug and chat notation for cmd messages
[ypp-sc-tools.web-live.git] / yoweb-scrape
1 #!/usr/bin/python
2
3 #---------- setup ----------
4
5 import signal
6 signal.signal(signal.SIGINT, signal.SIG_DFL)
7
8 import os
9 import time
10 import urllib
11 import urllib2
12 import errno
13 import sys
14 import re as regexp
15 import random
16 import curses
17 from optparse import OptionParser
18
19 from BeautifulSoup import BeautifulSoup
20
21 opts = None
22
23 #---------- YPP parameters and arrays ----------
24
# Names of all puzzles; this order is also the column order of the
# default (non ship-duty) standings table.
puzzles = [
        'Swordfighting', 'Bilging', 'Sailing', 'Rigging', 'Navigating',
        'Battle Navigation', 'Gunning', 'Carpentry', 'Rumble',
        'Treasure Haul', 'Drinking', 'Spades', 'Hearts', 'Treasure Drop',
        'Poker', 'Distilling', 'Alchemistry', 'Shipwrightery',
        'Blacksmithing', 'Foraging',
]

# Standing names; a standing is stored elsewhere as an index into this list.
standingvals = [
        'Able', 'Distinguished', 'Respected', 'Master',
        'Renowned', 'Grand-Master', 'Legendary', 'Ultimate',
]

# Matches the href of a link to a pirate's yoweb page.
pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm')

# Column width used when laying out pirate names.
max_pirate_namelen = 12
36
37
38 #---------- general utilities ----------
39
def debug(m):
        # Write diagnostic message m to opts.debug_file, but only when
        # the debug level opts.debug is positive.
        if not (opts.debug > 0):
                return
        print >>opts.debug_file, m
43
def format_time_interval(ti):
        # Render an interval of ti seconds compactly, coarsest unit first:
        #   1 day or more       -> '<n>d'
        #   2 hours or more     -> '<n>h'
        #   2 minutes or more   -> '<n>m' (padded to two digits' width)
        #   anything shorter    -> 'M:SS'
        if ti >= 86400:
                return '%dd' % (ti / 86400)
        if ti >= 7200:
                return '%dh' % (ti / 3600)
        if ti >= 120:
                return '%2dm' % (ti / 60)
        return '%d:%02d' % (ti / 60, ti % 60)
49
50 #---------- caching and rate-limiting data fetcher ----------
51
52 class Fetcher:
53         def __init__(self, ocean, cachedir):
54                 debug('Fetcher init %s' % cachedir)
55                 self.ocean = ocean
56                 self.cachedir = cachedir
57                 try: os.mkdir(cachedir)
58                 except (OSError,IOError), oe:
59                         if oe.errno != errno.EEXIST: raise
60                 self._cache_scan(time.time())
61
62         def default_ocean(self, ocean='ice'):
63                 if self.ocean is None:
64                         self.ocean = ocean
65
66         def _cache_scan(self, now):
67                 # returns list of ages, unsorted
68                 ages = []
69                 debug('Fetcher   scan_cache')
70                 for leaf in os.listdir(self.cachedir):
71                         if not leaf.startswith('#'): continue
72                         path = self.cachedir + '/' + leaf
73                         try: s = os.stat(path)
74                         except (OSError,IOError), oe:
75                                 if oe.errno != errno.ENOENT: raise
76                                 continue
77                         age = now - s.st_mtime
78                         if age > opts.expire_age:
79                                 debug('Fetcher    expire %d %s' % (age, path))
80                                 try: os.remove(path)
81                                 except (OSError,IOError), oe:
82                                         if oe.errno != errno.ENOENT: raise
83                                 continue
84                         ages.append(age)
85                 return ages
86
87         def need_wait(self, now):
88                 ages = self._cache_scan(now)
89                 ages.sort()
90                 debug('Fetcher   ages ' + `ages`)
91                 min_age = 1
92                 need_wait = 0
93                 for age in ages:
94                         if age < min_age and age < 300:
95                                 debug('Fetcher   morewait min=%d age=%d' %
96                                         (min_age, age))
97                                 need_wait = max(need_wait, min_age - age)
98                         min_age += 3
99                         min_age *= 1.25
100                 return need_wait
101
102         def _rate_limit_cache_clean(self, now):
103                 need_wait = self.need_wait(now)
104                 if need_wait > 0:
105                         debug('Fetcher   wait %d' % need_wait)
106                         time.sleep(need_wait)
107
108         def fetch(self, url, max_age):
109                 debug('Fetcher fetch %s' % url)
110                 cache_corename = urllib.quote_plus(url)
111                 cache_item = "%s/#%s#" % (self.cachedir, cache_corename)
112                 try: f = file(cache_item, 'r')
113                 except (OSError,IOError), oe:
114                         if oe.errno != errno.ENOENT: raise
115                         f = None
116                 now = time.time()
117                 max_age = max(opts.min_max_age, min(max_age, opts.expire_age))
118                 if f is not None:
119                         s = os.fstat(f.fileno())
120                         age = now - s.st_mtime
121                         if age > max_age:
122                                 debug('Fetcher  stale %d < %d'% (max_age, age))
123                                 f = None
124                 if f is not None:
125                         data = f.read()
126                         f.close()
127                         debug('Fetcher  cached %d > %d' % (max_age, age))
128                         return data
129
130                 debug('Fetcher  fetch')
131                 self._rate_limit_cache_clean(now)
132
133                 stream = urllib2.urlopen(url)
134                 data = stream.read()
135                 cache_tmp = "%s/#%s~%d#" % (
136                         self.cachedir, cache_corename, os.getpid())
137                 f = file(cache_tmp, 'w')
138                 f.write(data)
139                 f.close()
140                 os.rename(cache_tmp, cache_item)
141                 debug('Fetcher  stored')
142                 return data
143
144         def yoweb(self, kind, tail, max_age):
145                 self.default_ocean()
146                 url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
147                         self.ocean, kind, tail)
148                 return self.fetch(url, max_age)
149
150 #---------- logging assistance for troubled screenscrapers ----------
151
class SoupLog:
        # Collects human-readable messages describing scraping problems
        # in self.msgs.

        def __init__(self):
                self.msgs = list()

        def msg(self, m):
                # Record a single message.
                self.msgs.append(m)

        def soupm(self, obj, m):
                # Record m together with the repr of the object it is about.
                annotated = m + '; in ' + repr(obj)
                self.msg(annotated)

        def needs_msgs(self, child_souplog):
                # Take over all messages from child_souplog, leaving it empty.
                self.msgs.extend(child_souplog.msgs)
                child_souplog.msgs = list()
162
def soup_text(obj):
        # Join every text node found under obj and trim surrounding
        # whitespace from the result.
        pieces = obj.findAll(text=True)
        return ''.join(pieces).strip()
166
class SomethingSoupInfo(SoupLog):
        # Base for page scrapers: fetches a yoweb page through the global
        # fetcher and parses it into self._soup, with HTML entities
        # converted.

        def __init__(self, kind, tail, max_age):
                SoupLog.__init__(self)
                page = fetcher.yoweb(kind, tail, max_age)
                entity_mode = BeautifulSoup.HTML_ENTITIES
                self._soup = BeautifulSoup(page, convertEntities=entity_mode)
174
175 #---------- scraper for pirate pages ----------
176
class PirateInfo(SomethingSoupInfo):
        # Scraper for a single pirate's yoweb page.
        #
        # Public data members:
        #  pi.standings = { 'Treasure Haul': 0 ... }
        #       maps puzzle name to an index into standingvals; a
        #       puzzle whose standing could not be determined is absent
        #  pi.name = name
        #  pi.crew = (id, name)   or None if not found
        #  pi.flag = (id, name)   or None if not found
        #  pi.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, pirate, max_age=300):
                # Fetches (or reuses a cached copy of) the page for
                # pirate and extracts standings, crew and flag.
                SomethingSoupInfo.__init__(self,
                        'pirate.wm?target=', pirate, max_age)
                self.name = pirate
                self._find_standings()
                self.crew = self._find_crewflag('crew',
                        '^/yoweb/crew/info\\.wm')
                self.flag = self._find_crewflag('flag',
                        '^/yoweb/flag/info\\.wm')

        def _find_standings(self):
                # Populate self.standings from the stat images on the
                # page.  Each image's alt text names the puzzle; the
                # standing text is taken from the table cell following
                # the cell that contains the image.
                imgs = self._soup.findAll('img',
                        src=regexp.compile('/yoweb/images/stat.*'))
                # Two alternatives; whichever one matches supplies the
                # standing name as its (only) group.
                standing_re = regexp.compile(
                        u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
                        )
                standings = { }

                for skill in puzzles:
                        standings[skill] = [ ]

                # Problems are collected separately and only reported
                # if some puzzle ends up without a standing (see below).
                skl = SoupLog()

                for img in imgs:
                        try: puzzle = img['alt']
                        except KeyError: continue

                        if not puzzle in puzzles:
                                skl.soupm(img, 'unknown puzzle: "%s"' % puzzle)
                                continue
                        key = img.findParent('td')
                        if key is None:
                                skl.soupm(img, 'puzzle at root! "%s"' % puzzle)
                                continue
                        valelem = key.findNextSibling('td')
                        if valelem is None:
                                skl.soupm(key, 'puzzle missing sibling "%s"'
                                        % puzzle)
                                continue
                        valstr = soup_text(valelem)
                        match = standing_re.match(valstr)
                        if match is None:
                                skl.soupm(key, ('puzzle "%s" unparseable'+
                                        ' standing "%s"') % (puzzle, valstr))
                                continue
                        standing = match.group(match.lastindex)
                        standings[puzzle].append(standing)

                self.standings = { }

                for puzzle in puzzles:
                        sl = standings[puzzle]
                        if len(sl) > 1:
                                skl.msg('puzzle "%s" multiple standings %s' %
                                                (puzzle, repr(sl)))
                                continue
                        if not sl:
                                skl.msg('puzzle "%s" no standing found' % puzzle)
                                continue
                        standing = sl[0]
                        # Look the name up across the WHOLE of standingvals,
                        # so that the final entry is recognised as well.
                        try:
                                self.standings[puzzle] = \
                                        standingvals.index(standing)
                        except ValueError:
                                skl.msg('puzzle "%s" unknown standing "%s"' %
                                        (puzzle, standing))

                # Surface the collected messages only when at least one
                # puzzle has no usable standing.
                for puzzle in puzzles:
                        if not puzzle in self.standings:
                                self.needs_msgs(skl)
                                break

        def _find_crewflag(self, cf, yoweb_re):
                # cf is 'crew' or 'flag'.  Expects exactly one link whose
                # href matches yoweb_re and ends in '<cf>id=<id>'.
                # Returns (id, link text), or None (after logging a
                # message) when that is not what the page contains.
                things = self._soup.findAll('a', href=regexp.compile(yoweb_re))
                if len(things) != 1:
                        self.msg('zero or several %s id references found' % cf)
                        return None
                thing = things[0]
                id_re = '\\b%sid\\=(\\w+)$' % cf
                id_haystack = thing['href']
                match = regexp.compile(id_re).search(id_haystack)
                if match is None:
                        self.soupm(thing, ('incomprehensible %s id ref'+
                                ' (%s in %s)') % (cf, id_re, id_haystack))
                        return None
                name = soup_text(thing)
                return (match.group(1), name)

        def __str__(self):
                return repr((self.crew, self.flag, self.standings, self.msgs))
275
276 #---------- scraper for crew pages ----------
277
class CrewInfo(SomethingSoupInfo):
        # Scraper for a crew's yoweb page.
        #
        # Public data members:
        #  ci.crew = [ ('Captain',        ['Pirate', ...]),
        #              ('Senior Officer', [...]),
        #               ... ]
        #  ci.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, crewid, max_age=300):
                SomethingSoupInfo.__init__(self,
                        'crew/info.wm?crewid=', crewid, max_age)
                self._find_crew()

        def _find_crew(self):
                # Fill in self.crew.  On a page we cannot make sense of,
                # a message is logged and self.crew is left empty.
                self.crew = []
                captain_imgs = self._soup.findAll('img',
                        src='/yoweb/images/crew-captain.png')
                if len(captain_imgs) != 1:
                        self.msg('crew members: no. of captain images != 1')
                        return

                # Starting from the captain image, climb through the
                # enclosing tables until one contains a pirate link.
                table = captain_imgs[0]
                while not table.find('a', href=pirate_ref_re):
                        table = table.findParent('table')
                        if not table:
                                self.msg('crew members: cannot find table')
                                return

                rank_img_re = regexp.compile('/yoweb/images/crew')
                members = None  # member list of the most recent rank row
                for row in table.contents:
                        # Direct children only; bare strings are skipped.
                        if isinstance(row,basestring):
                                continue

                        if row.find('img', attrs={'src': rank_img_re}):
                                # A row with a rank image starts a new rank.
                                members = []
                                self.crew.append((soup_text(row), members))
                                continue

                        for link in row.findAll('a', href=pirate_ref_re):
                                if members is None:
                                        self.soupm(link, 'crew members: crew'
                                                ' before rank')
                                else:
                                        members.append(soup_text(link))

        def __str__(self):
                return repr((self.crew, self.msgs))
325
326 #---------- pretty-printer for tables of pirate puzzle standings ----------
327
class StandingsTable:
        # Builds, as one string, a text table with one row per pirate and
        # one column per puzzle (or group of puzzles).
        #
        # use_puzzles is a list whose entries are either a puzzle name or
        # a list of puzzle names sharing a column (the best standing of
        # the group is shown).  If omitted, it is chosen according to
        # opts.ship_duty.  Each column is col_width-1 characters wide,
        # preceded by one space.

        def __init__(self, use_puzzles=None, col_width=6):
                if use_puzzles is None:
                        if opts.ship_duty:
                                use_puzzles=[
                                        'Navigating','Battle Navigation',
                                        'Gunning',
                                        ['Sailing','Rigging'],
                                        'Bilging',
                                        'Carpentry',
                                        'Treasure Haul'
                                ]
                        else:
                                use_puzzles=puzzles
                self._puzzles = use_puzzles
                self.s = ''
                self._cw = col_width-1

        def _pline(self, pirate, puzstrs, extra):
                # Append one row: the name column, then every entry of
                # puzstrs padded/truncated to the column width, then the
                # optional free-form extra text.
                name_width = max(max_pirate_namelen, 14)
                parts = [' %-*s' % (name_width, pirate)]
                for v in puzstrs:
                        parts.append(' %-*.*s' % (self._cw,self._cw, v))
                if extra:
                        parts.append(' ' + extra)
                parts.append('\n')
                self.s += ''.join(parts)

        def _puzstr(self, pi, puzzle):
                # Cell text for one column.  Returns '?' if a standing is
                # missing from pi.standings and '' for standing 0.
                # Otherwise: the standing number (only if the column is
                # wider than 4), a space (only if wider than 5), then one
                # '*' per two standing levels and a '+' for an odd one.
                if not isinstance(puzzle,list): puzzle = [puzzle]
                try: standing = max([pi.standings[p] for p in puzzle])
                except KeyError: return '?'
                if not standing: return ''
                s = ''
                if self._cw > 4:
                        s += repr(standing)
                if self._cw > 5:
                        s += ' '
                # Explicit floor division: a repeat count must be an int.
                s += '*' * (standing // 2)
                s += '+' * (standing % 2)
                return s

        def headings(self):
                # Append the heading row.  A name containing a space is
                # shortened to at most four characters of its first word
                # followed by the rest after the first space; grouped
                # columns show their shortened names, each cut to half
                # the column width, joined by '/'.
                def puzn_redact(name):
                        if isinstance(name,list):
                                return '/'.join(
                                        ["%.*s" % (self._cw // 2, puzn_redact(n))
                                         for n in name])
                        spc = name.find(' ')
                        if spc < 0: return name
                        return name[0:min(4,spc)] + name[spc+1:]
                self._pline('', [puzn_redact(p) for p in self._puzzles], None)

        def literalline(self, line):
                # Append line verbatim, followed by a newline.
                self.s += line + '\n'

        def pirate_dummy(self, name, standingstring, extra=None):
                # Append a row for name whose cells come from
                # standingstring repeated once per column.
                self._pline(name, standingstring * len(self._puzzles), extra)

        def pirate(self, pi, extra=None):
                # Append the row for pi (an object with .name and
                # .standings).
                puzstrs = [self._puzstr(pi,puz) for puz in self._puzzles]
                self._pline(pi.name, puzstrs, extra)

        def results(self):
                # The table text accumulated so far.
                return self.s
390
391 #---------- chat log parser ----------
392
class PirateAboard:
        # This is essentially a transparent, dumb, data class.
        #  pa.v                 vessel record this pirate is filed under
        #  pa.name
        #  pa.last_time         time and description of the most
        #  pa.last_event         recent event recorded for the pirate
        #  pa.gunner
        #  pa.last_chat_time
        #  pa.last_chat_chan
        #  pa.pi                PirateInfo, once fetched; else None

        def __init__(self, pn, v, time, event):
                self.name, self.v = pn, v
                self.last_time, self.last_event = time, event
                self.last_chat_time = self.last_chat_chan = None
                self.gunner = False
                self.pi = None

        def pirate_info(self):
                # Returns the pirate's PirateInfo, fetching it on first
                # use - but only when the fetcher reports that no wait
                # is needed; otherwise returns what we have (maybe None).
                if self.pi:
                        return self.pi
                if fetcher.need_wait(time.time()):
                        return self.pi
                self.pi = PirateInfo(self.name, 3600)
                return self.pi
418
class ChatLogTracker:
        # This is quite complex so we make it opaque.  Use the
        # official invokers, accessors etc.
        #
        # Reads a chat log file line by line and maintains a picture of
        # which pirates are aboard which vessel, when each was last
        # heard of, and when and on which channel each last chatted.

        def __init__(self, myself_pi, logfn):
                # myself_pi: object whose .name is our own pirate name.
                # logfn: path of the chat log file to read.
                self._pl = {}   # self._pl['Pirate'] =
                self._vl = {}   #   self._vl['Vessel']['Pirate'] = PirateAboard
                                # self._vl['Vessel']['#lastinfo']
                self._v = None          # self._v =
                self._vessel = None     #       self._vl[self._vessel]
                self._date = None       # [n,n,n] from the last date line
                self._myself = myself_pi
                self._need_redisplay = False
                self._f = file(logfn)
                self._lbuf = ''         # partial line read so far
                # [bytes consumed so far, file size when opened]
                self._progress = [0, os.fstat(self._f.fileno()).st_size]

        def force_redisplay(self):
                # Make the next changed() call report True.
                self._need_redisplay = True

        def _onboard_event(self,v,timestamp,pirate,event):
                # Record that pirate was seen on vessel record v.  A
                # pirate currently filed under a different vessel is
                # moved (with a fresh PirateAboard).  Returns the
                # PirateAboard.
                pa = self._pl.get(pirate, None)
                if pa is not None and pa.v is v:
                        pa.last_time = timestamp
                        pa.last_event = event
                else:
                        if pa is not None: del pa.v[pirate]
                        pa = PirateAboard(pirate, v, timestamp, event)
                        self._pl[pirate] = pa
                        v[pirate] = pa
                v['#lastinfo'] = timestamp
                self.force_redisplay()
                return pa

        def _trash_vessel(self, v):
                # Forget every pirate filed under vessel record v.  Keys
                # starting with '#' are metadata, not pirates.  The
                # caller deals with self._vl.
                for pn in v:
                        if pn.startswith('#'): continue
                        del self._pl[pn]
                self.force_redisplay()

        def expire_garbage(self, timestamp):
                # Drop every vessel whose last information is older than
                # opts.ship_reboard_clearout relative to timestamp.
                # Iterates over a copy because entries are deleted.
                for (vn,v) in list(self._vl.iteritems()):
                        la = v['#lastinfo']
                        if timestamp - la > opts.ship_reboard_clearout:
                                self._debug_line_disposition(timestamp,'',
                                        'stale reset '+vn)
                                self._trash_vessel(v)
                                del self._vl[vn]

        def _create_vessel(self, vn, timestamp):
                # Install and return a fresh, empty record for vessel vn.
                self._vl[vn] = v = { '#lastinfo': timestamp }
                return v

        def _update_vessel_lookup(self, vn, timestamp, dml):
                # Return the record for vessel vn, creating it if it is
                # unknown and replacing it if its information is stale.
                # Appends 'new', 'stale' or 'current' to the debug
                # message list dml accordingly.
                v = self._vl.get(vn, None)
                if v is None:
                        dml.append('new')
                        v = self._create_vessel(vn, timestamp)
                elif timestamp - v['#lastinfo'] > opts.ship_reboard_clearout:
                        dml.append('stale')
                        self._trash_vessel(v)
                        v = self._create_vessel(vn, timestamp)
                else:
                        dml.append('current')
                return v

        def _debug_line_disposition(self,timestamp,l,m):
                # Debug-log what was done (m) with log line l.
                debug('CLT %13s %-30s %s' % (timestamp,m,l))

        def chatline(self,l):
                # Process one complete log line l (without its newline).
                # Every exit goes through d(), which debug-logs how the
                # line was classified.
                #
                # Note that the helper closures below refer to the local
                # variables m, l and timestamp as they are at the time
                # the helper is CALLED, so the order of the statements
                # in this function matters.
                rm = lambda re: regexp.match(re,l)
                d = lambda m: self._debug_line_disposition(timestamp,l,m)
                timestamp = None

                # Date separator line: remember the date and restart
                # the within-day time sequence.
                m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$')
                if m:
                        self._date = [int(x) for x in m.groups()]
                        self._previous_timestamp = None
                        return d('date '+`self._date`)

                if self._date is None:
                        return d('date unset')

                m = rm('\\[(\d\d):(\d\d):(\d\d)\\] ')
                if not m:
                        return d('no timestamp')

                # Combine the remembered date with this line's time of
                # day.  If the result is earlier than the previous
                # line's, advance the last date field by one and retry.
                while True:
                        time_tuple = (self._date +
                                      [int(x) for x in m.groups()] +
                                      [-1,-1,-1])
                        timestamp = time.mktime(time_tuple)
                        if timestamp >= self._previous_timestamp: break
                        self._date[2] += 1
                        self._debug_line_disposition(timestamp,'',
                                'new date '+`self._date`)

                self._previous_timestamp = timestamp

                # Discard everything up to and including the first
                # space, i.e. the '[hh:mm:ss] ' prefix.
                l = l[l.find(' ')+1:]

                # ob_x: record an event for pirate on the current vessel.
                # ob1:  same, for the pirate named by group 1 of m.
                # oba:  like ob1, with group 2 of m appended to the event.
                def ob_x(pirate,event):
                        return self._onboard_event(
                                        self._v, timestamp, pirate, event)
                def ob1(did): ob_x(m.group(1), did); return d(did)
                def oba(did): return ob1('%s %s' % (did, m.group(2)))

                # Note the departure as an event (which files the
                # pirate under v), then remove them entirely.
                def disembark(v, timestamp, pirate, event):
                        self._onboard_event(
                                        v, timestamp, pirate, 'leaving '+event)
                        del v[pirate]
                        del self._pl[pirate]

                # We ourselves boarded a vessel: it becomes current.
                m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$')
                if m:
                        dm = ['boarding']
                        pn = self._myself.name
                        self._vessel = vn = m.group(1)
                        self._v = self._update_vessel_lookup(vn, timestamp, dm)

                        ob_x(pn, 'we boarded')
                        self.expire_garbage(timestamp)
                        return d(' '.join(dm))

                # Everything below needs a current vessel.
                if self._v is None:
                        return d('no vessel')

                m = rm('(\\w+) has come aboard\\.$')
                if m: return ob1('boarded');

                m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$')
                if m:
                        (who,what) = m.groups()
                        pa = ob_x(who,'ord '+what)
                        if what == 'Gunning':
                                pa.gunner = True
                        return d('duty order')

                m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$')
                if m: oba('stopped'); return d("end")

                # Note that speaker chatted on chan.  Returns 'mystery'
                # for a pirate we do not know, 'elsewhere' for one filed
                # under another vessel, and 'here' otherwise; only in
                # the last case is anything recorded.
                def chat_core(speaker, chan):
                        try: pa = self._pl[speaker]
                        except KeyError: return 'mystery'
                        if pa.v is not self._v: return 'elsewhere'
                        pa.last_chat_time = timestamp
                        pa.last_chat_chan = chan
                        self.force_redisplay()
                        return 'here'

                # Ordinary chat by the pirate named by group 1 of m.
                def chat(chan):
                        speaker = m.group(1)
                        dm = chat_core(speaker, chan)
                        return d('chat %s %s' % (chan, dm))

                # Chat which may contain a command of the form
                #   /a [vessel:] pirate...      pirates are aboard
                #   /d [vessel:] pirate...      pirates have left
                # Anything else is treated as ordinary chat.
                def chat_metacmd(chan):
                        (cmdr, metacmd) = m.groups()
                        metacmd = regexp.sub('\\s+', ' ', metacmd).strip()
                        m2 = regexp.match(
                                '/([ad]) (?:([A-Za-z ]+)\\s*:)?([A-Za-z ]+)$',
                                metacmd)
                        if not m2: return chat(chan)

                        (cmd, vn, targets) = m2.groups()
                        dml = ['cmd', chan, cmd]

                        if cmd == 'a': each = self._onboard_event
                        else: each = disembark

                        if cmdr == self._myself.name:
                                dml.append('self')
                                how = 'manual: /%s' % cmd
                        else:
                                dml.append('other')
                                how = '/%s %s' % (cmd,cmdr)

                        # Work out which vessel is meant: a two-word
                        # name is taken as a full vessel name; no name
                        # means the current vessel; any other name must
                        # match the end of the current vessel's name
                        # (either all of it, or following a space).
                        v = None
                        if vn is not None and len(vn.split(' ')) == 2:
                                v = self._update_vessel_lookup(
                                        vn.title(), timestamp, dml)
                        elif self._v is None:
                                dml.append('no-current')
                        elif vn is None:
                                dml.append('current')
                                v = self._v
                        elif regexp.match('(?:.* )?%s$' % vn.title(),
                                        self._vessel):
                                dml.append('match')
                                v = self._v
                        else:
                                dml.append('unk-abbrev')

                        if v is not None:
                                targets = targets.strip().split(' ')
                                dml.append(`len(targets)`)
                                for target in targets:
                                        each(v, timestamp, target.title(), how)

                        dm = ' '.join(dml)
                        chat_core(cmdr, 'cmd '+chan)
                        return d(dm)

                m = rm('(\\w+) (?:issued an order|ordered everyone) "')
                if m: return ob1('general order');

                m = rm('(\\w+) says, "')
                if m: return chat('public')

                m = rm('(\\w+) tells ye, "')
                if m: return chat('private')

                m = rm('Ye told (\\w+), "(.*)"$')
                if m: return chat_metacmd('private')

                m = rm('(\\w+) flag officer chats, "')
                if m: return chat('flag officer')

                m = rm('(\\w+) officer chats, "(.*)"$')
                if m: return chat_metacmd('officer')

                # If we are among the winners, every winner whose name
                # is a single word is recorded on the current vessel.
                m = rm('Game over\\.  Winners: ([A-Za-z, ]+)\\.$')
                if m:
                        pl = m.group(1).split(', ')
                        if not self._myself.name in pl:
                                return d('lost boarding battle')
                        for pn in pl:
                                if ' ' in pn: continue
                                ob_x(pn,'won boarding battle')
                        return d('won boarding battle')

                m = rm('(\\w+) is eliminated\\!')
                if m: return ob1('eliminated in fray');

                m = rm('(\\w+) has left the vessel\.')
                if m:
                        pirate = m.group(1)
                        disembark(self._v, timestamp, pirate, 'disembarked')
                        return d('disembarked')

                return d('not matched')

        def _str_vessel(self, vn, v):
                # Debugging dump of one vessel record; also asserts that
                # the record and self._pl agree with each other.
                s = ' vessel %s\n' % vn
                s += ' '*20 + "%-*s   %13s\n" % (
                                max_pirate_namelen, '#lastinfo',
                                v['#lastinfo'])
                for pn in sorted(v.keys()):
                        if pn.startswith('#'): continue
                        pa = v[pn]
                        assert pa.v == v
                        assert self._pl[pn] == pa
                        s += ' '*20 + "%s %-*s %13s %-30s %13s %s\n" % (
                                (' ','G')[pa.gunner],
                                max_pirate_namelen, pn,
                                pa.last_time, pa.last_event,
                                pa.last_chat_time, pa.last_chat_chan)
                return s

        def __str__(self):
                # Debugging dump of the whole state: the current vessel
                # first, then the others in name order.  Also checks
                # internal consistency with assertions.
                s = '''<ChatLogTracker
 myself %s
 vessel %s
'''                     % (self._myself.name, self._vessel)
                assert ((self._v is None and self._vessel is None) or
                        (self._v is self._vl[self._vessel]))
                if self._vessel is not None:
                        s += self._str_vessel(self._vessel, self._v)
                for vn in sorted(self._vl.keys()):
                        if vn == self._vessel: continue
                        s += self._str_vessel(vn, self._vl[vn])
                for p in self._pl:
                        pa = self._pl[p]
                        assert pa.v[p] is pa
                        assert pa.v in self._vl.values()
                s += '>\n'
                return s

        def catchup(self, progress=None):
                # Read and process everything currently available in the
                # log file.  An incomplete final line is kept in
                # self._lbuf until the rest of it has been read.
                # progress, if given, has its progress(done, total)
                # method called after each read and its caughtup()
                # method called at the end.
                while True:
                        more = self._f.readline()
                        if not more: break

                        self._progress[0] += len(more)
                        if progress: progress.progress(*self._progress)

                        self._lbuf += more
                        if self._lbuf.endswith('\n'):
                                self.chatline(self._lbuf.rstrip())
                                self._lbuf = ''
                                if opts.debug >= 2:
                                        debug(self.__str__())
                if progress: progress.caughtup()

        def changed(self):
                # returns whether a redisplay has been requested since
                # the previous call, and clears the request
                rv = self._need_redisplay
                self._need_redisplay = False
                return rv
        def myname(self):
                # returns our pirate name
                return self._myself.name
        def vessel(self):
                # returns the vessel we're aboard or None
                return self._vessel
        def aboard(self):
                # returns a list of PirateAboard sorted by name
                if self._v is None: return []
                return [ self._v[pn]
                         for pn in sorted(self._v.keys())
                         if not pn.startswith('#') ]
728
729 #---------- implementations of actual operation modes ----------
730
731 def do_pirate(pirates, bu):
732         print '{'
733         for pirate in pirates:
734                 info = PirateInfo(pirate)
735                 print '%s: %s,' % (`pirate`, info)
736         print '}'
737
def prep_crew_of(args, bu, max_age=300):
        # Looks up the pirate named by the single argument and returns
        # the CrewInfo for pi.crew[0], or None if the pirate's crew
        # attribute is None.  bu is called with a message if the
        # argument count is wrong.
        if len(args) != 1:
                bu('crew-of takes one pirate name')
        pirate = PirateInfo(args[0], max_age)
        crew = pirate.crew
        if crew is None:
                return None
        return CrewInfo(crew[0], max_age)
743
744 def do_crew_of(args, bu):
745         ci = prep_crew_of(args, bu)
746         print ci
747
748 def do_standings_crew_of(args, bu):
749         ci = prep_crew_of(args, bu, 60)
750         tab = StandingsTable()
751         tab.headings()
752         for (rank, members) in ci.crew:
753                 if not members: continue
754                 tab.literalline('%s:' % rank)
755                 for p in members:
756                         pi = PirateInfo(p, random.randint(900,1800))
757                         tab.pirate(pi)
758         print tab.results()
759
class ProgressPrintPercentage:
        # Progress reporter which writes a one-line percentage, ending
        # in '\r' so that each update overwrites the previous one, to
        # the file object f (sys.stdout by default).
        def __init__(self, f=sys.stdout):
                self._f = f
        def progress_string(self,done,total):
                # returns the text for `done' out of `total'; a zero
                # total is reported as 100% rather than dividing by zero
                if not total:
                        percent = 100
                else:
                        percent = (done*100) // total
                return "scan chat logs %3d%%\r" % percent
        def progress(self,*a):
                # a is (done, total) as for progress_string
                self._f.write(self.progress_string(*a))
                self._f.flush()
        def show_init(self, pirate, ocean):
                # announces which pirate and ocean we are starting up for
                self._f.write('Starting up, %s on the %s ocean\n' % (
                        pirate, ocean))
        def caughtup(self):
                # blanks out the progress line
                self._f.write('                   \r')
                self._f.flush()
774
775 #----- modes which use the chat log parser are quite complex -----
776
def prep_chat_log(args, bu,
                progress=ProgressPrintPercentage(),
                max_myself_age=3600):
        # Common startup for the modes which follow a chat log.
        #
        # args must be exactly one chat log filename whose last path
        # component starts Pirate_ocean_ ; the pirate and ocean names
        # are taken from it.  bu is called with a message if that is
        # not so.  progress receives show_init() and then the progress
        # callbacks made by the initial catchup.
        #
        # Returns (myself, track): our own PirateInfo and a
        # ChatLogTracker which has read the log as far as it goes.
        if len(args) != 1: bu('this action takes only chat log filename')
        logfn = args[0]
        logfn_re = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_'
        match = regexp.match(logfn_re, logfn)
        if not match: bu('chat log filename is not in expected format')
        (pirate, ocean) = match.groups()
        fetcher.default_ocean(ocean)

        myself = PirateInfo(pirate,max_myself_age)
        progress.show_init(pirate, fetcher.ocean)
        track = ChatLogTracker(myself, logfn)

        # Catchup dumps the whole tracker after every line when
        # opts.debug >= 2, so lower the level by 2 while reading the
        # existing log.  The finally makes sure the level is put back
        # even if catchup raises.
        opts.debug -= 2
        try:
                track.catchup(progress)
        finally:
                opts.debug += 2

        track.force_redisplay()

        return (myself, track)
799
800 def do_track_chat_log(args, bu):
801         (myself, track) = prep_chat_log(args, bu)
802         while True:
803                 track.catchup()
804                 if track.changed():
805                         print track
806                 time.sleep(1)
807
808 #----- ship management aid -----
809
810 class Display_dumb(ProgressPrintPercentage):
811         def __init__(self):
812                 ProgressPrintPercentage.__init__(self)
813         def show(self, s):
814                 print '\n\n', s;
815         def realstart(self):
816                 pass
817
class Display_overwrite(ProgressPrintPercentage):
        # Ship-aid display which redraws in place: each show() homes
        # the cursor, rewrites every line followed by the `el'
        # capability, and finishes with `ed'.
        #
        # self._clear is the terminal's `clear' string ('' if it has
        # none); self._t maps 'ho', 'el' and 'ed' to the strings to emit.
        # Fallbacks, in order:
        #  - no `clear' capability: show() behaves like Display_dumb's
        #    (two blank lines then the text) and realstart() does nothing;
        #  - any of ho/el/ed missing: clear the whole screen on each
        #    show() instead of homing and erasing.

        def __init__(self):
                ProgressPrintPercentage.__init__(self)

                # we only want to look capability strings up, so point
                # setupterm at /dev/null rather than at the real terminal
                null = open('/dev/null','w')
                curses.setupterm(fd=null.fileno())

                self._t = {'el':'', 'ed':''}
                self._clear = curses.tigetstr('clear')
                if not self._clear:
                        self._debug('missing clear!')
                        self._clear = ''
                        # must be a method bound to this instance;
                        # another class's unbound method cannot be called
                        # with just the text as its argument
                        self.show = self._show_dumb
                        return

                if not self._init_sophisticated():
                        for k in self._t.keys(): self._t[k] = ''
                        self._t['ho'] = self._clear

        def _debug(self,m): debug('display overwrite: '+m)

        def _init_sophisticated(self):
                # Fills self._t from the terminal description.  Returns 1
                # if every needed capability was found, 0 otherwise.
                for k in self._t.keys():
                        s = curses.tigetstr(k)
                        self._t[k] = s
                # NOTE(review): 'ho' looks like the termcap code; the
                # terminfo capname appears to be 'home', so this lookup
                # may always miss and the cup fallback be what is used
                # in practice - confirm against terminfo(5).
                self._t['ho'] = curses.tigetstr('ho')
                if not self._t['ho']:
                        cup = curses.tigetstr('cup')
                        # tparm cannot take a missing capability; leave
                        # 'ho' unset so that it is reported as missing
                        if cup:
                                self._t['ho'] = curses.tparm(cup,0,0)
                missing = [k for k in self._t.keys() if not self._t[k]]
                if missing:
                        self._debug('missing '+(' '.join(missing)))
                        return 0
                return 1

        def _show_dumb(self, s):
                # fallback show(): same output as Display_dumb.show
                sys.stdout.write('\n\n%s\n' % (s,))

        def show(self, s):
                w = sys.stdout.write
                def wti(k): w(self._t[k])

                wti('ho')
                nl = ''
                for l in s.rstrip().split('\n'):
                        w(nl)
                        w(l)
                        wti('el')
                        nl = '\r\n'
                wti('ed')
                w(' ')
                sys.stdout.flush()

        def realstart(self):
                # start from a clean screen, if the terminal can clear
                if self._clear:
                        sys.stdout.write(self._clear)
                sys.stdout.flush()
871
def do_ship_aid(args, bu):
        # 'ship-aid' mode: follows the chat log for ever and, once a
        # second, redisplays a standings table of the pirates aboard
        # our vessel.  Each row is annotated with a gunner marker and
        # the age and description of the pirate's last event and last
        # chat.  --ship-duty defaults to on in this mode.
        if opts.ship_duty is None:
                opts.ship_duty = True

        display_class = globals()['Display_' + opts.display]
        displayer = display_class()
        spinner = '/-\\'

        (myself, track) = prep_chat_log(args, bu, progress=displayer)

        blank_column = ' ' * 22

        def timeevent(now, when, what):
                # one fixed-width column: how long ago, and what
                if when is None:
                        return blank_column
                age = format_time_interval(now - when)
                return " %-4s %-16s" % (age, what)

        displayer.realstart()

        while True:
                track.catchup()
                now = time.time()

                heading = "%s" % track.myname()
                vessel_name = track.vessel()
                if vessel_name is None:
                        heading += " not on a vessel?!"
                else:
                        heading += " on board the %s" % vessel_name
                heading += " at %s\n" % time.strftime("%Y-%m-%d %H:%M:%S")

                table = StandingsTable()
                table.headings()

                for aboard in track.aboard():
                        info = aboard.pirate_info()

                        if aboard.gunner:
                                extra = 'G '
                        else:
                                extra = '  '
                        extra += timeevent(now, aboard.last_time,
                                           aboard.last_event)
                        extra += timeevent(now, aboard.last_chat_time,
                                           aboard.last_chat_chan)

                        if info is not None:
                                table.pirate(info, extra)
                        else:
                                # no info yet: placeholder row showing
                                # the current spinner character
                                table.pirate_dummy(aboard.name,
                                                   spinner[0], extra)

                displayer.show(heading + table.results())
                time.sleep(1)
                spinner = spinner[1:3] + spinner[0]
919
920 #---------- main program ----------
921
def main():
        # Parses the command line into the global opts, fills in the
        # derived and fixed settings, creates the global fetcher, and
        # runs the do_<mode> function selected by the first argument,
        # passing it the remaining arguments and the parser's error
        # function.
        global opts, fetcher

        pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
actions:
 yoweb-scrape [--ocean OCEAN ...] pirate PIRATE
 yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
 yoweb-scrape [options] ship-aid CHAT-LOG  (must be .../PIRATE_OCEAN_chat-log*)

display modes (for --display) apply to ship-aid:
 --display=dumb       just print new information, scrolling the screen
 --display=overwrite  use cursor motion, selective clear, etc. to redraw at top
''')
        ao = pa.add_option
        ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None,
                help='select ocean OCEAN')
        ao('--cache-dir', dest='cache_dir', metavar='DIR',
                default='~/.yoweb-scrape-cache',
                help='cache yoweb pages in DIR')
        ao('-D','--debug', action='count', dest='debug', default=0,
                help='enable debugging output')
        ao('--debug-fd', type='int', dest='debug_fd',
                help='write any debugging output to specified fd')
        ao('-q','--quiet', action='store_true', dest='quiet',
                help='suppress warning output')
        ao('--display', action='store', dest='display',
                type='choice', choices=['dumb','overwrite'],
                help='how to display ship aid')

        # both options share one dest, which stays None if neither is given
        ao('--ship-duty', action='store_true', dest='ship_duty',
                help='show ship duty station puzzles')
        ao('--all-puzzles', action='store_false', dest='ship_duty',
                help='show all puzzles, not just ship duty stations')

        (opts,args) = pa.parse_args()
        random.seed()

        if len(args) < 1:
                pa.error('need a mode argument')

        if opts.debug_fd is not None:
                opts.debug_file = os.fdopen(opts.debug_fd, 'w')
        else:
                opts.debug_file = sys.stdout

        # Mode foo-bar is implemented by function do_foo_bar.  A literal
        # '_' in the mode is first turned into '#', which cannot occur in
        # a function name, so only the hyphenated spelling is accepted.
        mode = args[0]
        mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_')
        try: mode_fn = globals()[mode_fn_name]
        except KeyError: pa.error('unknown mode "%s"' % mode)

        # fixed parameters
        opts.min_max_age = 60
        opts.expire_age = 3600
        opts.ship_reboard_clearout = 3600

        # expanduser copes with HOME being unset, which plain
        # concatenation with os.getenv('HOME') does not
        if opts.cache_dir.startswith('~/'):
                opts.cache_dir = os.path.expanduser(opts.cache_dir)

        # Default display: dumb if debug output would share stdout with
        # the display, or if stdout is not a terminal; else overwrite.
        if opts.display is None:
                if ((opts.debug > 0 and opts.debug_fd is None)
                    or not os.isatty(sys.stdout.fileno())):
                        opts.display = 'dumb'
                else:
                        opts.display = 'overwrite'

        fetcher = Fetcher(opts.ocean, opts.cache_dir)

        mode_fn(args[1:], pa.error)
993
# Run the program only when executed as a script, so that loading
# this file as a module does not parse the command line.
if __name__ == '__main__':
        main()