chiark / gitweb /
04e392d7d53e79a25d382275f575030e420d18c2
[ypp-sc-tools.db-live.git] / yoweb-scrape
1 #!/usr/bin/python
2
3 #---------- setup ----------
4
5 import signal
6 signal.signal(signal.SIGINT, signal.SIG_DFL)
7
8 import os
9 import time
10 import urllib
11 import urllib2
12 import errno
13 import sys
14 import re as regexp
15 import random
16 import curses
17 from optparse import OptionParser
18
19 from BeautifulSoup import BeautifulSoup
20
21 opts = None
22
23 #---------- YPP parameters and arrays ----------
24
# Names of the puzzles we know about, as a list.  The pirate-page scraper
# compares the alt text of standings images against these names, and they
# are the keys of PirateInfo.standings.
puzzles = ('Swordfighting/Bilging/Sailing/Rigging/Navigating'+
        '/Battle Navigation/Gunning/Carpentry/Rumble/Treasure Haul'+
        '/Drinking/Spades/Hearts/Treasure Drop/Poker/Distilling'+
        '/Alchemistry/Shipwrightery/Blacksmithing/Foraging').split('/')

# Names of the possible standings, as a list.  A standing is represented
# numerically elsewhere in this file by its index in this list.
standingvals = ('Able/Distinguished/Respected/Master'+
                '/Renowned/Grand-Master/Legendary/Ultimate').split('/')

# Matches an href which starts with the path of a yoweb pirate page.
pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm')

# Width used when laying out pirate names in columns.
max_pirate_namelen = 12
36
37
38 #---------- general utilities ----------
39
def debug(m):
        """Print m on stdout, but only when opts.debug is greater than zero."""
        if opts.debug > 0:
                print(m)
43
def format_time_interval(ti):
        """Render a number of seconds as a short human-readable string.

        Under two minutes the result is 'M:SS'; under two hours it is
        whole minutes ('NNm'); under a day whole hours ('Nh'); otherwise
        whole days ('Nd').
        """
        if ti < 120:
                return '%d:%02d' % (ti / 60, ti % 60)
        # (exclusive upper bound, format, seconds per unit)
        coarse_units = (
                (7200, '%2dm', 60),
                (86400, '%dh', 3600),
        )
        for (limit, fmt, unit) in coarse_units:
                if ti < limit:
                        return fmt % (ti / unit)
        return '%dd' % (ti / 86400)
49
50 #---------- caching and rate-limiting data fetcher ----------
51
class Fetcher:
        # Fetches URLs, keeping a cache of the results on disk and
        # sleeping where necessary to limit the rate of real fetches.
        #
        # Each cached page lives in cachedir in a file named
        #   #<url quoted with urllib.quote_plus>#
        # and the file's mtime records when it was fetched.  Files in
        # cachedir whose names start with '#' and which are older than
        # opts.expire_age are deleted whenever the cache is scanned.
        #
        # Public data members:
        #  fetcher.ocean     ocean name used to build yoweb URLs, or None
        #  fetcher.cachedir  directory holding the cache files

        def __init__(self, ocean, cachedir):
                # Creates cachedir if it does not exist yet, and expires
                # any over-age cache entries straight away.
                debug('Fetcher init %s' % cachedir)
                self.ocean = ocean
                self.cachedir = cachedir
                try: os.mkdir(cachedir)
                except (OSError,IOError) as oe:
                        if oe.errno != errno.EEXIST: raise
                self._cache_scan(time.time())

        def default_ocean(self, ocean='ice'):
                # Sets the ocean, unless one has been chosen already.
                if self.ocean is None:
                        self.ocean = ocean

        def _cache_scan(self, now):
                # Deletes expired cache files; returns the ages (in
                # seconds, relative to now) of the remaining ones,
                # as an unsorted list.
                ages = []
                debug('Fetcher   scan_cache')
                for leaf in os.listdir(self.cachedir):
                        if not leaf.startswith('#'): continue
                        path = self.cachedir + '/' + leaf
                        try: s = os.stat(path)
                        except (OSError,IOError) as oe:
                                # file vanished under our feet: not an error
                                if oe.errno != errno.ENOENT: raise
                                continue
                        age = now - s.st_mtime
                        if age > opts.expire_age:
                                debug('Fetcher    expire %d %s' % (age, path))
                                try: os.remove(path)
                                except (OSError,IOError) as oe:
                                        if oe.errno != errno.ENOENT: raise
                                continue
                        ages.append(age)
                return ages

        def need_wait(self, now):
                # Returns how many seconds we should wait before making
                # another real fetch; zero (or less) means go ahead.
                #
                # Walking the cache entries from newest to oldest, each
                # one is required to be at least min_age old; min_age
                # starts at 1 and grows to (min_age+3)*1.25 for each
                # successive entry.  Entries 300s old or more never
                # cause a wait.
                ages = self._cache_scan(now)
                ages.sort()
                debug('Fetcher   ages ' + repr(ages))
                min_age = 1
                need_wait = 0
                for age in ages:
                        if age < min_age and age < 300:
                                debug('Fetcher   morewait min=%d age=%d' %
                                        (min_age, age))
                                need_wait = max(need_wait, min_age - age)
                        min_age += 3
                        min_age *= 1.25
                return need_wait

        def _rate_limit_cache_clean(self, now):
                # Sleeps for as long as need_wait says we must.
                need_wait = self.need_wait(now)
                if need_wait > 0:
                        debug('Fetcher   wait %d' % need_wait)
                        time.sleep(need_wait)

        def _read_cache(self, cache_item, now, max_age):
                # Returns the contents of the cache file cache_item if
                # it exists and is no older than max_age; otherwise None.
                # The file is always closed again before we return.
                try: f = open(cache_item, 'r')
                except (OSError,IOError) as oe:
                        if oe.errno != errno.ENOENT: raise
                        return None
                try:
                        age = now - os.fstat(f.fileno()).st_mtime
                        if age > max_age:
                                debug('Fetcher  stale %d < %d'% (max_age, age))
                                return None
                        data = f.read()
                finally:
                        f.close()
                debug('Fetcher  cached %d > %d' % (max_age, age))
                return data

        def fetch(self, url, max_age):
                # Returns the content of url as a string.
                #
                # A cached copy is used if it is no older than max_age
                # seconds, where max_age is first clamped to lie between
                # opts.min_max_age and opts.expire_age.  Otherwise the
                # url is fetched (after any rate-limiting sleep) and the
                # result is stored in the cache.
                debug('Fetcher fetch %s' % url)
                cache_corename = urllib.quote_plus(url)
                cache_item = "%s/#%s#" % (self.cachedir, cache_corename)
                now = time.time()
                max_age = max(opts.min_max_age, min(max_age, opts.expire_age))

                # '' is a legitimate cached value, so compare with None
                data = self._read_cache(cache_item, now, max_age)
                if data is not None:
                        return data

                debug('Fetcher  fetch')
                self._rate_limit_cache_clean(now)

                stream = urllib2.urlopen(url)
                try: data = stream.read()
                finally: stream.close()

                # Write to a per-process temporary name and rename into
                # place, so that a cache file is never seen half-written.
                cache_tmp = "%s/#%s~%d#" % (
                        self.cachedir, cache_corename, os.getpid())
                f = open(cache_tmp, 'w')
                try: f.write(data)
                finally: f.close()
                os.rename(cache_tmp, cache_item)
                debug('Fetcher  stored')
                return data

        def yoweb(self, kind, tail, max_age):
                # Fetches http://<ocean>.puzzlepirates.com/yoweb/<kind><tail>
                # via fetch(), defaulting the ocean first if it is unset.
                self.default_ocean()
                url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
                        self.ocean, kind, tail)
                return self.fetch(url, max_age)
149
150 #---------- logging assistance for troubled screenscrapers ----------
151
class SoupLog:
        # Collects messages describing problems found while scraping.
        #  sl.msgs = [ 'message', ... ]

        def __init__(self):
                self.msgs = list()

        def msg(self, m):
                # Records the message m.
                self.msgs.append(m)

        def soupm(self, obj, m):
                # Records m, annotated with the repr of the object
                # (normally a piece of soup) which it concerns.
                where = repr(obj)
                self.msg(m + '; in ' + where)

        def needs_msgs(self, child_souplog):
                # Takes over all the messages collected by
                # child_souplog, leaving it with none.
                self.msgs.extend(child_souplog.msgs)
                child_souplog.msgs = list()
162
def soup_text(obj):
        """Return all the text found within obj, joined together and
        with leading and trailing whitespace removed."""
        fragments = obj.findAll(text=True)
        return ''.join(fragments).strip()
166
class SomethingSoupInfo(SoupLog):
        # Base for the page scrapers: fetches one yoweb page through
        # the global fetcher and parses it, leaving the parse tree in
        # self._soup for the subclass to pick over.

        def __init__(self, kind, tail, max_age):
                SoupLog.__init__(self)
                page = fetcher.yoweb(kind, tail, max_age)
                entity_mode = BeautifulSoup.HTML_ENTITIES
                self._soup = BeautifulSoup(page, convertEntities=entity_mode)
174
175 #---------- scraper for pirate pages ----------
176
class PirateInfo(SomethingSoupInfo):
        # Public data members:
        #  pi.standings = { 'Treasure Haul': 0, ... }
        #       maps puzzle name to an index into standingvals;
        #       puzzles whose standing could not be found are absent
        #  pi.name = name
        #  pi.crew = (id, name)    or None if it could not be found
        #  pi.flag = (id, name)    or None if it could not be found
        #  pi.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, pirate, max_age=300):
                # Fetches (perhaps from the cache) and scrapes the
                # yoweb page for the named pirate.
                SomethingSoupInfo.__init__(self,
                        'pirate.wm?target=', pirate, max_age)
                self.name = pirate
                self._find_standings()
                self.crew = self._find_crewflag('crew',
                        '^/yoweb/crew/info\\.wm')
                self.flag = self._find_crewflag('flag',
                        '^/yoweb/flag/info\\.wm')

        def _find_standings(self):
                # Fills in self.standings.  Problems are collected in a
                # private log and are only copied to self.msgs if at
                # least one known puzzle ends up without a standing.
                imgs = self._soup.findAll('img',
                        src=regexp.compile('/yoweb/images/stat.*'))
                # The standing is either the word after the last '/',
                # or the word in a trailing '(ocean-wide <word>)'.
                standing_re = regexp.compile(
u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
                        )

                # found[puzzle] = [ every standing string seen for it ]
                found = { }
                for skill in puzzles:
                        found[skill] = [ ]

                skl = SoupLog()

                for img in imgs:
                        try: puzzle = img['alt']
                        except KeyError: continue

                        if not puzzle in puzzles:
                                skl.soupm(img, 'unknown puzzle: "%s"' % puzzle)
                                continue
                        key = img.findParent('td')
                        if key is None:
                                skl.soupm(img, 'puzzle at root! "%s"' % puzzle)
                                continue
                        # the standing text is in the next table cell
                        valelem = key.findNextSibling('td')
                        if valelem is None:
                                skl.soupm(key, 'puzzle missing sibling "%s"'
                                        % puzzle)
                                continue
                        valstr = soup_text(valelem)
                        match = standing_re.match(valstr)
                        if match is None:
                                skl.soupm(key, ('puzzle "%s" unparseable'+
                                        ' standing "%s"') % (puzzle, valstr))
                                continue
                        # whichever alternative matched holds the word
                        standing = match.group(match.lastindex)
                        found[puzzle].append(standing)

                self.standings = { }

                for puzzle in puzzles:
                        sl = found[puzzle]
                        if len(sl) > 1:
                                skl.msg('puzzle "%s" multiple standings %s' %
                                                (puzzle, repr(sl)))
                                continue
                        if not sl:
                                skl.msg('puzzle "%s" no standing found' % puzzle)
                                continue
                        standing = sl[0]
                        # Look the name up in the whole of standingvals,
                        # so that the last entry is recognised too.
                        try:
                                self.standings[puzzle] = (
                                        standingvals.index(standing))
                        except ValueError:
                                skl.msg('puzzle "%s" unknown standing "%s"' %
                                        (puzzle, standing))

                for puzzle in puzzles:
                        if not puzzle in self.standings:
                                # something is missing: publish the log
                                self.needs_msgs(skl)
                                break

        def _find_crewflag(self, cf, yoweb_re):
                # cf is 'crew' or 'flag'; yoweb_re matches the href of
                # links to that kind of info page.  Returns (id, name)
                # taken from the one such link on the page, or None
                # (having logged a message) if that does not work out.
                things = self._soup.findAll('a', href=regexp.compile(yoweb_re))
                if len(things) != 1:
                        self.msg('zero or several %s id references found' % cf)
                        return None
                thing = things[0]
                id_re = '\\b%sid\\=(\\w+)$' % cf
                id_haystack = thing['href']
                match = regexp.compile(id_re).search(id_haystack)
                if match is None:
                        self.soupm(thing, ('incomprehensible %s id ref'+
                                ' (%s in %s)') % (cf, id_re, id_haystack))
                        return None
                name = soup_text(thing)
                return (match.group(1), name)

        def __str__(self):
                return repr((self.crew, self.flag, self.standings, self.msgs))
275
276 #---------- scraper for crew pages ----------
277
class CrewInfo(SomethingSoupInfo):
        # Public data members:
        #  ci.crew = [ ('Captain',        ['Pirate', ...]),
        #              ('Senior Officer', [...]),
        #               ... ]
        #  ci.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, crewid, max_age=300):
                # Fetches (perhaps from the cache) and scrapes the
                # yoweb info page for the crew with the given id.
                SomethingSoupInfo.__init__(self,
                        'crew/info.wm?crewid=', crewid, max_age)
                self._find_crew()

        def _find_crew(self):
                # Fills in self.crew.  If the member table cannot be
                # located, a message is logged and self.crew is left
                # empty.
                self.crew = []
                captain_imgs = self._soup.findAll('img',
                        src='/yoweb/images/crew-captain.png')
                if len(captain_imgs) != 1:
                        self.msg('crew members: no. of captain images != 1')
                        return

                # Climb from the captain image through the enclosing
                # tables until we reach something containing a link
                # to a pirate page.
                node = captain_imgs[0]
                while not node.find('a', href=pirate_ref_re):
                        node = node.findParent('table')
                        if not node:
                                self.msg('crew members: cannot find table')
                                return

                rank_img_re = regexp.compile('/yoweb/images/crew')
                members = None  # member list of the rank being read
                for row in node.contents:
                        # strings between the rows are of no interest
                        if isinstance(row,basestring):
                                continue

                        if row.find('img', attrs={'src': rank_img_re}):
                                # a rank heading starts a new group
                                members = []
                                self.crew.append((soup_text(row), members))
                                continue

                        for link in row.findAll('a', href=pirate_ref_re):
                                if members is not None:
                                        members.append(soup_text(link))
                                else:
                                        self.soupm(link,
                                            'crew members: crew before rank')

        def __str__(self):
                return repr((self.crew, self.msgs))
325
326 #---------- pretty-printer for tables of pirate puzzle standings ----------
327
class StandingsTable:
        # Builds up a text table, one line per pirate and one column
        # per puzzle (or group of puzzles), in self.s.
        #
        # use_puzzles is a list whose entries are each either a puzzle
        # name or a list of puzzle names sharing one column.  If it is
        # None, a fixed set of duty puzzles is used when opts.ship_duty
        # is set, and all of puzzles otherwise.  Each column is
        # col_width-1 characters wide, preceded by one space.

        def __init__(self, use_puzzles=None, col_width=6):
                if use_puzzles is None:
                        if opts.ship_duty:
                                use_puzzles=[
                                        'Navigating','Battle Navigation',
                                        'Gunning',
                                        ['Sailing','Rigging'],
                                        'Bilging',
                                        'Carpentry',
                                        'Treasure Haul'
                                ]
                        else:
                                use_puzzles=puzzles
                self._puzzles = use_puzzles
                self.s = ''
                self._cw = col_width-1

        def _pline(self, pirate, puzstrs, extra):
                # Appends one line: the name column, then each of
                # puzstrs padded or truncated to the column width,
                # then extra (if true) after a space.
                self.s += ' %-*s' % (max(max_pirate_namelen, 14), pirate)
                for v in puzstrs:
                        self.s += ' %-*.*s' % (self._cw,self._cw, v)
                if extra:
                        self.s += ' ' + extra
                self.s += '\n'

        def _puzstr(self, pi, puzzle):
                # Returns the cell text for pirate pi in the column for
                # puzzle (a name, or a list of names, of which the best
                # standing is shown).  '?' if any standing is unknown;
                # empty for standing 0.  Otherwise one '*' per two
                # levels plus '+' for an odd one, preceded by the number
                # (and a space) if the column is wide enough.
                if not isinstance(puzzle,list): puzzle = [puzzle]
                try: standing = max([pi.standings[p] for p in puzzle])
                except KeyError: return '?'
                if not standing: return ''
                s = ''
                if self._cw > 4:
                        s += repr(standing)
                if self._cw > 5:
                        s += ' '
                # explicit floor division: we need an integer count
                s += '*' * (standing // 2)
                s += '+' * (standing % 2)
                return s

        def headings(self):
                # Appends the heading line.  A two-word puzzle name is
                # shortened to (up to) four characters of its first part
                # followed by the rest; grouped names are each cut to
                # half the column width and joined with '/'.
                def puzn_redact(name):
                        if isinstance(name,list):
                                return '/'.join(
                                        ["%.*s" % (self._cw//2, puzn_redact(n))
                                         for n in name])
                        spc = name.find(' ')
                        if spc < 0: return name
                        return name[0:min(4,spc)] + name[spc+1:]
                self._pline('', [puzn_redact(p) for p in self._puzzles], None)

        def literalline(self, line):
                # Appends line verbatim, followed by a newline.
                self.s += line + '\n'

        def pirate_dummy(self, name, standingstring, extra=None):
                # Appends a line for name with no real standings.
                # standingstring is repeated once per column and the
                # result is iterated over to give the cells, so
                # presumably it is meant to be a single character or a
                # one-element list - TODO confirm against callers.
                self._pline(name, standingstring * len(self._puzzles), extra)

        def pirate(self, pi, extra=None):
                # Appends the line for the pirate described by pi.
                puzstrs = [self._puzstr(pi,puz) for puz in self._puzzles]
                self._pline(pi.name, puzstrs, extra)

        def results(self):
                # Returns the whole table so far, as one string.
                return self.s
390
391 #---------- chat log parser ----------
392
class PirateAboard:
        # A plain record describing one pirate seen aboard a vessel;
        # all the members are public and freely updated by the chat
        # log tracker.
        #  pa.v                 vessel dict which this pirate is in
        #  pa.name              pirate name
        #  pa.last_time         timestamp of the most recent event
        #  pa.last_event        description of the most recent event
        #  pa.gunner            true once ordered to the guns
        #  pa.last_chat_time    timestamp of last chat, or None
        #  pa.last_chat_chan    channel of last chat, or None
        #  pa.pi                PirateInfo once fetched, else None

        def __init__(pa, pn, v, time, event):
                # identity
                pa.name = pn
                pa.v = v
                # most recent sighting
                pa.last_time = time
                pa.last_event = event
                # nothing else is known yet
                pa.gunner = False
                pa.last_chat_time = None
                pa.last_chat_chan = None
                pa.pi = None

        def pirate_info(pa):
                # Returns the PirateInfo for this pirate.  It is only
                # fetched if that can be done without a rate-limiting
                # wait; until then the result is None.
                if pa.pi:
                        return pa.pi
                if fetcher.need_wait(time.time()):
                        return pa.pi
                pa.pi = PirateInfo(pa.name, 3600)
                return pa.pi
418
class ChatLogTracker:
        # This is quite complex so we make it opaque.  Use the
        # official invokers, accessors etc.
        #
        # Reads a chat log file line by line and works out from it
        # which pirates are aboard which vessel.  The public entry
        # points are catchup, chatline, force_redisplay, changed,
        # myname, vessel and aboard.

        def __init__(self, myself_pi, logfn):
                # myself_pi is an object whose .name is our own pirate
                # name; logfn is the chat log file, opened here.
                self._pl = {}   # self._pl['Pirate'] =
                self._vl = {}   #   self._vl['Vessel']['Pirate'] = PirateAboard
                                # self._vl['Vessel']['#lastaboard']
                self._v = None          # self._v =
                self._vessel = None     #       self._vl[self._vessel]
                self._date = None       # [a,b,c] from the last date line
                self._myself = myself_pi
                self._need_redisplay = False
                self._f = file(logfn)
                self._lbuf = ''         # partial line read so far
                # [bytes read so far, size of the file when opened]
                self._progress = [0, os.fstat(self._f.fileno()).st_size]

        def force_redisplay(self):
                # Makes the next call to changed() return true.
                self._need_redisplay = True

        def _onboard_event(self,timestamp,pirate,event):
                # Notes that pirate did event aboard the current vessel
                # at timestamp.  A pirate not yet known to be on this
                # vessel gets a fresh PirateAboard (and is removed from
                # whichever vessel they were on).  Returns the
                # PirateAboard.
                try: pa = self._pl[pirate]
                except KeyError: pa = None
                if pa is not None and pa.v is self._v:
                        pa.last_time = timestamp
                        pa.last_event = event
                else:
                        if pa is not None: del pa.v[pirate]
                        pa = PirateAboard(pirate, self._v, timestamp, event)
                        self._pl[pirate] = pa
                        self._v[pirate] = pa
                self._v['#lastaboard'] = timestamp
                self.force_redisplay()
                return pa

        def _trash_vessel(self, v):
                # Forgets every pirate listed in vessel dict v.  Keys
                # starting with '#' are not pirates and are skipped.
                # v itself is not modified.
                for pn in v:
                        if pn.startswith('#'): continue
                        del self._pl[pn]
                self.force_redisplay()

        def expire_garbage(self, timestamp):
                # Forgets every vessel whose '#lastaboard' is more than
                # opts.ship_reboard_clearout before timestamp.
                # (list() because we delete from _vl as we go.)
                for (vn,v) in list(self._vl.iteritems()):
                        la = v['#lastaboard']
                        if timestamp - la > opts.ship_reboard_clearout:
                                self._debug_line_disposition(timestamp,'',
                                        'stale reset '+vn)
                                self._trash_vessel(v)
                                del self._vl[vn]

        def clear_vessel(self, timestamp):
                # Replaces what we know about the current vessel with
                # an empty record whose '#lastaboard' is timestamp.
                if self._v is not None:
                        self._trash_vessel(self._v)
                self._v = {'#lastaboard': timestamp}
                self._vl[self._vessel] = self._v

        def _debug_line_disposition(self,timestamp,l,m):
                # Debug output: timestamp, what we decided (m), line (l).
                debug('CLT %13s %-30s %s' % (timestamp,m,l))

        def chatline(self,l):
                # Processes one line l of the chat log (without its
                # trailing newline).  Every exit goes via d(), which
                # only produces debug output, so the return value is
                # not meaningful.
                #
                # NB rm, d and the nested functions below are closures
                # which look up l, m and timestamp when they are
                # called, not when they are defined; the code relies on
                # this as those variables are reassigned along the way.
                rm = lambda re: regexp.match(re,l)
                d = lambda m: self._debug_line_disposition(timestamp,l,m)
                timestamp = None

                # A date line: three numbers separated by '/', between
                # runs of '='.
                m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$')
                if m:
                        self._date = [int(x) for x in m.groups()]
                        self._previous_timestamp = None
                        return d('date '+`self._date`)

                if self._date is None:
                        return d('date unset')

                m = rm('\\[(\d\d):(\d\d):(\d\d)\\] ')
                if not m:
                        return d('no timestamp')

                # Combine the date with the time of day.  If that would
                # make time go backwards compared with the previous
                # line, advance the third date field (the day, in
                # time.mktime's tuple order) and try again; presumably
                # this is for logs which run past midnight.  The
                # trailing -1s are the weekday, yearday and dst fields.
                while True:
                        time_tuple = (self._date +
                                      [int(x) for x in m.groups()] +
                                      [-1,-1,-1])
                        timestamp = time.mktime(time_tuple)
                        if timestamp >= self._previous_timestamp: break
                        self._date[2] += 1
                        self._debug_line_disposition(timestamp,'',
                                'new date '+`self._date`)

                self._previous_timestamp = timestamp

                # Strip everything up to and including the first space
                # (ie, the time of day).  From here on rm() matches
                # against the remainder of the line.
                l = l[l.find(' ')+1:]

                # Helpers for recording events; ob1 and oba take the
                # pirate (and the thing done) from the current match m.
                def ob_x(who,event):
                        return self._onboard_event(timestamp, who, event)
                def ob1(did): ob_x(m.group(1), did); return d(did)
                def oba(did): return ob1('%s %s' % (did, m.group(2)))

                def disembark(who, how):
                        # record the event first (which also makes sure
                        # who is listed), then remove them entirely
                        ob_x(who, 'leaving '+how)
                        del self._v[who]
                        del self._pl[who]

                # We ourselves are boarding a vessel.
                m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$')
                if m:
                        pn = self._myself.name
                        self._vessel = m.group(1)
                        dm = 'boarding'

                        try:             self._v = self._vl[self._vessel]
                        except KeyError: self._v = None; dm += ' new'

                        if self._v is not None:  la = self._v['#lastaboard']
                        else:                    la = 0; dm += ' ?la'

                        # unknown vessel, or too long since we were
                        # last aboard: start with a clean slate
                        if timestamp - la > opts.ship_reboard_clearout:
                                self.clear_vessel(timestamp)
                                dm += ' stale'

                        ob_x(pn, 'we boarded')
                        self.expire_garbage(timestamp)
                        return d(dm)

                # Nothing below makes sense unless we are on a vessel.
                if self._v is None:
                        return d('no vessel')

                m = rm('(\\w+) has come aboard\\.$')
                if m: return ob1('boarded');

                m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$')
                if m:
                        (who,what) = m.groups()
                        pa = ob_x(who,'ord '+what)
                        if what == 'Gunning':
                                pa.gunner = True
                        return d('duty order')

                m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$')
                if m: oba('stopped'); return d("end")

                def chat(what):
                        # Notes that the pirate in m.group(1) chatted on
                        # channel what.  Only pirates already known to
                        # be aboard the current vessel are updated; a
                        # known pirate on another vessel is silently
                        # ignored.
                        who = m.group(1)
                        try: pa = self._pl[who]
                        except KeyError: return d('chat mystery')
                        if pa.v is self._v:
                                pa.last_chat_time = timestamp
                                pa.last_chat_chan = what
                                self.force_redisplay()
                                return d('chat '+what)

                def chat_metacmd(what):
                        # Chat whose text may be a command to us, of the
                        # form
                        #   /a [vessel:] pirate...     note as aboard
                        #   /d [vessel:] pirate...     note as departed
                        # Anything else is treated as ordinary chat.
                        (cmdr, metacmd) = m.groups()
                        metacmd = regexp.sub('\\s+', ' ', metacmd).strip()
                        m2 = regexp.match(
                                '/([ad]) (?:([A-Za-z ]+)\\s*:)?([A-Za-z ]+)$',
                                metacmd)
                        if not m2: return chat(what)

                        (cmd, vn, targets) = m2.groups()

                        if cmdr == self._myself.name: how = 'manual: /%s' % cmd
                        else: how = '/%s %s' % (cmd,cmdr)
                        if cmd == 'a': each = ob_x
                        else: each = disembark

                        # A vessel name, if given, must match the whole
                        # of the current vessel's name or its final
                        # word(s); otherwise the command is not for us.
                        if vn is not None:
                                vn = vn.title()
                                if not regexp.match(
                                                '(?:.* )?' + vn + '$',
                                                self._vessel):
                                        return chat('/%s %s:' % (cmd,vn))

                        for target in targets.split(' '):
                                if not target: continue
                                each(target.title(), how)
                        return d('/%s' % cmd)

                m = rm('(\\w+) (?:issued an order|ordered everyone) "')
                if m: return ob1('general order');

                m = rm('(\\w+) says, "')
                if m: return chat('public')

                m = rm('(\\w+) tells ye, "')
                if m: return chat('private')

                m = rm('Ye told (\\w+), "(.*)"$')
                if m: return chat_metacmd('private')

                m = rm('(\\w+) flag officer chats, "')
                if m: return chat('flag officer')

                m = rm('(\\w+) officer chats, "(.*)"$')
                if m: return chat_metacmd('officer')

                # Only if we are listed among the winners is everyone
                # else listed noted as aboard; names containing a space
                # are skipped.
                m = rm('Game over\\.  Winners: ([A-Za-z, ]+)\\.$')
                if m:
                        pl = m.group(1).split(', ')
                        if not self._myself.name in pl:
                                return d('lost boarding battle')
                        for pn in pl:
                                if ' ' in pn: continue
                                ob_x(pn,'won boarding battle')
                        return d('won boarding battle')

                m = rm('(\\w+) is eliminated\\!')
                if m: return ob1('eliminated in fray');

                m = rm('(\\w+) has left the vessel\.')
                if m:
                        disembark(m.group(1), 'disembarked')
                        return d('disembarked')

                return d('not matched')

        def _str_vessel(self, vn, v):
                # Returns a multi-line dump of vessel vn (dict v), for
                # debugging; also checks our data structure invariants.
                s = ' vessel %s\n' % vn
                s += ' '*20 + "%-*s   %13s\n" % (
                                max_pirate_namelen, '#lastaboard',
                                v['#lastaboard'])
                for pn in sorted(v.keys()):
                        if pn.startswith('#'): continue
                        pa = v[pn]
                        assert pa.v == v
                        assert self._pl[pn] == pa
                        s += ' '*20 + "%s %-*s %13s %-30s %13s %s\n" % (
                                (' ','G')[pa.gunner],
                                max_pirate_namelen, pn,
                                pa.last_time, pa.last_event,
                                pa.last_chat_time, pa.last_chat_chan)
                return s

        def __str__(self):
                # Dump of everything we know: the current vessel first,
                # then the others sorted by name.
                s = '''<ChatLogTracker
 myself %s
 vessel %s
'''                     % (self._myself.name, self._vessel)
                assert ((self._v is None and self._vessel is None) or
                        (self._v is self._vl[self._vessel]))
                if self._vessel is not None:
                        s += self._str_vessel(self._vessel, self._v)
                for vn in sorted(self._vl.keys()):
                        if vn == self._vessel: continue
                        s += self._str_vessel(vn, self._vl[vn])
                # every known pirate must be in exactly the vessel
                # which their PirateAboard says, and that vessel known
                for p in self._pl:
                        pa = self._pl[p]
                        assert pa.v[p] is pa
                        assert pa.v in self._vl.values()
                s += '>\n'
                return s

        def catchup(self, progress=None):
                # Processes everything currently available in the log
                # file.  A final line without a newline is kept in
                # _lbuf until the rest of it turns up.  progress, if
                # given, has its progress(done,total) method called
                # after each read and caughtup() at the end.
                while True:
                        more = self._f.readline()
                        if not more: break

                        self._progress[0] += len(more)
                        if progress: progress.progress(*self._progress)

                        self._lbuf += more
                        if self._lbuf.endswith('\n'):
                                self.chatline(self._lbuf.rstrip())
                                self._lbuf = ''
                                if opts.debug >= 2:
                                        debug(self.__str__())
                if progress: progress.caughtup()

        def changed(self):
                # returns whether a redisplay is needed, and resets
                # the flag
                rv = self._need_redisplay
                self._need_redisplay = False
                return rv
        def myname(self):
                # returns our pirate name
                return self._myself.name
        def vessel(self):
                # returns the vessel we're aboard or None
                return self._vessel
        def aboard(self):
                # returns a list of PirateAboard sorted by name
                if self._v is None: return []
                return [ self._v[pn]
                         for pn in sorted(self._v.keys())
                         if not pn.startswith('#') ]
700
701 #---------- implementations of actual operation modes ----------
702
def do_pirate(pirates, bu):
        """Print the scraped information for each named pirate, laid
        out like a dictionary literal keyed by pirate name.

        bu is accepted for uniformity with the other modes; unused.
        """
        print('{')
        for name in pirates:
                scraped = PirateInfo(name)
                entry = '%s: %s,' % (repr(name), scraped)
                print(entry)
        print('}')
709
def prep_crew_of(args, bu, max_age=300):
        """Return the CrewInfo for the crew of the pirate named by the
        single entry in args, or None if the pirate's crew could not
        be determined.

        bu is called with a message if args does not have exactly one
        entry.  max_age is passed on to both page fetches.
        """
        if len(args) != 1:
                bu('crew-of takes one pirate name')
        pirate = PirateInfo(args[0], max_age)
        crew = pirate.crew
        if crew is not None:
                # crew is (id, name); the info page is looked up by id
                return CrewInfo(crew[0], max_age)
        return None
715
def do_crew_of(args, bu):
        """Print the crew information for the crew of the pirate named
        in args (or None if that crew could not be determined)."""
        print(prep_crew_of(args, bu))
719
def do_standings_crew_of(args, bu):
        """Print a table of puzzle standings for every member of the
        crew of the pirate named in args, grouped by rank.

        If the pirate's crew cannot be determined, a message is written
        to stderr and nothing is printed.
        """
        ci = prep_crew_of(args, bu, 60)
        if ci is None:
                # prep_crew_of found no usable crew reference on the
                # pirate's page, so there is nothing to tabulate
                sys.stderr.write('cannot determine crew of pirate %s\n'
                        % args[0])
                return
        tab = StandingsTable()
        tab.headings()
        for (rank, members) in ci.crew:
                if not members: continue
                tab.literalline('%s:' % rank)
                for p in members:
                        # each pirate gets a different, random, maximum
                        # cache age; presumably so that the entries do
                        # not all go stale at the same moment
                        pi = PirateInfo(p, random.randint(900,1800))
                        tab.pirate(pi)
        print(tab.results())
731
class ProgressPrintPercentage:
        # Progress reporter which writes a percentage line, terminated
        # by a carriage return so that successive reports overwrite
        # each other, to the file object f (sys.stdout by default).

        def __init__(self, f=sys.stdout):
                self._f = f

        def progress_string(self,done,total):
                # returns the progress line for done out of total
                if total > 0:
                        percent = (done*100) // total
                else:
                        # nothing to scan: report completion rather
                        # than dividing by zero
                        percent = 100
                return "scan chat logs %3d%%\r" % percent

        def progress(self,*a):
                # a is (done, total), exactly as for progress_string;
                # the line is flushed so that it appears immediately
                self._f.write(self.progress_string(*a))
                self._f.flush()

        def show_init(self, pirate, ocean):
                # one-off startup announcement, on a line of its own
                self._f.write('Starting up, %s on the %s ocean\n' % (
                        pirate, ocean))

        def caughtup(self):
                # blank out the progress line once scanning is complete
                self._f.write('                   \r')
                self._f.flush()
746
747 #----- modes which use the chat log parser are quite complex -----
748
def prep_chat_log(args, bu,
                progress=ProgressPrintPercentage(),
                max_myself_age=3600):
        # Common setup for the modes which follow a chat log.
        #  args            must be exactly one chat log filename, whose
        #                  last path component starts PIRATE_OCEAN_
        #  bu              usage-error callback
        #  progress        object with show_init(); also handed to
        #                  the tracker's initial catchup()
        #  max_myself_age  max age for the lookup of our own pirate
        # Returns (myself, track): our PirateInfo and a ChatLogTracker
        # which has already read the existing log contents.
        if len(args) != 1: bu('this action takes only chat log filename')
        logfn = args[0]
        logfn_re = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_'
        match = regexp.match(logfn_re, logfn)
        if not match: bu('chat log filename is not in expected format')
        (pirate, ocean) = match.groups()
        # the ocean from the filename applies only if none was set already
        fetcher.default_ocean(ocean)

        myself = PirateInfo(pirate,max_myself_age)
        progress.show_init(pirate, fetcher.ocean)
        track = ChatLogTracker(myself, logfn)

        # lower the debug level while reading the existing log, and
        # make sure it is put back even if catchup() raises
        opts.debug -= 2
        try:
                track.catchup(progress)
        finally:
                opts.debug += 2

        track.force_redisplay()

        return (myself, track)
771
def do_track_chat_log(args, bu):
        # Mode 'track-chat-log': follow the chat log forever, polling
        # once a second and printing the tracker whenever it reports
        # that something changed.
        (myself, tracker) = prep_chat_log(args, bu)
        while 1:
                tracker.catchup()
                updated = tracker.changed()
                if updated:
                        print(tracker)
                time.sleep(1)
779
780 #----- ship management aid -----
781
782 class Display_dumb(ProgressPrintPercentage):
783         def __init__(self):
784                 ProgressPrintPercentage.__init__(self)
785         def show(self, s):
786                 print '\n\n', s;
787         def realstart(self):
788                 pass
789
class Display_overwrite(ProgressPrintPercentage):
        # Ship-aid display which redraws in place: each update starts
        # by homing the cursor, clears to end of line after every line
        # written, and finally clears to the end of the screen.
        #
        # self._clear  the terminal's 'clear' string, or None/empty
        # self._t      control strings by name: 'ho' (go to top left),
        #              'el' (clear to end of line), 'ed' (clear to end
        #              of screen)
        #
        # If 'el' or 'ed' is unavailable, 'ho' becomes the clear-screen
        # string and the other two become empty, so every update is a
        # full clear and redraw.  If even 'clear' is unavailable the
        # instance behaves like the scrolling display.

        def __init__(self):
                ProgressPrintPercentage.__init__(self)

                # terminfo is set up on /dev/null; only the capability
                # strings are wanted, and output goes via sys.stdout
                null = open('/dev/null','w')
                curses.setupterm(fd=null.fileno())

                self._t = {'el':'', 'ed':''}
                self._clear = curses.tigetstr('clear')
                if not self._clear:
                        self._debug('missing clear!')
                        # rebind show on this instance to a bound method
                        # (an unbound method of another class could not
                        # be called with only the text as its argument)
                        self.show = self._show_dumb
                        return

                if not self._init_sophisticated():
                        for k in self._t.keys(): self._t[k] = ''
                        self._t['ho'] = self._clear

        def _debug(self,m): debug('display overwrite: '+m)

        def _show_dumb(self, s):
                # fallback show(): just print, letting the screen scroll
                print('\n\n%s' % (s,))

        def _init_sophisticated(self):
                # Fills in self._t from terminfo.  Returns true if every
                # string was found, false (after a debug message naming
                # the missing ones) otherwise.
                for k in self._t.keys():
                        self._t[k] = curses.tigetstr(k)
                # NOTE(review): 'ho' looks like the termcap code rather
                # than the terminfo name ('home'), so this lookup may
                # always fail and fall through to 'cup' -- confirm
                self._t['ho'] = curses.tigetstr('ho')
                if not self._t['ho']:
                        cup = curses.tigetstr('cup')
                        # without cup there is nothing to parameterise;
                        # leave 'ho' unset so it is reported as missing
                        if cup:
                                self._t['ho'] = curses.tparm(cup,0,0)
                missing = [k for k in self._t.keys() if not self._t[k]]
                if missing:
                        self._debug('missing '+(' '.join(missing)))
                        return False
                return True

        def show(self, s):
                # Redraws the screen with the text s, whose lines are
                # separated by '\n'; trailing whitespace is dropped.
                w = sys.stdout.write
                def wti(k): w(self._t[k])

                wti('ho')
                nl = ''
                for l in s.rstrip().split('\n'):
                        w(nl)
                        w(l)
                        wti('el')
                        nl = '\r\n'
                wti('ed')
                w(' ')
                sys.stdout.flush()

        def realstart(self):
                # Clears the screen once, before the first real update.
                # With no clear string there is nothing to write.
                if not self._clear: return
                sys.stdout.write(self._clear)
                sys.stdout.flush()
842                         
843
def do_ship_aid(args, bu):
        # Mode 'ship-aid': once a second, redraw a table of everyone
        # aboard our vessel with their standings, a gunner flag, and
        # how long ago their last event and last chat were seen.
        #  args  exactly one chat log filename (see prep_chat_log)
        #  bu    usage-error callback
        # The display object is an instance of the class called
        # 'Display_' + opts.display; it also reports startup progress.
        if opts.ship_duty is None:
                opts.ship_duty = True

        display_class = globals()['Display_'+opts.display]
        displayer = display_class()
        spinner = '/-\\'

        (myself, track) = prep_chat_log(args, bu, progress=displayer)

        def timeevent(when, what):
                # one fixed-width column: age of the event, then its
                # description; 'now' is the enclosing loop's timestamp
                if when is None:
                        return ' ' * 22
                age = format_time_interval(now - when)
                return " %-4s %-16s" % (age, what)

        displayer.realstart()

        while 1:
                track.catchup()
                now = time.time()

                heading = "%s" % track.myname()
                vessel_name = track.vessel()
                if vessel_name is not None:
                        heading += " on board the %s" % vessel_name
                else:
                        heading += " not on a vessel?!"
                heading += " at %s\n" % time.strftime("%Y-%m-%d %H:%M:%S")

                table = StandingsTable()
                table.headings()

                for aboard in track.aboard():
                        info = aboard.pirate_info()

                        flag = '  '
                        if aboard.gunner: flag = 'G '
                        extra = flag
                        extra += timeevent(aboard.last_time,
                                           aboard.last_event)
                        extra += timeevent(aboard.last_chat_time,
                                           aboard.last_chat_chan)

                        if info is not None:
                                table.pirate(info, extra)
                        else:
                                # no info yet: show a placeholder row
                                # with the current spinner character
                                table.pirate_dummy(aboard.name,
                                                   spinner[0], extra)

                displayer.show(heading + table.results())
                time.sleep(1)
                spinner = spinner[1:3] + spinner[0]
891
892 #---------- main program ----------
893
def main():
        # Program entry point: parses the command line into the global
        # opts, creates the global fetcher, and runs the function
        # implementing the requested mode.  The mode 'foo-bar' is
        # implemented by the global function do_foo_bar, which is
        # called with the remaining arguments and the parser's error
        # function (for reporting usage errors).
        global opts, fetcher

        pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
actions:
 yoweb-scrape [--ocean OCEAN ...] pirate PIRATE
 yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
 yoweb-scrape [options] ship-aid CHAT-LOG  (must be .../PIRATE_OCEAN_chat-log*)

display modes (for --display) apply to ship-aid:
 --display=dumb       just print new information, scrolling the screen
 --display=overwrite  use cursor motion, selective clear, etc. to redraw at top
''')
        ao = pa.add_option
        ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None,
                help='select ocean OCEAN')
        ao('--cache-dir', dest='cache_dir', metavar='DIR',
                default='~/.yoweb-scrape-cache',
                help='cache yoweb pages in DIR')
        ao('-D','--debug', action='count', dest='debug', default=0,
                help='enable debugging output')
        # takes the fd number as its value; it is not a counter
        ao('--debug-fd', type='int', dest='debug_fd', metavar='FD',
                help='write any debugging output to specified fd')
        ao('-q','--quiet', action='store_true', dest='quiet',
                help='suppress warning output')
        ao('--display', action='store', dest='display',
                type='choice', choices=['dumb','overwrite'],
                help='how to display ship aid')

        # these two share a dest, which stays None if neither is given
        ao('--ship-duty', action='store_true', dest='ship_duty',
                help='show ship duty station puzzles')
        ao('--all-puzzles', action='store_false', dest='ship_duty',
                help='show all puzzles, not just ship duty stations')

        (opts,args) = pa.parse_args()
        random.seed()

        if len(args) < 1:
                pa.error('need a mode argument')

        # opts.debug_file is always set: the requested fd if one was
        # given, otherwise stdout
        if opts.debug_fd is not None:
                opts.debug_file = os.fdopen(opts.debug_fd, 'w')
        else:
                opts.debug_file = sys.stdout

        # '-' in the mode name maps to '_' in the function name; any
        # literal '_' is first turned into '#' so that it cannot match
        mode = args[0]
        mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_')
        try: mode_fn = globals()[mode_fn_name]
        except KeyError: pa.error('unknown mode "%s"' % mode)

        # fixed parameters
        opts.min_max_age = 60
        opts.expire_age = 3600
        opts.ship_reboard_clearout = 3600

        # expanduser copes with HOME being unset
        if opts.cache_dir.startswith('~/'):
                opts.cache_dir = os.path.expanduser(opts.cache_dir)

        # default display: plain scrolling output when debug output
        # would share stdout or stdout is not a terminal
        if opts.display is None:
                if ((opts.debug > 0 and opts.debug_fd is None)
                    or not os.isatty(sys.stdout.fileno())):
                        opts.display = 'dumb'
                else:
                        opts.display = 'overwrite'

        fetcher = Fetcher(opts.ocean, opts.cache_dir)

        mode_fn(args[1:], pa.error)
963
# run the program only when executed as a script, not when imported
if __name__ == '__main__':
        main()