chiark / gitweb /
7db7afac44400fe17ae3ae0e4b3a904d994c2c40
[ypp-sc-tools.main.git] / yoweb-scrape
1 #!/usr/bin/python
2
3 #---------- setup ----------
4
5 import signal
6 signal.signal(signal.SIGINT, signal.SIG_DFL)
7
8 import os
9 import time
10 import urllib
11 import urllib2
12 import errno
13 import sys
14 import re as regexp
15 import random
16 import curses
17 from optparse import OptionParser
18
19 from BeautifulSoup import BeautifulSoup
20
21 opts = None
22
23 #---------- YPP parameters and arrays ----------
24
# Names of every puzzle whose standing may appear on a pirate page,
# spelled exactly as in the 'alt' text of the standing images.
puzzles = [
        'Swordfighting', 'Bilging', 'Sailing', 'Rigging', 'Navigating',
        'Battle Navigation', 'Gunning', 'Carpentry', 'Rumble',
        'Treasure Haul', 'Drinking', 'Spades', 'Hearts', 'Treasure Drop',
        'Poker', 'Distilling', 'Alchemistry', 'Shipwrightery',
        'Blacksmithing', 'Foraging',
]

# Standing names in ascending order; a standing is stored elsewhere
# as its index into this list.
standingvals = [
        'Able', 'Distinguished', 'Respected', 'Master',
        'Renowned', 'Grand-Master', 'Legendary', 'Ultimate',
]

# Matches the href of a link to a pirate page.
pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm')

# Column width used when laying out pirate names.
max_pirate_namelen = 12
36
37
38 #---------- general utilities ----------
39
def debug(m):
        # Emit diagnostic message m on opts.debug_file, but only when
        # the debug level in opts is greater than zero.
        if not opts.debug > 0:
                return
        print >>opts.debug_file, m
43
def format_time_interval(ti):
        # Render an interval of ti seconds compactly: 'M:SS' below two
        # minutes, whole minutes below two hours, whole hours below a
        # day, and whole days from there on.
        if ti >= 86400:
                return '%dd' % (ti / 86400)
        if ti >= 7200:
                return '%dh' % (ti / 3600)
        if ti >= 120:
                return '%2dm' % (ti / 60)
        minutes = ti / 60
        seconds = ti % 60
        return '%d:%02d' % (minutes, seconds)
49
50 #---------- caching and rate-limiting data fetcher ----------
51
52 class Fetcher:
53         def __init__(self, ocean, cachedir):
54                 debug('Fetcher init %s' % cachedir)
55                 self.ocean = ocean
56                 self.cachedir = cachedir
57                 try: os.mkdir(cachedir)
58                 except (OSError,IOError), oe:
59                         if oe.errno != errno.EEXIST: raise
60                 self._cache_scan(time.time())
61
62         def default_ocean(self, ocean='ice'):
63                 if self.ocean is None:
64                         self.ocean = ocean
65
66         def _cache_scan(self, now):
67                 # returns list of ages, unsorted
68                 ages = []
69                 debug('Fetcher   scan_cache')
70                 for leaf in os.listdir(self.cachedir):
71                         if not leaf.startswith('#'): continue
72                         path = self.cachedir + '/' + leaf
73                         try: s = os.stat(path)
74                         except (OSError,IOError), oe:
75                                 if oe.errno != errno.ENOENT: raise
76                                 continue
77                         age = now - s.st_mtime
78                         if age > opts.expire_age:
79                                 debug('Fetcher    expire %d %s' % (age, path))
80                                 try: os.remove(path)
81                                 except (OSError,IOError), oe:
82                                         if oe.errno != errno.ENOENT: raise
83                                 continue
84                         ages.append(age)
85                 return ages
86
87         def need_wait(self, now):
88                 ages = self._cache_scan(now)
89                 ages.sort()
90                 debug('Fetcher   ages ' + `ages`)
91                 min_age = 1
92                 need_wait = 0
93                 for age in ages:
94                         if age < min_age and age < 300:
95                                 debug('Fetcher   morewait min=%d age=%d' %
96                                         (min_age, age))
97                                 need_wait = max(need_wait, min_age - age)
98                         min_age += 3
99                         min_age *= 1.25
100                 return need_wait
101
102         def _rate_limit_cache_clean(self, now):
103                 need_wait = self.need_wait(now)
104                 if need_wait > 0:
105                         debug('Fetcher   wait %d' % need_wait)
106                         time.sleep(need_wait)
107
108         def fetch(self, url, max_age):
109                 debug('Fetcher fetch %s' % url)
110                 cache_corename = urllib.quote_plus(url)
111                 cache_item = "%s/#%s#" % (self.cachedir, cache_corename)
112                 try: f = file(cache_item, 'r')
113                 except (OSError,IOError), oe:
114                         if oe.errno != errno.ENOENT: raise
115                         f = None
116                 now = time.time()
117                 max_age = max(opts.min_max_age, min(max_age, opts.expire_age))
118                 if f is not None:
119                         s = os.fstat(f.fileno())
120                         age = now - s.st_mtime
121                         if age > max_age:
122                                 debug('Fetcher  stale %d < %d'% (max_age, age))
123                                 f = None
124                 if f is not None:
125                         data = f.read()
126                         f.close()
127                         debug('Fetcher  cached %d > %d' % (max_age, age))
128                         return data
129
130                 debug('Fetcher  fetch')
131                 self._rate_limit_cache_clean(now)
132
133                 stream = urllib2.urlopen(url)
134                 data = stream.read()
135                 cache_tmp = "%s/#%s~%d#" % (
136                         self.cachedir, cache_corename, os.getpid())
137                 f = file(cache_tmp, 'w')
138                 f.write(data)
139                 f.close()
140                 os.rename(cache_tmp, cache_item)
141                 debug('Fetcher  stored')
142                 return data
143
144         def yoweb(self, kind, tail, max_age):
145                 self.default_ocean()
146                 url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
147                         self.ocean, kind, tail)
148                 return self.fetch(url, max_age)
149
150 #---------- logging assistance for troubled screenscrapers ----------
151
class SoupLog:
        # Collects, in self.msgs, messages describing problems met
        # while scraping.

        def __init__(self):
                self.msgs = []

        def msg(self, m):
                # Record message m.
                self.msgs.append(m)

        def soupm(self, obj, m):
                # Record message m together with the repr of the
                # object it concerns.
                where = repr(obj)
                self.msg(m + '; in ' + where)

        def needs_msgs(self, child_souplog):
                # Take over all messages held by child_souplog,
                # leaving it empty.
                self.msgs.extend(child_souplog.msgs)
                child_souplog.msgs = []
162
def soup_text(obj):
        # Join every text node found beneath obj and trim surrounding
        # whitespace from the result.
        pieces = obj.findAll(text=True)
        return ''.join(pieces).strip()
166
class SomethingSoupInfo(SoupLog):
        # Common base of the page scrapers: fetches a yoweb page
        # through the global fetcher and parses it into self._soup,
        # with HTML entities converted.

        def __init__(self, kind, tail, max_age):
                SoupLog.__init__(self)
                page = fetcher.yoweb(kind, tail, max_age)
                entities = BeautifulSoup.HTML_ENTITIES
                self._soup = BeautifulSoup(page, convertEntities=entities)
174
175 #---------- scraper for pirate pages ----------
176
class PirateInfo(SomethingSoupInfo):
        # Public data members:
        #  pi.standings = { 'Treasure Haul': 0, ... }
        #                 values are indexes into standingvals; a
        #                 puzzle whose standing could not be determined
        #                 has no entry
        #  pi.name = name
        #  pi.crew = (id, name), or None if it could not be found
        #  pi.flag = (id, name), or None if it could not be found
        #  pi.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, pirate, max_age=300):
                # Fetches and scrapes the page for the named pirate;
                # max_age is passed on to the fetcher's cache.
                SomethingSoupInfo.__init__(self,
                        'pirate.wm?target=', pirate, max_age)
                self.name = pirate
                self._find_standings()
                self.crew = self._find_crewflag('crew',
                        '^/yoweb/crew/info\\.wm')
                self.flag = self._find_crewflag('flag',
                        '^/yoweb/flag/info\\.wm')

        def _find_standings(self):
                # Fills in self.standings from the standing images on
                # the page.  Problems are collected in a private log and
                # only copied to self.msgs if at least one puzzle ends
                # up without a standing.
                imgs = self._soup.findAll('img',
                        src=regexp.compile('/yoweb/images/stat.*'))
                # Group 1 matches a plain '.../Standing'; group 2 the
                # standing inside a trailing '(ocean-wide Standing)'.
                standing_re = regexp.compile(
u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
                        )
                found = { }

                for skill in puzzles:
                        found[skill] = [ ]

                skl = SoupLog()

                for img in imgs:
                        try: puzzle = img['alt']
                        except KeyError: continue

                        if puzzle not in puzzles:
                                skl.soupm(img, 'unknown puzzle: "%s"' % puzzle)
                                continue
                        key = img.findParent('td')
                        if key is None:
                                skl.soupm(img, 'puzzle at root! "%s"' % puzzle)
                                continue
                        valelem = key.findNextSibling('td')
                        if valelem is None:
                                skl.soupm(key, 'puzzle missing sibling "%s"'
                                        % puzzle)
                                continue
                        valstr = soup_text(valelem)
                        match = standing_re.match(valstr)
                        if match is None:
                                skl.soupm(key, ('puzzle "%s" unparseable'+
                                        ' standing "%s"') % (puzzle, valstr))
                                continue
                        # Exactly one alternative, hence one group,
                        # took part in the match.
                        standing = match.group(match.lastindex)
                        found[puzzle].append(standing)

                self.standings = { }

                for puzzle in puzzles:
                        sl = found[puzzle]
                        if len(sl) > 1:
                                skl.msg('puzzle "%s" multiple standings %s' %
                                                (puzzle, repr(sl)))
                                continue
                        if not sl:
                                skl.msg('puzzle "%s" no standing found' % puzzle)
                                continue
                        standing = sl[0]
                        # Look through ALL of standingvals, including
                        # the last entry.
                        try:
                                self.standings[puzzle] = (
                                        standingvals.index(standing))
                        except ValueError:
                                skl.msg('puzzle "%s" unknown standing "%s"' %
                                        (puzzle, standing))

                for puzzle in puzzles:
                        if puzzle not in self.standings:
                                self.needs_msgs(skl)
                                break

        def _find_crewflag(self, cf, yoweb_re):
                # cf is 'crew' or 'flag'; yoweb_re matches the href of
                # the corresponding info link.  Returns (id, name) from
                # the single such link on the page, or None (after
                # logging a message) if there is not exactly one link or
                # its href does not end in '<cf>id=<id>'.
                things = self._soup.findAll('a', href=regexp.compile(yoweb_re))
                if len(things) != 1:
                        self.msg('zero or several %s id references found' % cf)
                        return None
                thing = things[0]
                id_re = '\\b%sid\\=(\\w+)$' % cf
                id_haystack = thing['href']
                match = regexp.compile(id_re).search(id_haystack)
                if match is None:
                        self.soupm(thing, ('incomprehensible %s id ref'+
                                ' (%s in %s)') % (cf, id_re, id_haystack))
                        return None
                name = soup_text(thing)
                return (match.group(1), name)

        def __str__(self):
                return repr((self.crew, self.flag, self.standings, self.msgs))
275
276 #---------- scraper for crew pages ----------
277
class CrewInfo(SomethingSoupInfo):
        # Public data members:
        #  ci.crew = [ ('Captain',        ['Pirate', ...]),
        #              ('Senior Officer', [...]),
        #               ... ]
        #  ci.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, crewid, max_age=300):
                # Fetches and scrapes the info page of the crew with
                # the given id.
                SomethingSoupInfo.__init__(self,
                        'crew/info.wm?crewid=', crewid, max_age)
                self._find_crew()

        def _find_crew(self):
                # Builds self.crew by walking the table that holds the
                # crew roster.  The table is located by starting at the
                # (unique) captain image and climbing through enclosing
                # tables until one containing a pirate link is reached.
                self.crew = []
                captain_imgs = self._soup.findAll('img',
                        src='/yoweb/images/crew-captain.png')
                if len(captain_imgs) != 1:
                        self.msg('crew members: no. of captain images != 1')
                        return

                node = captain_imgs[0]
                while True:
                        if node.find('a', href=pirate_ref_re):
                                break
                        node = node.findParent('table')
                        if not node:
                                self.msg('crew members: cannot find table')
                                return

                members = None
                rank_img_re = regexp.compile('/yoweb/images/crew')
                for row in node.contents:
                        # Bare strings between the rows carry nothing
                        # of interest.
                        if isinstance(row, basestring):
                                continue

                        if row.find('img', attrs={'src': rank_img_re}):
                                # A row with a rank image starts a new
                                # rank section.
                                rank = soup_text(row)
                                members = []
                                self.crew.append((rank, members))
                                continue

                        for link in row.findAll('a', href=pirate_ref_re):
                                if members is None:
                                        self.soupm(link,
                                                'crew members: crew before rank')
                                else:
                                        members.append(soup_text(link))

        def __str__(self):
                return repr((self.crew, self.msgs))
325
326 #---------- pretty-printer for tables of pirate puzzle standings ----------
327
class StandingsTable:
        # Builds, as one string, a text table with one line per pirate
        # and one column per puzzle (or group of puzzles) showing the
        # pirate's standing.  Retrieve the text with results().

        def __init__(self, use_puzzles=None, col_width=6):
                # use_puzzles is a list whose entries are puzzle names
                # or lists of puzzle names (a list shares one column).
                # If omitted, the ship duty puzzles are used when
                # opts.ship_duty is set, otherwise all puzzles.
                # One character of col_width is the column separator.
                if use_puzzles is None:
                        if opts.ship_duty:
                                use_puzzles=[
                                        'Navigating','Battle Navigation',
                                        'Gunning',
                                        ['Sailing','Rigging'],
                                        'Bilging',
                                        'Carpentry',
                                        'Treasure Haul'
                                ]
                        else:
                                use_puzzles=puzzles
                self._puzzles = use_puzzles
                self.s = ''
                self._cw = col_width-1

        def _pline(self, pirate, puzstrs, extra):
                # Appends one table line: the name column, then each of
                # puzstrs padded and truncated to the column width, then
                # extra (if any).
                self.s += ' %-*s' % (max(max_pirate_namelen, 14), pirate)
                for v in puzstrs:
                        self.s += ' %-*.*s' % (self._cw,self._cw, v)
                if extra:
                        self.s += ' ' + extra
                self.s += '\n'

        def _puzstr(self, pi, puzzle):
                # Returns the cell text for pi in the given column.
                # For a group of puzzles the best standing is shown.
                # '?' means a standing is unknown; the empty string
                # means standing 0.
                if not isinstance(puzzle,list): puzzle = [puzzle]
                try: standing = max([pi.standings[p] for p in puzzle])
                except KeyError: return '?'
                if not standing: return ''
                s = ''
                # Wider columns have room for the standing's number
                # and a separating space before the star rating.
                if self._cw > 4:
                        s += repr(standing)
                if self._cw > 5:
                        s += ' '
                # One '*' per two levels plus a '+' for an odd level.
                s += '*' * (standing // 2)
                s += '+' * (standing % 2)
                return s

        def headings(self):
                # Appends the heading line of abbreviated puzzle names.
                def puzn_redact(name):
                        # A group becomes its members' abbreviations,
                        # each cut to half a column, joined by '/'.
                        if isinstance(name,list):
                                return '/'.join(
                                        ["%.*s" % (self._cw//2, puzn_redact(n))
                                         for n in name])
                        # A two-word name becomes (up to) the first four
                        # letters of the first word followed by the rest.
                        spc = name.find(' ')
                        if spc < 0: return name
                        return name[0:min(4,spc)] + name[spc+1:]
                self._pline('', map(puzn_redact, self._puzzles), None)
        def literalline(self, line):
                # Appends line verbatim.
                self.s += line + '\n'
        def pirate_dummy(self, name, standingstring, extra=None):
                # Appends a line for name showing standingstring in
                # every column.  standingstring may be a string or a
                # one-element list.
                if (isinstance(standingstring, basestring)
                    and len(standingstring) > 1):
                        # Multiplying a longer string directly would
                        # yield one column per character.
                        standingstring = [standingstring]
                self._pline(name, standingstring * len(self._puzzles), extra)
        def pirate(self, pi, extra=None):
                # Appends the line for the pirate described by pi.
                puzstrs = [self._puzstr(pi,puz) for puz in self._puzzles]
                self._pline(pi.name, puzstrs, extra)

        def results(self):
                # Returns everything appended so far.
                return self.s
390
391 #---------- chat log parser ----------
392
class PirateAboard:
        # This is essentially a transparent, dumb, data class.
        #  pa.v
        #  pa.name
        #  pa.last_time
        #  pa.last_event
        #  pa.gunner
        #  pa.last_chat_time
        #  pa.last_chat_chan
        #  pa.pi

        def __init__(self, pn, v, time, event):
                # Identity and the event that put the pirate aboard.
                self.name = pn
                self.v = v
                self.last_time = time
                self.last_event = event
                # Nothing is known yet about chat, duty or standings.
                self.last_chat_time = None
                self.last_chat_chan = None
                self.gunner = False
                self.pi = None

        def pirate_info(self):
                # Returns the PirateInfo for this pirate, fetching it on
                # first use -- but only at a moment when the fetcher
                # would not have to wait.  Until then the result is
                # whatever self.pi holds (initially None).
                if self.pi:
                        return self.pi
                if fetcher.need_wait(time.time()):
                        return self.pi
                self.pi = PirateInfo(self.name, 3600)
                return self.pi
418
class ChatLogTracker:
        # This is quite complex so we make it opaque.  Use the
        # official invokers, accessors etc.
        #
        # Reads a chat log file line by line (catchup) and maintains,
        # from the messages recognised in it (chatline), a record of
        # which pirates are aboard which vessel.

        def __init__(self, myself_pi, logfn):
                # myself_pi: object whose .name is our own pirate name.
                # logfn: name of the chat log file; it is opened here and
                # kept open for later catchup() calls.
                self._pl = {}   # self._pl['Pirate'] =
                self._vl = {}   #   self._vl['Vessel']['Pirate'] = PirateAboard
                                # self._vl['Vessel']['#lastinfo']
                self._v = None          # self._v =
                self._vessel = None     #       self._vl[self._vessel]
                self._date = None
                self._myself = myself_pi
                self._need_redisplay = False
                self._f = file(logfn)
                self._lbuf = ''
                # [bytes consumed so far, size of the file when opened]
                self._progress = [0, os.fstat(self._f.fileno()).st_size]

        def force_redisplay(self):
                # Makes the next call to changed() return True.
                self._need_redisplay = True

        def _onboard_event(self,v,timestamp,pirate,event):
                # Records that pirate did something (event) aboard the
                # vessel dict v at timestamp.  A pirate previously known
                # on a different vessel is removed from that vessel and
                # gets a fresh PirateAboard.  Returns the PirateAboard.
                pa = self._pl.get(pirate, None)
                if pa is not None and pa.v is v:
                        pa.last_time = timestamp
                        pa.last_event = event
                else:
                        if pa is not None: del pa.v[pirate]
                        pa = PirateAboard(pirate, v, timestamp, event)
                        self._pl[pirate] = pa
                        v[pirate] = pa
                v['#lastinfo'] = timestamp
                self.force_redisplay()
                return pa

        def _trash_vessel(self, v):
                # Forgets every pirate recorded aboard v.  Keys starting
                # with '#' are vessel metadata, not pirates.  The caller
                # deals with the entry for v in self._vl.
                for pn in v:
                        if pn.startswith('#'): continue
                        del self._pl[pn]
                self.force_redisplay()

        def expire_garbage(self, timestamp):
                # Discards every vessel whose last information is older
                # than opts.ship_reboard_clearout relative to timestamp.
                # Iterates over a copy because entries are deleted.
                for (vn,v) in list(self._vl.iteritems()):
                        la = v['#lastinfo']
                        if timestamp - la > opts.ship_reboard_clearout:
                                self._debug_line_disposition(timestamp,'',
                                        'stale reset '+vn)
                                self._trash_vessel(v)
                                del self._vl[vn]

        def _create_vessel(self, vn, timestamp):
                # Registers a new, empty vessel dict under the name vn
                # (replacing any existing entry) and returns it.
                self._vl[vn] = v = { '#lastinfo': timestamp }
                return v

        def _update_vessel_lookup(self, vn, timestamp, dml):
                # Returns the vessel dict for vn, creating it if unknown
                # and replacing it by an empty one if its information is
                # older than opts.ship_reboard_clearout.  Appends 'new',
                # 'stale' or 'current' to the debug word list dml.
                v = self._vl.get(vn, None)
                if v is None:
                        dml.append('new')
                        v = self._create_vessel(vn, timestamp)
                elif timestamp - v['#lastinfo'] > opts.ship_reboard_clearout:
                        dml.append('stale')
                        self._trash_vessel(v)
                        v = self._create_vessel(vn, timestamp)
                else:
                        dml.append('current')
                return v

        def _debug_line_disposition(self,timestamp,l,m):
                # Debug output: timestamp, disposition m, and the line l.
                debug('CLT %13s %-30s %s' % (timestamp,m,l))

        def chatline(self,l):
                # Processes one chat log line l (without its newline).
                #
                # The helper closures below read the local variables
                # m, l and timestamp at the time they are CALLED, so
                # they always see the most recent regexp match and the
                # line with its timestamp prefix already removed.
                rm = lambda re: regexp.match(re,l)
                d = lambda m: self._debug_line_disposition(timestamp,l,m)
                timestamp = None

                # Date separator line: remember the three numbers and
                # restart the timestamp monotonicity check.
                m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$')
                if m:
                        self._date = [int(x) for x in m.groups()]
                        self._previous_timestamp = None
                        return d('date '+`self._date`)

                if self._date is None:
                        return d('date unset')

                m = rm('\\[(\d\d):(\d\d):(\d\d)\\] ')
                if not m:
                        return d('no timestamp')

                # Combine the current date with the time of day.  If the
                # result is earlier than the previous line's timestamp,
                # bump the last date field and try again.
                while True:
                        time_tuple = (self._date +
                                      [int(x) for x in m.groups()] +
                                      [-1,-1,-1])
                        timestamp = time.mktime(time_tuple)
                        if timestamp >= self._previous_timestamp: break
                        self._date[2] += 1
                        self._debug_line_disposition(timestamp,'',
                                'new date '+`self._date`)

                self._previous_timestamp = timestamp

                # Strip the '[hh:mm:ss] ' prefix.
                l = l[l.find(' ')+1:]

                # ob_x: record an event for a pirate on the current
                #       vessel.
                # ob1:  the same for the pirate in group 1 of the
                #       current match; also emits the debug line.
                # oba:  like ob1, with group 2 appended to the event.
                def ob_x(pirate,event):
                        return self._onboard_event(
                                        self._v, timestamp, pirate, event)
                def ob1(did): ob_x(m.group(1), did); return d(did)
                def oba(did): return ob1('%s %s' % (did, m.group(2)))

                # Registers the event (which ensures the pirate is
                # recorded on v) and then removes the pirate entirely.
                def disembark(v, timestamp, pirate, event):
                        self._onboard_event(
                                        v, timestamp, pirate, 'leaving '+event)
                        del v[pirate]
                        del self._pl[pirate]

                # We board a vessel ourselves: it becomes the current
                # vessel, and outdated vessels are discarded.
                m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$')
                if m:
                        dm = ['boarding']
                        pn = self._myself.name
                        self._vessel = vn = m.group(1)
                        self._v = self._update_vessel_lookup(vn, timestamp, dm)

                        ob_x(pn, 'we boarded')
                        self.expire_garbage(timestamp)
                        return d(' '.join(dm))

                # Everything below needs a current vessel.
                if self._v is None:
                        return d('no vessel')

                m = rm('(\\w+) has come aboard\\.$')
                if m: return ob1('boarded');

                m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$')
                if m:
                        (who,what) = m.groups()
                        pa = ob_x(who,'ord '+what)
                        if what == 'Gunning':
                                pa.gunner = True
                        return d('duty order')

                m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$')
                if m: oba('stopped'); return d("end")

                # Records chat on channel 'what' by the pirate in group
                # 1, if that pirate is known and on the current vessel.
                def chat(what):
                        who = m.group(1)
                        try: pa = self._pl[who]
                        except KeyError: return d('chat mystery')
                        if pa.v is self._v:
                                pa.last_chat_time = timestamp
                                pa.last_chat_chan = what
                                self.force_redisplay()
                                return d('chat '+what)

                # Handles chat whose text (group 2) may be a command of
                # the form '/a [vessel:] names...' or
                # '/d [vessel:] names...', which manually adds pirates
                # to, or removes them from, a vessel.  Anything else is
                # treated as ordinary chat.
                def chat_metacmd(what):
                        (cmdr, metacmd) = m.groups()
                        metacmd = regexp.sub('\\s+', ' ', metacmd).strip()
                        m2 = regexp.match(
                                '/([ad]) (?:([A-Za-z ]+)\\s*:)?([A-Za-z ]+)$',
                                metacmd)
                        if not m2: return chat(what)

                        (cmd, vn, targets) = m2.groups()
                        dml = ['metachat', cmd]

                        if cmd == 'a': each = self._onboard_event
                        else: each = disembark

                        if cmdr == self._myself.name:
                                dml.append('self')
                                how = 'manual: /%s' % cmd
                        else:
                                dml.append('other')
                                how = '/%s %s' % (cmd,cmdr)

                        # Work out which vessel is meant: a two-word
                        # name is looked up (or created) as given; no
                        # name means the current vessel; any other name
                        # must match the end of the current vessel's
                        # name.
                        v = None
                        if vn is not None and len(vn.split(' ')) == 2:
                                v = self._update_vessel_lookup(
                                        vn.title(), timestamp, dml)
                        elif self._v is None:
                                dml.append('no-current')
                        elif vn is None:
                                dml.append('current')
                                v = self._v
                        elif regexp.match('(?:.* )?%s$' % vn.title(),
                                        self._vessel):
                                dml.append('match')
                                v = self._v
                        else:
                                dml.append('unk-abbrev')

                        if v is None:
                                return d(' '.join(dml))

                        targets = targets.strip().split(' ')
                        dml.append(`len(targets)`)
                        for target in targets:
                                each(v, timestamp, target.title(), how)

                        return d(' '.join(dml))

                m = rm('(\\w+) (?:issued an order|ordered everyone) "')
                if m: return ob1('general order');

                m = rm('(\\w+) says, "')
                if m: return chat('public')

                m = rm('(\\w+) tells ye, "')
                if m: return chat('private')

                m = rm('Ye told (\\w+), "(.*)"$')
                if m: return chat_metacmd('private')

                m = rm('(\\w+) flag officer chats, "')
                if m: return chat('flag officer')

                m = rm('(\\w+) officer chats, "(.*)"$')
                if m: return chat_metacmd('officer')

                # If we are among the listed winners, every winner with
                # a single-word name is recorded as being aboard.
                m = rm('Game over\\.  Winners: ([A-Za-z, ]+)\\.$')
                if m:
                        pl = m.group(1).split(', ')
                        if not self._myself.name in pl:
                                return d('lost boarding battle')
                        for pn in pl:
                                if ' ' in pn: continue
                                ob_x(pn,'won boarding battle')
                        return d('won boarding battle')

                m = rm('(\\w+) is eliminated\\!')
                if m: return ob1('eliminated in fray');

                m = rm('(\\w+) has left the vessel\.')
                if m:
                        pirate = m.group(1)
                        disembark(self._v, timestamp, pirate, 'disembarked')
                        return d('disembarked')

                return d('not matched')

        def _str_vessel(self, vn, v):
                # Returns a multi-line dump of vessel vn and the pirates
                # aboard it, checking internal consistency on the way.
                s = ' vessel %s\n' % vn
                s += ' '*20 + "%-*s   %13s\n" % (
                                max_pirate_namelen, '#lastinfo',
                                v['#lastinfo'])
                for pn in sorted(v.keys()):
                        if pn.startswith('#'): continue
                        pa = v[pn]
                        assert pa.v == v
                        assert self._pl[pn] == pa
                        s += ' '*20 + "%s %-*s %13s %-30s %13s %s\n" % (
                                (' ','G')[pa.gunner],
                                max_pirate_namelen, pn,
                                pa.last_time, pa.last_event,
                                pa.last_chat_time, pa.last_chat_chan)
                return s

        def __str__(self):
                # Dump of the whole tracker state (current vessel first),
                # asserting that the pirate and vessel tables agree.
                s = '''<ChatLogTracker
 myself %s
 vessel %s
'''                     % (self._myself.name, self._vessel)
                assert ((self._v is None and self._vessel is None) or
                        (self._v is self._vl[self._vessel]))
                if self._vessel is not None:
                        s += self._str_vessel(self._vessel, self._v)
                for vn in sorted(self._vl.keys()):
                        if vn == self._vessel: continue
                        s += self._str_vessel(vn, self._vl[vn])
                for p in self._pl:
                        pa = self._pl[p]
                        assert pa.v[p] is pa
                        assert pa.v in self._vl.values()
                s += '>\n'
                return s

        def catchup(self, progress=None):
                # Reads everything currently available from the log and
                # feeds each complete line to chatline().  A trailing
                # partial line is kept in self._lbuf until the rest of it
                # arrives.  progress, if given, is notified after every
                # read and once more when the end of the file is reached.
                while True:
                        more = self._f.readline()
                        if not more: break

                        self._progress[0] += len(more)
                        if progress: progress.progress(*self._progress)

                        self._lbuf += more
                        if self._lbuf.endswith('\n'):
                                self.chatline(self._lbuf.rstrip())
                                self._lbuf = ''
                                if opts.debug >= 2:
                                        debug(self.__str__())
                if progress: progress.caughtup()

        def changed(self):
                # Returns whether a redisplay has been requested since
                # the previous call, and clears the request.
                rv = self._need_redisplay
                self._need_redisplay = False
                return rv
        def myname(self):
                # returns our pirate name
                return self._myself.name
        def vessel(self):
                # returns the vessel we're aboard or None
                return self._vessel
        def aboard(self):
                # returns a list of PirateAboard sorted by name
                if self._v is None: return []
                return [ self._v[pn]
                         for pn in sorted(self._v.keys())
                         if not pn.startswith('#') ]
724
725 #---------- implementations of actual operation modes ----------
726
727 def do_pirate(pirates, bu):
728         print '{'
729         for pirate in pirates:
730                 info = PirateInfo(pirate)
731                 print '%s: %s,' % (`pirate`, info)
732         print '}'
733
def prep_crew_of(args, bu, max_age=300):
        # Looks up the crew of the single pirate named in args.
        # Calls bu() with a message unless exactly one name was given.
        # Returns a CrewInfo built from the first element of the
        # pirate's crew attribute, or None if that attribute is None.
        # max_age is handed to both PirateInfo and CrewInfo.
        if len(args) != 1:
                bu('crew-of takes one pirate name')
        pirate = PirateInfo(args[0], max_age)
        if pirate.crew is not None:
                return CrewInfo(pirate.crew[0], max_age)
        return None
739
740 def do_crew_of(args, bu):
741         ci = prep_crew_of(args, bu)
742         print ci
743
744 def do_standings_crew_of(args, bu):
745         ci = prep_crew_of(args, bu, 60)
746         tab = StandingsTable()
747         tab.headings()
748         for (rank, members) in ci.crew:
749                 if not members: continue
750                 tab.literalline('%s:' % rank)
751                 for p in members:
752                         pi = PirateInfo(p, random.randint(900,1800))
753                         tab.pirate(pi)
754         print tab.results()
755
class ProgressPrintPercentage:
        # Progress reporter which writes a percentage to a file object
        # (sys.stdout by default).  Each report ends in a carriage
        # return rather than a newline, so on a terminal successive
        # reports overwrite each other.

        def __init__(self, f=sys.stdout):
                # f: file-like object with write() and flush()
                self._f = f

        def progress_string(self,done,total):
                # Returns the text for `done out of total'.  A total of
                # zero (or less) is reported as 100% rather than being
                # allowed to raise ZeroDivisionError.
                if total <= 0:
                        percent = 100
                else:
                        percent = (done*100) // total
                return "scan chat logs %3d%%\r" % percent

        def progress(self,*a):
                # a is (done, total); writes and flushes the progress text
                self._f.write(self.progress_string(*a))
                self._f.flush()

        def show_init(self, pirate, ocean):
                # announces which pirate and ocean we are starting up for
                print >>self._f, 'Starting up, %s on the %s ocean' % (
                        pirate, ocean)

        def caughtup(self):
                # blanks out the progress text once scanning has finished
                self._f.write('                   \r')
                self._f.flush()
770
771 #----- modes which use the chat log parser are quite complex -----
772
def prep_chat_log(args, bu,
                progress=ProgressPrintPercentage(),
                max_myself_age=3600):
        # Common startup for the modes which follow a chat log.
        #
        # args must be exactly one chat log filename whose last path
        # component starts PIRATE_OCEAN_; otherwise bu() is called with
        # a message.  The ocean from the filename is offered to the
        # fetcher as its default.  progress is told what we are
        # starting up for and is then passed to the initial scan of
        # the log.  max_myself_age is passed to PirateInfo for our own
        # pirate.
        #
        # Returns (myself, track): our own PirateInfo and a
        # ChatLogTracker which has read the log as far as it goes.
        if len(args) != 1: bu('this action takes only chat log filename')
        logfn = args[0]
        logfn_re = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_'
        match = regexp.match(logfn_re, logfn)
        if not match: bu('chat log filename is not in expected format')
        (pirate, ocean) = match.groups()
        fetcher.default_ocean(ocean)

        myself = PirateInfo(pirate,max_myself_age)
        progress.show_init(pirate, fetcher.ocean)
        track = ChatLogTracker(myself, logfn)

        # Debug level is lowered by 2 for the duration of the initial
        # scan.  The finally makes sure it is put back even if the
        # scan raises.
        opts.debug -= 2
        try:
                track.catchup(progress)
        finally:
                opts.debug += 2

        track.force_redisplay()

        return (myself, track)
795
796 def do_track_chat_log(args, bu):
797         (myself, track) = prep_chat_log(args, bu)
798         while True:
799                 track.catchup()
800                 if track.changed():
801                         print track
802                 time.sleep(1)
803
804 #----- ship management aid -----
805
806 class Display_dumb(ProgressPrintPercentage):
807         def __init__(self):
808                 ProgressPrintPercentage.__init__(self)
809         def show(self, s):
810                 print '\n\n', s;
811         def realstart(self):
812                 pass
813
814 class Display_overwrite(ProgressPrintPercentage):
815         def __init__(self):
816                 ProgressPrintPercentage.__init__(self)
817
818                 null = file('/dev/null','w')
819                 curses.setupterm(fd=null.fileno())
820
821                 self._clear = curses.tigetstr('clear')
822                 if not self._clear:
823                         self._debug('missing clear!')
824                         self.show = Display_dumb.show
825                         return
826
827                 self._t = {'el':'', 'ed':''}
828                 if not self._init_sophisticated():
829                         for k in self._t.keys(): self._t[k] = ''
830                         self._t['ho'] = self._clear
831
832         def _debug(self,m): debug('display overwrite: '+m)
833
834         def _init_sophisticated(self):
835                 for k in self._t.keys():
836                         s = curses.tigetstr(k)
837                         self._t[k] = s
838                 self._t['ho'] = curses.tigetstr('ho')
839                 if not self._t['ho']:
840                         cup = curses.tigetstr('cup')
841                         self._t['ho'] = curses.tparm(cup,0,0)
842                 missing = [k for k in self._t.keys() if not self._t[k]]
843                 if missing:
844                         self.debug('missing '+(' '.join(missing)))
845                         return 0
846                 return 1
847
848         def show(self, s):
849                 w = sys.stdout.write
850                 def wti(k): w(self._t[k])
851
852                 wti('ho')
853                 nl = ''
854                 for l in s.rstrip().split('\n'):
855                         w(nl)
856                         w(l)
857                         wti('el')
858                         nl = '\r\n'
859                 wti('ed')
860                 w(' ')
861                 sys.stdout.flush()
862
863         def realstart(self):
864                 sys.stdout.write(self._clear)
865                 sys.stdout.flush()
866                         
867
def do_ship_aid(args, bu):
        # `ship-aid' mode: once a second, shows who is aboard our
        # vessel according to the chat log, with a standings table row
        # for each pirate followed by a gunner flag and the age and
        # description of their last event and last chat.
        #
        # Never returns.
        if opts.ship_duty is None:
                opts.ship_duty = True

        display_class = globals()['Display_' + opts.display]
        displayer = display_class()
        spinner = '/-\\'

        (myself, track) = prep_chat_log(args, bu, progress=displayer)

        def timeevent(t, e):
                # one 22-column cell: how long ago t was, then e;
                # all blank if there is no time.  `now' is the variable
                # of the enclosing function, refreshed on each pass of
                # the loop below.
                if t is not None:
                        age = format_time_interval(now - t)
                        return " %-4s %-16s" % (age, e)
                return ' ' * 22

        displayer.realstart()

        while True:
                track.catchup()
                now = time.time()

                heading = "%s" % track.myname()

                vessel_name = track.vessel()
                if vessel_name is not None:
                        heading += " on board the %s" % vessel_name
                else:
                        heading += " not on a vessel?!"
                heading += " at %s\n" % time.strftime("%Y-%m-%d %H:%M:%S")

                tbl = StandingsTable()
                tbl.headings()

                for pa in track.aboard():
                        pi = pa.pirate_info()

                        if pa.gunner:
                                flag = 'G '
                        else:
                                flag = '  '
                        extra = (flag
                                 + timeevent(pa.last_time, pa.last_event)
                                 + timeevent(pa.last_chat_time,
                                             pa.last_chat_chan))

                        if pi is not None:
                                tbl.pirate(pi, extra)
                        else:
                                # no info available for this pirate:
                                # show a placeholder row with the
                                # current spinner character
                                tbl.pirate_dummy(pa.name, spinner[0], extra)

                displayer.show(heading + tbl.results())
                time.sleep(1)
                # advance the spinner by one position
                spinner = spinner[1:3] + spinner[0]
915
916 #---------- main program ----------
917
def main():
        # Program entry point: parses the command line into the global
        # opts, fills in the derived and fixed settings, creates the
        # global fetcher, and runs the do_MODE function for the
        # requested mode with the remaining arguments and the parser's
        # error function as its bad-usage callback.
        global opts, fetcher

        pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
actions:
 yoweb-scrape [--ocean OCEAN ...] pirate PIRATE
 yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
 yoweb-scrape [options] ship-aid CHAT-LOG  (must be .../PIRATE_OCEAN_chat-log*)

display modes (for --display) apply to ship-aid:
 --display=dumb       just print new information, scrolling the screen
 --display=overwrite  use cursor motion, selective clear, etc. to redraw at top
''')
        ao = pa.add_option
        ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None,
                help='select ocean OCEAN')
        ao('--cache-dir', dest='cache_dir', metavar='DIR',
                default='~/.yoweb-scrape-cache',
                help='cache yoweb pages in DIR')
        ao('-D','--debug', action='count', dest='debug', default=0,
                help='enable debugging output')
        ao('--debug-fd', type='int', dest='debug_fd',
                help='write any debugging output to specified fd')
        ao('-q','--quiet', action='store_true', dest='quiet',
                help='suppress warning output')
        ao('--display', action='store', dest='display',
                type='choice', choices=['dumb','overwrite'],
                help='how to display ship aid')

        # these two share a destination, which stays None if neither
        # is given
        ao('--ship-duty', action='store_true', dest='ship_duty',
                help='show ship duty station puzzles')
        ao('--all-puzzles', action='store_false', dest='ship_duty',
                help='show all puzzles, not just ship duty stations')

        (opts,args) = pa.parse_args()
        random.seed()

        if len(args) < 1:
                pa.error('need a mode argument')

        if opts.debug_fd is not None:
                opts.debug_file = os.fdopen(opts.debug_fd, 'w')
        else:
                opts.debug_file = sys.stdout

        # Mode `foo-bar' is implemented by do_foo_bar.  Underscores in
        # the mode are first turned into `#' so that a mode spelled
        # with an underscore never matches a function name.
        mode = args[0]
        mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_')
        try: mode_fn = globals()[mode_fn_name]
        except KeyError: pa.error('unknown mode "%s"' % mode)

        # fixed parameters
        opts.min_max_age = 60
        opts.expire_age = 3600
        opts.ship_reboard_clearout = 3600

        if opts.cache_dir.startswith('~/'):
                # expanduser copes with HOME being unset, where
                # concatenating os.getenv('HOME') would raise TypeError
                opts.cache_dir = os.path.expanduser(opts.cache_dir)

        if opts.display is None:
                # in-place redrawing is only the default when stdout is
                # a terminal and debug output is not going to be mixed
                # into it
                if ((opts.debug > 0 and opts.debug_fd is None)
                    or not os.isatty(sys.stdout.fileno())):
                        opts.display = 'dumb'
                else:
                        opts.display = 'overwrite'

        fetcher = Fetcher(opts.ocean, opts.cache_dir)

        mode_fn(args[1:], pa.error)

main()