chiark / gitweb /
3567f0dcd08d154904a67162dce9f87cca983ada
[ypp-sc-tools.db-test.git] / yoweb-scrape
1 #!/usr/bin/python
2
3 #---------- setup ----------
4
5 import signal
6 signal.signal(signal.SIGINT, signal.SIG_DFL)
7
8 import os
9 import time
10 import urllib
11 import urllib2
12 import errno
13 import sys
14 import re as regexp
15 import random
16 import curses
17 import termios
18 from optparse import OptionParser
19
20 from BeautifulSoup import BeautifulSoup
21
22 opts = None
23
24 #---------- YPP parameters and arrays ----------
25
# Names of the puzzles for which a pirate page may report a standing.
puzzles = [
        'Swordfighting', 'Bilging', 'Sailing', 'Rigging', 'Navigating',
        'Battle Navigation', 'Gunning', 'Carpentry', 'Rumble',
        'Treasure Haul', 'Drinking', 'Spades', 'Hearts', 'Treasure Drop',
        'Poker', 'Distilling', 'Alchemistry', 'Shipwrightery',
        'Blacksmithing', 'Foraging',
]

# Standing names; a standing's index in this list is the numeric value
# stored for it in PirateInfo.standings.
standingvals = [
        'Able', 'Distinguished', 'Respected', 'Master',
        'Renowned', 'Grand-Master', 'Legendary', 'Ultimate',
]

# Matches hrefs that start with the path of a pirate's yoweb page.
pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm')

# Column width used for pirate names in tabular output.
max_pirate_namelen = 12
37
38
39 #---------- general utilities ----------
40
def debug(m):
        # Prints message `m' to opts.debug_file, but only when
        # opts.debug is greater than zero.
        if opts.debug <= 0:
                return
        print >>opts.debug_file, m
44
def debug_flush():
        # Flushes opts.debug_file, but only when opts.debug is greater
        # than zero.
        if opts.debug <= 0:
                return
        opts.debug_file.flush()
48
def sleep(seconds):
        # Sleeps for `seconds' seconds.  Pending debug output is flushed
        # first so that it is visible while we are waiting.
        debug_flush()
        time.sleep(seconds)
52
def format_time_interval(ti):
        # Formats a number of seconds `ti' compactly, choosing the unit
        # from the size of the interval:
        #   at least a day      ->  '<days>d'
        #   at least two hours  ->  '<hours>h'
        #   at least 120s       ->  '<minutes>m' (padded to two digits)
        #   otherwise           ->  '<minutes>:<seconds>'
        if ti >= 86400:
                return '%dd' % (ti / 86400)
        if ti >= 7200:
                return '%dh' % (ti / 3600)
        if ti >= 120:
                return '%2dm' % (ti / 60)
        return '%d:%02d' % (ti / 60, ti % 60)
58
59 #---------- caching and rate-limiting data fetcher ----------
60
class Fetcher:
        # Caching and rate-limiting fetcher for URLs.
        #
        # Each fetched page is stored in `cachedir' in a file named
        # '#<url-quoted URL>#'.  The modification times of those files
        # are used both to decide whether a cached copy is still fresh
        # and to space out new requests (see need_wait).

        def __init__(self, ocean, cachedir):
                # `ocean' may be None, in which case yoweb() falls back
                # to a default.  Creates `cachedir' if it does not exist
                # and expires old cache entries.
                debug('Fetcher init %s' % cachedir)
                self.ocean = ocean
                self.cachedir = cachedir
                try: os.mkdir(cachedir)
                except (OSError,IOError) as oe:
                        if oe.errno != errno.EEXIST: raise
                self._cache_scan(time.time())

        def default_ocean(self, ocean='ice'):
                # Sets self.ocean to `ocean' unless one is already set.
                if self.ocean is None:
                        self.ocean = ocean

        def _cache_scan(self, now):
                # Removes cache entries older than opts.expire_age and
                # returns list of ages (in seconds relative to `now') of
                # the remaining entries, unsorted.
                ages = []
                debug('Fetcher   scan_cache')
                for leaf in os.listdir(self.cachedir):
                        if not leaf.startswith('#'): continue
                        path = self.cachedir + '/' + leaf
                        try: s = os.stat(path)
                        except (OSError,IOError) as oe:
                                # entry vanished under our feet: ignore it
                                if oe.errno != errno.ENOENT: raise
                                continue
                        age = now - s.st_mtime
                        if age > opts.expire_age:
                                debug('Fetcher    expire %d %s' % (age, path))
                                try: os.remove(path)
                                except (OSError,IOError) as oe:
                                        if oe.errno != errno.ENOENT: raise
                                continue
                        ages.append(age)
                return ages

        def need_wait(self, now, imaginary=()):
                # Returns the number of seconds we should wait before
                # making another request, or 0 if we may go ahead.
                #
                # The ages of the cache entries, plus the additional
                # pretend ages in `imaginary', are sorted youngest
                # first.  The i'th youngest must be at least min_age old,
                # where min_age starts at 1 and becomes (min_age+3)*1.25
                # for each subsequent entry.  Entries of age 300 or more
                # never cause a wait.
                ages = self._cache_scan(now)
                ages += imaginary
                ages.sort()
                debug('Fetcher   ages ' + repr(ages))
                min_age = 1
                need_wait = 0
                for age in ages:
                        if age < min_age and age < 300:
                                debug('Fetcher   morewait min=%d age=%d' %
                                        (min_age, age))
                                need_wait = max(need_wait, min_age - age)
                        min_age += 3
                        min_age *= 1.25
                return need_wait

        def _rate_limit_cache_clean(self, now):
                # Expires old cache entries and sleeps for as long as
                # need_wait says we must.
                need_wait = self.need_wait(now)
                if need_wait > 0:
                        debug('Fetcher   wait %d' % need_wait)
                        sleep(need_wait)

        def fetch(self, url, max_age):
                # Returns the contents of `url', from the cache if there
                # is a copy no older than `max_age' seconds (clamped to
                # the range opts.min_max_age .. opts.expire_age), and
                # otherwise from the network after rate limiting.  A
                # freshly fetched page is stored in the cache.
                debug('Fetcher fetch %s' % url)
                cache_corename = urllib.quote_plus(url)
                cache_item = "%s/#%s#" % (self.cachedir, cache_corename)
                try: f = open(cache_item, 'r')
                except (OSError,IOError) as oe:
                        if oe.errno != errno.ENOENT: raise
                        f = None
                now = time.time()
                max_age = max(opts.min_max_age, min(max_age, opts.expire_age))
                if f is not None:
                        # close the cache file whether or not we use it
                        try:
                                s = os.fstat(f.fileno())
                                age = now - s.st_mtime
                                if age <= max_age:
                                        data = f.read()
                                        debug('Fetcher  cached %d > %d'
                                                % (max_age, age))
                                        return data
                                debug('Fetcher  stale %d < %d'% (max_age, age))
                        finally:
                                f.close()

                debug('Fetcher  fetch')
                self._rate_limit_cache_clean(now)

                stream = urllib2.urlopen(url)
                try: data = stream.read()
                finally: stream.close()

                # write to a per-process temporary name and rename into
                # place, so the cache entry appears with complete contents
                cache_tmp = "%s/#%s~%d#" % (
                        self.cachedir, cache_corename, os.getpid())
                f = open(cache_tmp, 'w')
                try: f.write(data)
                finally: f.close()
                os.rename(cache_tmp, cache_item)
                debug('Fetcher  stored')
                return data

        def yoweb(self, kind, tail, max_age):
                # Fetches http://<ocean>.puzzlepirates.com/yoweb/<kind><tail>
                # via fetch(), defaulting the ocean if none was set.
                self.default_ocean()
                url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
                        self.ocean, kind, tail)
                return self.fetch(url, max_age)
159
160 #---------- logging assistance for troubled screenscrapers ----------
161
class SoupLog:
        # Collects, in self.msgs, messages describing problems
        # encountered while scraping.

        def __init__(self):
                self.msgs = []

        def msg(self, m):
                # Records message `m'.
                self.msgs.append(m)

        def soupm(self, obj, m):
                # Records message `m', annotated with the repr of the
                # object `obj' it is about.
                annotated = m + '; in ' + repr(obj)
                self.msg(annotated)

        def needs_msgs(self, child_souplog):
                # Takes over all the messages from `child_souplog',
                # leaving it with an empty message list.
                self.msgs.extend(child_souplog.msgs)
                child_souplog.msgs = []
172
def soup_text(obj):
        # Returns all the text found inside `obj' (via
        # obj.findAll(text=True)), concatenated, with leading and
        # trailing whitespace removed.
        return ''.join(obj.findAll(text=True)).strip()
176
class SomethingSoupInfo(SoupLog):
        # Base class for scrapers: fetches a yoweb page through the
        # global `fetcher' and parses it into self._soup.

        def __init__(self, kind, tail, max_age):
                # `kind', `tail' and `max_age' are passed straight to
                # fetcher.yoweb().
                SoupLog.__init__(self)
                page = fetcher.yoweb(kind, tail, max_age)
                entities = BeautifulSoup.HTML_ENTITIES
                self._soup = BeautifulSoup(page, convertEntities=entities)
184
185 #---------- scraper for pirate pages ----------
186
class PirateInfo(SomethingSoupInfo):
        # Public data members:
        #  pi.standings = { 'Treasure Haul': 0 ... }
        #       values are indices into standingvals; a puzzle whose
        #       standing could not be determined has no entry
        #  pi.name = name
        #  pi.crew = (id, name)         or None if not found
        #  pi.flag = (id, name)         or None if not found
        #  pi.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, pirate, max_age=300):
                # Fetches and scrapes the yoweb page for `pirate'.
                SomethingSoupInfo.__init__(self,
                        'pirate.wm?target=', pirate, max_age)
                self.name = pirate
                self._find_standings()
                self.crew = self._find_crewflag('crew',
                        '^/yoweb/crew/info\\.wm')
                self.flag = self._find_crewflag('flag',
                        '^/yoweb/flag/info\\.wm')

        def _find_standings(self):
                # Fills in self.standings from the puzzle images on the
                # page.  Problems are collected in a private SoupLog and
                # only copied to self.msgs if some puzzle ended up
                # without a standing.
                imgs = self._soup.findAll('img',
                        src=regexp.compile('/yoweb/images/stat.*'))
                # Two alternatives; in each the standing is the group
                # that matched, ie match.lastindex.
                standing_re = regexp.compile(
                        u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
                        )
                standings = { }

                for skill in puzzles:
                        standings[skill] = [ ]

                skl = SoupLog()

                for img in imgs:
                        try: puzzle = img['alt']
                        except KeyError: continue

                        if not puzzle in puzzles:
                                skl.soupm(img, 'unknown puzzle: "%s"' % puzzle)
                                continue
                        key = img.findParent('td')
                        if key is None:
                                skl.soupm(img, 'puzzle at root! "%s"' % puzzle)
                                continue
                        valelem = key.findNextSibling('td')
                        if valelem is None:
                                skl.soupm(key, 'puzzle missing sibling "%s"'
                                        % puzzle)
                                continue
                        valstr = soup_text(valelem)
                        match = standing_re.match(valstr)
                        if match is None:
                                skl.soupm(key, ('puzzle "%s" unparseable'+
                                        ' standing "%s"') % (puzzle, valstr))
                                continue
                        standing = match.group(match.lastindex)
                        standings[puzzle].append(standing)

                self.standings = { }

                for puzzle in puzzles:
                        sl = standings[puzzle]
                        if len(sl) > 1:
                                skl.msg('puzzle "%s" multiple standings %s' %
                                                (puzzle, repr(sl)))
                                continue
                        if not sl:
                                skl.msg('puzzle "%s" no standing found' % puzzle)
                                continue
                        standing = sl[0]
                        try:
                                self.standings[puzzle] = (
                                        standingvals.index(standing))
                        except ValueError:
                                skl.msg('puzzle "%s" unknown standing "%s"' %
                                        (puzzle, standing))

                # Report the collected problems only if at least one
                # puzzle has no standing.
                for puzzle in puzzles:
                        if not puzzle in self.standings:
                                self.needs_msgs(skl)
                                break

        def _find_crewflag(self, cf, yoweb_re):
                # Finds the single link whose href matches `yoweb_re' and
                # returns (id, name), where id is the value of the
                # `<cf>id' parameter at the end of the href and name is
                # the link text.  Returns None, after recording a
                # message, if there is not exactly one such link or its
                # href has no id.
                things = self._soup.findAll('a', href=regexp.compile(yoweb_re))
                if len(things) != 1:
                        self.msg('zero or several %s id references found' % cf)
                        return None
                thing = things[0]
                id_re = '\\b%sid\\=(\\w+)$' % cf
                id_haystack = thing['href']
                match = regexp.compile(id_re).search(id_haystack)
                if match is None:
                        self.soupm(thing, ('incomprehensible %s id ref'+
                                ' (%s in %s)') % (cf, id_re, id_haystack))
                        return None
                name = soup_text(thing)
                return (match.group(1), name)

        def __str__(self):
                return repr((self.crew, self.flag, self.standings, self.msgs))
285
286 #---------- scraper for crew pages ----------
287
class CrewInfo(SomethingSoupInfo):
        # Public data members:
        #  ci.crew = [ ('Captain',        ['Pirate', ...]),
        #              ('Senior Officer', [...]),
        #               ... ]
        #  ci.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, crewid, max_age=300):
                # Fetches and scrapes the yoweb page for crew `crewid'.
                SomethingSoupInfo.__init__(
                        self, 'crew/info.wm?crewid=', crewid, max_age)
                self._find_crew()

        def _find_crew(self):
                # Fills in self.crew.  Starting from the (single) captain
                # image, walks up through enclosing tables until one is
                # reached which contains a pirate link, and then reads
                # that table's children in order: a child containing a
                # crew rank image starts a new rank; pirate links in any
                # other child are members of the current rank.
                self.crew = []
                captain_imgs = self._soup.findAll(
                        'img', src='/yoweb/images/crew-captain.png')
                if len(captain_imgs) != 1:
                        self.msg('crew members: no. of captain images != 1')
                        return

                node = captain_imgs[0]
                while not node.find('a', href=pirate_ref_re):
                        node = node.findParent('table')
                        if not node:
                                self.msg('crew members: cannot find table')
                                return

                rank_img_re = regexp.compile('/yoweb/images/crew')
                members = None
                for row in node.contents:
                        # skip bare text between the rows
                        if isinstance(row, basestring):
                                continue

                        if row.find('img', attrs={'src': rank_img_re}):
                                rank = soup_text(row)
                                members = []
                                self.crew.append((rank, members))
                                continue

                        for link in row.findAll('a', href=pirate_ref_re):
                                if members is not None:
                                        members.append(soup_text(link))
                                else:
                                        self.soupm(link,
                                                'crew members: crew before rank')

        def __str__(self):
                return repr((self.crew, self.msgs))
335
336 #---------- pretty-printer for tables of pirate puzzle standings ----------
337
class StandingsTable:
        # Builds up, in self.s, a text table with one line per pirate
        # and one column per puzzle.  An entry of the puzzle list may
        # itself be a list of puzzles, in which case the column shows
        # the best of those standings.

        def __init__(self, use_puzzles=None, col_width=6):
                # `use_puzzles' defaults to a fixed set of duty puzzles
                # if opts.ship_duty is set, and to all puzzles otherwise.
                # `col_width' is the column width including the
                # separating space.
                if use_puzzles is None:
                        if opts.ship_duty:
                                use_puzzles=[
                                        'Navigating','Battle Navigation',
                                        'Gunning',
                                        ['Sailing','Rigging'],
                                        'Bilging',
                                        'Carpentry',
                                        'Treasure Haul'
                                ]
                        else:
                                use_puzzles=puzzles
                self._puzzles = use_puzzles
                self.s = ''
                self._cw = col_width-1

        def _pline(self, pirate, puzstrs, extra):
                # Appends one line: the name, each of `puzstrs' padded
                # and truncated to the column width, and then `extra'
                # if that is not empty.
                self.s += ' %-*s' % (max(max_pirate_namelen, 14), pirate)
                for v in puzstrs:
                        self.s += ' %-*.*s' % (self._cw,self._cw, v)
                if extra:
                        self.s += ' ' + extra
                self.s += '\n'

        def _puzstr(self, pi, puzzle):
                # Returns the table entry for `puzzle' (a name or a list
                # of names) for PirateInfo `pi': '?' if any standing is
                # unknown, '' for standing 0, and otherwise the standing
                # number (if the column is wide enough) followed by one
                # '*' per two standing levels and a '+' for an odd one.
                if not isinstance(puzzle,list): puzzle = [puzzle]
                try: standing = max([pi.standings[p] for p in puzzle])
                except KeyError: return '?'
                if not standing: return ''
                s = ''
                if self._cw > 4:
                        s += repr(standing)
                if self._cw > 5:
                        s += ' '
                s += '*' * (standing // 2)
                s += '+' * (standing % 2)
                return s

        def headings(self):
                # Appends the heading line of abbreviated puzzle names.
                def puzn_redact(name):
                        if isinstance(name,list):
                                # each alternative gets half a column
                                return '/'.join(
                                        ["%.*s" % (self._cw // 2, puzn_redact(n))
                                         for n in name])
                        spc = name.find(' ')
                        if spc < 0: return name
                        # two-word name: up to 4 chars of the first word
                        # followed by the remainder
                        return name[0:min(4,spc)] + name[spc+1:]
                self._pline('', [puzn_redact(p) for p in self._puzzles], None)

        def literalline(self, line):
                # Appends `line' verbatim.
                self.s += line + '\n'

        def pirate_dummy(self, name, standingstring, extra=None):
                # Appends a line for `name' whose puzzle entries are
                # `standingstring' repeated once per column.
                self._pline(name, standingstring * len(self._puzzles), extra)

        def pirate(self, pi, extra=None):
                # Appends the line for PirateInfo `pi'.
                puzstrs = [self._puzstr(pi,puz) for puz in self._puzzles]
                self._pline(pi.name, puzstrs, extra)

        def results(self):
                # Returns the table text built so far.
                return self.s
400
401 #---------- chat log parser ----------
402
class PirateAboard:
        # This is essentially a transparent, dumb, data class.
        #  pa.v                 vessel record the pirate is aboard
        #  pa.name
        #  pa.last_time
        #  pa.last_event
        #  pa.gunner
        #  pa.last_chat_time
        #  pa.last_chat_chan
        #  pa.pi                PirateInfo, or None if not fetched yet
        #  pa.pi_fetched        when pa.pi was fetched (only set
        #                        once pa.pi has been)

        def __init__(self, pn, v, time, event):
                self.name = pn
                self.v = v
                self.last_time = time
                self.last_event = event
                self.last_chat_time = None
                self.last_chat_chan = None
                self.gunner = False
                self.pi = None

        def pirate_info(self):
                # Returns the PirateInfo for this pirate, fetching or
                # refreshing it if the fetcher's rate limit allows, and
                # otherwise whatever we already have (possibly None).
                now = time.time()
                if not self.pi:
                        imaginary = [1]
                else:
                        age = now - self.pi_fetched
                        # refresh after a randomly chosen 120..240s
                        guide = random.randint(120,240)
                        if age <= guide:
                                return self.pi
                        debug('PirateAboard refresh %d > %d  %s' % (
                                age, guide, self.name))
                        imaginary = [2,6]

                wait = fetcher.need_wait(now, imaginary)
                if wait:
                        debug('PirateAboard fetcher not ready %d' % wait)
                        return self.pi

                self.pi = PirateInfo(self.name, 600)
                self.pi_fetched = now
                return self.pi
443
444 class ChatLogTracker:
445         # This is quite complex so we make it opaque.  Use the
446         # official invokers, accessors etc.
447
448         def __init__(self, myself_pi, logfn):
449                 self._pl = {}   # self._pl['Pirate'] =
450                 self._vl = {}   #   self._vl['Vessel']['Pirate'] = PirateAboard
451                                 # self._vl['Vessel']['#lastinfo']
452                                 # self._vl['Vessel']['#name']
453                                 # self._v = self._vl[self._vessel]
454                 self._date = None
455                 self._myself = myself_pi
456                 self._f = file(logfn)
457                 self._lbuf = ''
458                 self._progress = [0, os.fstat(self._f.fileno()).st_size]
459                 self._disembark_myself()
460                 self._need_redisplay = False
461                 self._lastvessel = None
462
463         def _disembark_myself(self):
464                 self._v = None
465                 self._vessel = None
466                 self.force_redisplay()
467
468         def force_redisplay(self):
469                 self._need_redisplay = True
470
471         def _vessel_updated(self, v, timestamp):
472                 v['#lastinfo'] = timestamp
473                 self.force_redisplay()
474
475         def _onboard_event(self,v,timestamp,pirate,event):
476                 pa = self._pl.get(pirate, None)
477                 if pa is not None and pa.v is v:
478                         pa.last_time = timestamp
479                         pa.last_event = event
480                 else:
481                         if pa is not None: del pa.v[pirate]
482                         pa = PirateAboard(pirate, v, timestamp, event)
483                         self._pl[pirate] = pa
484                         v[pirate] = pa
485                 self._vessel_updated(v, timestamp)
486                 return pa
487
488         def _trash_vessel(self, v):
489                 for pn in v:
490                         if pn.startswith('#'): continue
491                         del self._pl[pn]
492                 vn = v['#name']
493                 del self._vl[vn]
494                 if v is self._v: self._disembark_myself()
495                 self.force_redisplay()
496
497         def _vessel_stale(self, v, timestamp):
498                 return timestamp - v['#lastinfo'] > opts.ship_reboard_clearout
499
500         def _vessel_check_expire(self, v, timestamp):
501                 if not self._vessel_stale(v, timestamp):
502                         return v
503                 self._debug_line_disposition(timestamp,'',
504                         'stale-reset ' + v['#name'])
505                 self._trash_vessel(v)
506                 return None
507
508         def expire_garbage(self, timestamp):
509                 for v in self._vl.values():
510                         self._vessel_check_expire(v, timestamp)
511
512         def _vessel_lookup(self, vn, timestamp, dml=[], create=False):
513                 v = self._vl.get(vn, None)
514                 if v is not None:
515                         v = self._vessel_check_expire(v, timestamp)
516                 if v is not None:
517                         dml.append('found')
518                         return v
519                 if not create:
520                         dml.append('no')
521                 dml.append('new')
522                 self._vl[vn] = v = { '#name': vn }
523                 self._vessel_updated(v, timestamp)
524                 return v
525
526         def _find_matching_vessel(self, pattern, timestamp, cmdr,
527                                         dml=[], create=False):
528                 # use when a commander pirate `cmdr' specified a vessel
529                 #  by name `pattern' (either may be None)
530                 # if create is true, will create the vessel
531                 #  record if an exact name is specified
532
533                 if (pattern is not None and
534                     not '*' in pattern
535                     and len(pattern.split(' ')) == 2):
536                         vn = pattern.title()
537                         dml.append('exact')
538                         return self._vessel_lookup(
539                                 vn, timestamp, dml=dml, create=create)
540
541                 if pattern is None:
542                         pattern_check = lambda vn: True
543                 else:
544                         re = '(?:.* )?%s$' % pattern.lower().replace('*','.+')
545                         pattern_check = regexp.compile(re, regexp.I).match
546
547                 tries = []
548
549                 cmdr_pa = self._pl.get(cmdr, None)
550                 if cmdr_pa: tries.append((cmdr_pa.v, 'cmdr'))
551
552                 tries.append((self._v, 'here'))
553                 tried_vns = []
554
555                 for (v, dm) in tries:
556                         if v is None: dml.append(dm+'?'); continue
557                         
558                         vn = v['#name']
559                         if not pattern_check(vn):
560                                 tried_vns.append(vn)
561                                 dml.append(dm+'#')
562                                 continue
563
564                         dml.append(dm+'!')
565                         return v
566
567                 if pattern is not None and '*' in pattern:
568                         search = [
569                                 (vn,v)
570                                 for (vn,v) in self._vl.iteritems()
571                                 if not self._vessel_stale(v, timestamp)
572                                 if pattern_check(vn)
573                                 ]
574                         #debug('CLT-RE /%s/ wanted (%s) searched (%s)' % (
575                         #       re,
576                         #       '/'.join(tried_vns),
577                         #       '/'.join([vn for (vn,v) in search])))
578
579                         if len(search)==1:
580                                 dml.append('one')
581                                 return search[0][1]
582                         elif search:
583                                 dml.append('many')
584                         else:
585                                 dml.append('none')
586
587         def _debug_line_disposition(self,timestamp,l,m):
588                 debug('CLT %13s %-40s %s' % (timestamp,m,l))
589
        def chatline(self,l):
                # Processes one line `l' of the chat log, updating our
                # idea of who is aboard which vessel.
                #
                # The line is tried against a series of patterns in
                # turn; the first one which matches decides what is done.
                # Every exit is via d(), which emits a debug message
                # saying how the line was disposed of.

                # rm matches a regexp against the start of the line.
                # NB both of these lambdas look up `l' and `timestamp'
                # when called, so they see the reassignments below.
                rm = lambda re: regexp.match(re,l)
                d = lambda m: self._debug_line_disposition(timestamp,l,m)
                timestamp = None

                # Date header line.  The three numbers are used as the
                # first three fields (year, month, day) of the tuple
                # passed to mktime below.
                m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$')
                if m:
                        self._date = [int(x) for x in m.groups()]
                        self._previous_timestamp = None
                        return d('date '+`self._date`)

                if self._date is None:
                        return d('date unset')

                # All other lines start with [HH:MM:SS]
                m = rm('\\[(\d\d):(\d\d):(\d\d)\\] ')
                if not m:
                        return d('no timestamp')

                # Combine the current date with the time of day.  If
                # that would make time go backwards, advance the day
                # and try again.
                while True:
                        time_tuple = (self._date +
                                      [int(x) for x in m.groups()] +
                                      [-1,-1,-1])
                        timestamp = time.mktime(time_tuple)
                        if timestamp >= self._previous_timestamp: break
                        self._date[2] += 1
                        self._debug_line_disposition(timestamp,'',
                                'new date '+`self._date`)

                self._previous_timestamp = timestamp

                # Strip the timestamp; from here on rm() matches against
                # the rest of the line.
                l = l[l.find(' ')+1:]

                # Helpers for recording events on the vessel we are
                # aboard.  ob1 and oba take the pirate from group 1 of
                # the most recent match `m'; oba also appends group 2 to
                # the event description.
                def ob_x(pirate,event):
                        return self._onboard_event(
                                        self._v, timestamp, pirate, event)
                def ob1(did): ob_x(m.group(1), did); return d(did)
                def oba(did): return ob1('%s %s' % (did, m.group(2)))

                # Records a final event for the pirate and then removes
                # them from vessel `v' and from the pirate list.
                def disembark(v, timestamp, pirate, event):
                        self._onboard_event(
                                        v, timestamp, pirate, 'leaving '+event)
                        del v[pirate]
                        del self._pl[pirate]

                def disembark_me(why):
                        self._disembark_myself()
                        return d('disembark-me '+why)

                # We have boarded a vessel: it becomes the current
                # vessel, and stale vessel records are expired.
                m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$')
                if m:
                        dm = ['boarding']
                        pn = self._myself.name
                        vn = m.group(1)
                        v = self._vessel_lookup(vn, timestamp, dm, create=True)
                        self._lastvessel = self._vessel = vn
                        self._v = v
                        ob_x(pn, 'we boarded')
                        self.expire_garbage(timestamp)
                        return d(' '.join(dm))

                # Everything below is only processed while we are
                # aboard a vessel.
                if self._v is None:
                        return d('no vessel')

                m = rm('(\\w+) has come aboard\\.$')
                if m: return ob1('boarded');

                m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$')
                if m:
                        (who,what) = m.groups()
                        pa = ob_x(who,'ord '+what)
                        if what == 'Gunning':
                                pa.gunner = True
                        return d('duty order')

                m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$')
                if m: oba('stopped'); return d("end")

                # Notes that `speaker' said something on channel `chan'.
                # Returns a word saying whether the speaker is unknown
                # to us, aboard some other vessel, or aboard ours; only
                # in the last case is the chat recorded.
                def chat_core(speaker, chan):
                        try: pa = self._pl[speaker]
                        except KeyError: return 'mystery'
                        if pa.v is not self._v: return 'elsewhere'
                        pa.last_chat_time = timestamp
                        pa.last_chat_chan = chan
                        self.force_redisplay()
                        return 'here'

                def chat(chan):
                        speaker = m.group(1)
                        dm = chat_core(speaker, chan)
                        return d('chat %s %s' % (chan, dm))

                # Chat which may contain a command to us, of the form
                #    /a [<vessel pattern>:] <pirate> ...
                #    /d [<vessel pattern>:] <pirate> ...
                # /a records the pirates as aboard the vessel and /d
                # removes them from it.  Anything else is ordinary chat.
                def chat_metacmd(chan):
                        (cmdr, metacmd) = m.groups()
                        metacmd = regexp.sub('\\s+', ' ', metacmd).strip()
                        m2 = regexp.match(
                                '/([ad]) (?:([A-Za-z* ]+)\\s*:)?([A-Za-z ]+)$',
                                metacmd)
                        if not m2: return chat(chan)

                        (cmd, pattern, targets) = m2.groups()
                        dml = ['cmd', chan, cmd]

                        if cmd == 'a': each = self._onboard_event
                        else: each = disembark

                        if cmdr == self._myself.name:
                                dml.append('self')
                                how = 'cmd: %s' % cmd
                        else:
                                dml.append('other')
                                how = 'cmd: %s %s' % (cmd,cmdr)

                        v = self._find_matching_vessel(
                                pattern, timestamp, cmdr, dml, create=True)

                        if v is not None:
                                targets = targets.strip().split(' ')
                                dml.append(`len(targets)`)
                                for target in targets:
                                        each(v, timestamp, target.title(), how)
                                self._vessel_updated(v, timestamp)

                        dm = ' '.join(dml)
                        chat_core(cmdr, 'cmd '+chan)
                        return d(dm)

                m = rm('(\\w+) (?:issued an order|ordered everyone) "')
                if m: return ob1('general order');

                m = rm('(\\w+) says, "')
                if m: return chat('public')

                m = rm('(\\w+) tells ye, "')
                if m: return chat('private')

                m = rm('Ye told (\\w+), "(.*)"$')
                if m: return chat_metacmd('private')

                # must be tried before the plain officer chat pattern
                # below, which would otherwise match with `flag' as the
                # speaker
                m = rm('(\\w+) flag officer chats, "')
                if m: return chat('flag officer')

                m = rm('(\\w+) officer chats, "(.*)"$')
                if m: return chat_metacmd('officer')

                # Various ways in which we ourselves leave the vessel

                m = rm('Ye accepted the offer to job with ')
                if m: return disembark_me('jobbing')

                m = rm('Ye hop on the ferry and are whisked away ')
                if m: return disembark_me('ferry')

                m = rm('Whisking away to yer home on the magical winds')
                if m: return disembark_me('home')

                # End of a melee.  The winners are recorded as aboard
                # only if we are one of them; names containing a space
                # are skipped.
                m = rm('Game over\\.  Winners: ([A-Za-z, ]+)\\.$')
                if m:
                        pl = m.group(1).split(', ')
                        if not self._myself.name in pl:
                                return d('lost melee')
                        for pn in pl:
                                if ' ' in pn: continue
                                ob_x(pn,'won melee')
                        return d('won melee')

                m = rm('(\\w+) is eliminated\\!')
                if m: return ob1('eliminated in fray');

                m = rm('(\\w+) has driven \w+ from the ship\\!')
                if m: return ob1('boarder repelled');

                m = rm('\w+ has bested (\\w+), and turns'+
                        ' to the rest of the ship\\.')
                if m: return ob1('boarder unrepelled');

                m = rm('(\\w+) has left the vessel\.')
                if m:
                        pirate = m.group(1)
                        disembark(self._v, timestamp, pirate, 'disembarked')
                        return d('disembarked')

                return d('not-matched')
770
771         def _str_vessel(self, vn, v):
772                 s = ' vessel %s\n' % vn
773                 s += ' '*20 + "%-*s   %13s\n" % (
774                                 max_pirate_namelen, '#lastinfo',
775                                 v['#lastinfo'])
776                 assert v['#name'] == vn
777                 for pn in sorted(v.keys()):
778                         if pn.startswith('#'): continue
779                         pa = v[pn]
780                         assert pa.v == v
781                         assert self._pl[pn] == pa
782                         s += ' '*20 + "%s %-*s %13s %-30s %13s %s\n" % (
783                                 (' ','G')[pa.gunner],
784                                 max_pirate_namelen, pn,
785                                 pa.last_time, pa.last_event,
786                                 pa.last_chat_time, pa.last_chat_chan)
787                 return s
788
789         def __str__(self):
790                 s = '''<ChatLogTracker
791  myself %s
792  vessel %s
793 '''                     % (self._myself.name, self._vessel)
794                 assert ((self._v is None and self._vessel is None) or
795                         (self._v is self._vl[self._vessel]))
796                 if self._vessel is not None:
797                         s += self._str_vessel(self._vessel, self._v)
798                 for vn in sorted(self._vl.keys()):
799                         if vn == self._vessel: continue
800                         s += self._str_vessel(vn, self._vl[vn])
801                 for p in self._pl:
802                         pa = self._pl[p]
803                         assert pa.v[p] is pa
804                         assert pa.v in self._vl.values()
805                 s += '>\n'
806                 return s
807
808         def catchup(self, progress=None):
809                 while True:
810                         more = self._f.readline()
811                         if not more: break
812
813                         self._progress[0] += len(more)
814                         if progress: progress.progress(*self._progress)
815
816                         self._lbuf += more
817                         if self._lbuf.endswith('\n'):
818                                 self.chatline(self._lbuf.rstrip())
819                                 self._lbuf = ''
820                                 if opts.debug >= 2:
821                                         debug(self.__str__())
822                 if progress: progress.caughtup()
823
824         def changed(self):
825                 rv = self._need_redisplay
826                 self._need_redisplay = False
827                 return rv
828         def myname(self):
829                 # returns our pirate name
830                 return self._myself.name
831         def vesselname(self):
832                 # returns the vessel name we're aboard or None
833                 return self._vessel
834         def lastvesselname(self):
835                 # returns the last vessel name we were aboard or None
836                 return self._lastvessel
837         def aboard(self, vesselname=True):
838                 # returns a list of PirateAboard the vessel
839                 #  sorted by pirate name
840                 #  you can pass this None and you'll get []
841                 #  or True for the current vessel (which is the default)
842                 if vesselname is True: v = self._v
843                 else: v = self._vl.get(vesselname.title())
844                 if v is None: return []
845                 return [ v[pn]
846                          for pn in sorted(v.keys())
847                          if not pn.startswith('#') ]
848
849 #---------- implementations of actual operation modes ----------
850
851 def do_pirate(pirates, bu):
852         print '{'
853         for pirate in pirates:
854                 info = PirateInfo(pirate)
855                 print '%s: %s,' % (`pirate`, info)
856         print '}'
857
def prep_crew_of(args, bu, max_age=300):
        # Looks up the crew of the single pirate named in args.
        #  bu is called with a message unless args has exactly one entry.
        #  max_age is passed on to both PirateInfo and CrewInfo.
        # Returns a CrewInfo made from the first element of the
        #  pirate's crew attribute, or None if that attribute is None.
        if len(args) != 1:
                bu('crew-of takes one pirate name')
        pirate = PirateInfo(args[0], max_age)
        crew_ref = pirate.crew
        if crew_ref is not None:
                return CrewInfo(crew_ref[0], max_age)
        return None
863
864 def do_crew_of(args, bu):
865         ci = prep_crew_of(args, bu)
866         print ci
867
868 def do_standings_crew_of(args, bu):
869         ci = prep_crew_of(args, bu, 60)
870         tab = StandingsTable()
871         tab.headings()
872         for (rank, members) in ci.crew:
873                 if not members: continue
874                 tab.literalline('%s:' % rank)
875                 for p in members:
876                         pi = PirateInfo(p, random.randint(900,1800))
877                         tab.pirate(pi)
878         print tab.results()
879
class ProgressPrintPercentage:
        # Progress reporter which writes a one-line percentage to a
        # file object (sys.stdout by default), ending each update with
        # a carriage return so that the next one overwrites it.
        def __init__(self, f=sys.stdout):
                self._f = f
        def progress_string(self,done,total):
                # returns the text for "done out of total"
                #  a total which is not positive is reported as 100%
                #  rather than dividing by zero
                if total > 0: percent = (done*100) // total
                else: percent = 100
                return "scan chat logs %3d%%\r" % percent
        def progress(self,*a):
                # a is (done, total), as for progress_string
                self._f.write(self.progress_string(*a))
                self._f.flush()
        def show_init(self, pirate, ocean):
                # announces startup, on a line of its own
                self._f.write('Starting up, %s on the %s ocean\n' % (
                        pirate, ocean))
        def caughtup(self):
                # blanks out the progress text
                self._f.write('                   \r')
                self._f.flush()
894
895 #----- modes which use the chat log parser are quite complex -----
896
def prep_chat_log(args, bu,
                progress=ProgressPrintPercentage(),
                max_myself_age=3600):
        # Common setup for the modes which follow a chat log.
        #  args must be exactly one chat log filename, of the form
        #   [DIR/]Pirate_ocean_...; bu is called with a message if not.
        #  The ocean from the filename is passed to fetcher.default_ocean.
        #  progress gets show_init, and then the progress callbacks
        #   made by ChatLogTracker.catchup.
        #  max_myself_age is the max age for our own PirateInfo.
        # Returns (myself, track): our PirateInfo, and a ChatLogTracker
        #  which has already read what is currently in the log.
        if len(args) != 1: bu('this action takes only chat log filename')
        logfn = args[0]
        logfn_re = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_'
        match = regexp.match(logfn_re, logfn)
        if not match: bu('chat log filename is not in expected format')
        (pirate, ocean) = match.groups()
        fetcher.default_ocean(ocean)

        progress.show_init(pirate, fetcher.ocean)
        myself = PirateInfo(pirate,max_myself_age)
        track = ChatLogTracker(myself, logfn)

        # The debug level is lowered by 2 for the initial scan.  It is
        # restored even if the scan fails, so that anything which then
        # handles the error still sees the level the user asked for.
        opts.debug -= 2
        try:
                track.catchup(progress)
        finally:
                opts.debug += 2

        track.force_redisplay()

        return (myself, track)
919
920 def do_track_chat_log(args, bu):
921         (myself, track) = prep_chat_log(args, bu)
922         while True:
923                 track.catchup()
924                 if track.changed():
925                         print track
926                 sleep(1)
927
928 #----- ship management aid -----
929
930 class Display_dumb(ProgressPrintPercentage):
931         def __init__(self):
932                 ProgressPrintPercentage.__init__(self)
933         def show(self, s):
934                 print '\n\n', s;
935         def realstart(self):
936                 pass
937
938 class Display_overwrite(ProgressPrintPercentage):
939         def __init__(self):
940                 ProgressPrintPercentage.__init__(self)
941
942                 null = file('/dev/null','w')
943                 curses.setupterm(fd=null.fileno())
944
945                 self._clear = curses.tigetstr('clear')
946                 if not self._clear:
947                         self._debug('missing clear!')
948                         self.show = Display_dumb.show
949                         return
950
951                 self._t = {'el':'', 'ed':''}
952                 if not self._init_sophisticated():
953                         for k in self._t.keys(): self._t[k] = ''
954                         self._t['ho'] = self._clear
955
956         def _debug(self,m): debug('display overwrite: '+m)
957
958         def _init_sophisticated(self):
959                 for k in self._t.keys():
960                         s = curses.tigetstr(k)
961                         self._t[k] = s
962                 self._t['ho'] = curses.tigetstr('ho')
963                 if not self._t['ho']:
964                         cup = curses.tigetstr('cup')
965                         self._t['ho'] = curses.tparm(cup,0,0)
966                 missing = [k for k in self._t.keys() if not self._t[k]]
967                 if missing:
968                         self.debug('missing '+(' '.join(missing)))
969                         return 0
970                 return 1
971
972         def show(self, s):
973                 w = sys.stdout.write
974                 def wti(k): w(self._t[k])
975
976                 wti('ho')
977                 nl = ''
978                 for l in s.rstrip().split('\n'):
979                         w(nl)
980                         w(l)
981                         wti('el')
982                         nl = '\r\n'
983                 wti('ed')
984                 w(' ')
985                 sys.stdout.flush()
986
987         def realstart(self):
988                 sys.stdout.write(self._clear)
989                 sys.stdout.flush()
990                         
991
992 def do_ship_aid(args, bu):
993         if opts.ship_duty is None: opts.ship_duty = True
994
995         displayer = globals()['Display_'+opts.display]()
996
997         (myself, track) = prep_chat_log(args, bu, progress=displayer)
998
999         displayer.realstart()
1000
1001         if os.isatty(0): kr_create = KeystrokeReader
1002         else: kr_create = DummyKeystrokeReader
1003
1004         try:
1005                 kreader = kr_create(0, 10)
1006                 ship_aid_core(myself, track, displayer, kreader)
1007         finally:
1008                 kreader.stop()
1009                 print '\n'
1010
def ship_aid_core(myself, track, displayer, kreader):
        # Display loop for ship-aid.  Each time round: catch up with
        # the chat log, show a header line plus a StandingsTable of the
        # pirates aboard, then wait in kreader.getch().
        #  Returns when 'q' is read; other keys just cause a redisplay.
        #  myself is not used here.

        def find_vessel():
                # returns (vessel name or None, text to follow our name)
                current = track.vesselname()
                if current:
                        return (current, " on board the %s" % current)
                previous = track.lastvesselname()
                if previous:
                        return (previous, " ashore from the %s" % previous)
                return (None, " not on a vessel")

        def timeevent(now, t, e):
                # fixed-width "how long ago, what" column; blank if no time
                if t is None: return ' ' * 22
                return " %-4s %-16s" % (format_time_interval(now - t),e)

        displayer.show(track.myname() + find_vessel()[1] + '...')

        # first character is shown for pirates with no info yet; the
        # string is rotated whenever getch() returns without a key
        rotate_nya = '/-\\'

        while True:
                track.catchup()
                now = time.time()

                (vn, where) = find_vessel()
                header = ''.join([
                        track.myname(), where,
                        " at %s" % time.strftime("%Y-%m-%d %H:%M:%S"),
                        kreader.info(),
                        '\n'])

                tbl = StandingsTable()
                tbl.headings()

                for pa in track.aboard(vn):
                        pi = pa.pirate_info()

                        if pa.gunner: xs = 'G '
                        else: xs = '  '
                        xs += timeevent(now, pa.last_time, pa.last_event)
                        xs += timeevent(now, pa.last_chat_time,
                                        pa.last_chat_chan)

                        if pi is not None:
                                tbl.pirate(pi, xs)
                        else:
                                tbl.pirate_dummy(pa.name, rotate_nya[0], xs)

                displayer.show(header + tbl.results())

                k = kreader.getch()
                if k == 'q':
                        break
                if k is None:
                        rotate_nya = rotate_nya[1:] + rotate_nya[:1]
1067
1068 #---------- individual keystroke input ----------
1069
class DummyKeystrokeReader:
        # Keystroke reader which never reads anything: getch() just
        # sleeps for a second and reports that no key was pressed.
        def __init__(self,fd,timeout_dummy):
                # both arguments are accepted and ignored
                pass

        def stop(self):
                # nothing to undo
                pass

        def getch(self):
                sleep(1)
                return None

        def info(self):
                # text for the display's header line
                return ' [noninteractive]'
1075
class KeystrokeReader(DummyKeystrokeReader):
        # Reads single keystrokes from the terminal on fd, with echo
        # and canonical (line at a time) input turned off.
        #  stop() puts back the settings found by the constructor.
        def __init__(self, fd, timeout_decisec=0):
                self._fd = fd
                self._saved = termios.tcgetattr(fd)
                # a second, separate copy of the settings to modify
                attrs = termios.tcgetattr(fd)
                (LFLAG, CC) = (3, 6)  # positions in the tcgetattr list
                turn_off = (termios.ECHO | termios.ECHONL |
                            termios.ICANON | termios.IEXTEN)
                attrs[LFLAG] = attrs[LFLAG] & ~turn_off
                # VMIN 0 with VTIME n: a read gives up after n tenths
                # of a second if no byte has arrived
                attrs[CC][termios.VMIN] = 0
                attrs[CC][termios.VTIME] = timeout_decisec
                termios.tcsetattr(fd, termios.TCSANOW, attrs)

        def stop(self):
                termios.tcsetattr(self._fd, termios.TCSANOW, self._saved)

        def getch(self):
                # returns the byte read, or None if the read got nothing
                debug_flush()
                byte = os.read(self._fd, 1)
                if len(byte):
                        return byte
                return None

        def info(self):
                # nothing extra to say in the header line
                return ''
1095
1096 #---------- main program ----------
1097
def main():
        # Program entry point: parses the command line into the global
        # opts, fills in the derived and fixed settings, creates the
        # global fetcher, and runs the do_* function for the chosen mode
        # with the remaining arguments and the parser's error function.
        global opts, fetcher

        pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
actions:
 yoweb-scrape [--ocean OCEAN ...] pirate PIRATE
 yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
 yoweb-scrape [options] ship-aid CHAT-LOG  (must be .../PIRATE_OCEAN_chat-log*)

display modes (for --display) apply to ship-aid:
 --display=dumb       just print new information, scrolling the screen
 --display=overwrite  use cursor motion, selective clear, etc. to redraw at top
''')
        ao = pa.add_option
        ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None,
                help='select ocean OCEAN')
        ao('--cache-dir', dest='cache_dir', metavar='DIR',
                default='~/.yoweb-scrape-cache',
                help='cache yoweb pages in DIR')
        ao('-D','--debug', action='count', dest='debug', default=0,
                help='enable debugging output')
        ao('--debug-fd', type='int', dest='debug_fd',
                help='write any debugging output to specified fd')
        ao('-q','--quiet', action='store_true', dest='quiet',
                help='suppress warning output')
        ao('--display', action='store', dest='display',
                type='choice', choices=['dumb','overwrite'],
                help='how to display ship aid')

        # these two share a destination, which stays None if neither
        # option is given
        ao('--ship-duty', action='store_true', dest='ship_duty',
                help='show ship duty station puzzles')
        ao('--all-puzzles', action='store_false', dest='ship_duty',
                help='show all puzzles, not just ship duty stations')

        (opts,args) = pa.parse_args()
        random.seed()

        if len(args) < 1:
                pa.error('need a mode argument')

        if opts.debug_fd is not None:
                opts.debug_file = os.fdopen(opts.debug_fd, 'w')
        else:
                opts.debug_file = sys.stdout

        # Mode "foo-bar" is implemented by do_foo_bar.  Any '_' in the
        # mode is turned into '#' first, so that only the hyphenated
        # spelling can match a function name.
        mode = args[0]
        mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_')
        try: mode_fn = globals()[mode_fn_name]
        except KeyError: pa.error('unknown mode "%s"' % mode)

        # fixed parameters
        opts.min_max_age = 60
        opts.expire_age = 3600
        opts.ship_reboard_clearout = 3600

        # expanduser copes with HOME being unset, where concatenating
        # os.getenv('HOME') would fail with a TypeError
        opts.cache_dir = os.path.expanduser(opts.cache_dir)

        if opts.display is None:
                # overwriting the screen is only sensible on a tty, and
                # only if debug output is not going to stdout as well
                if ((opts.debug > 0 and opts.debug_fd is None)
                    or not os.isatty(sys.stdout.fileno())):
                        opts.display = 'dumb'
                else:
                        opts.display = 'overwrite'

        fetcher = Fetcher(opts.ocean, opts.cache_dir)

        mode_fn(args[1:], pa.error)
1169
# run only when executed as a program, not when loaded as a module
if __name__ == '__main__':
        main()