chiark / gitweb /
prettier startup output
[ypp-sc-tools.db-test.git] / yoweb-scrape
1 #!/usr/bin/python
2
3 #---------- setup ----------
4
import signal
# Put SIGINT back to the operating system's default action, so that an
# interrupt (^C) ends the process directly instead of surfacing inside
# Python as a KeyboardInterrupt.
signal.signal(signal.SIGINT, signal.SIG_DFL)
7
8 import os
9 import time
10 import urllib
11 import urllib2
12 import errno
13 import sys
14 import re as regexp
15 import random
16 import curses
17 from optparse import OptionParser
18
19 from BeautifulSoup import BeautifulSoup
20
# Global options object, read throughout this file (opts.debug,
# opts.debug_file, opts.expire_age, opts.min_max_age, opts.ship_duty,
# opts.ship_reboard_clearout).  It is None until assigned elsewhere --
# presumably from the OptionParser results; the assignment is not in
# this part of the file.
opts = None
22
23 #---------- YPP parameters and arrays ----------
24
# Names of the puzzles, spelled as they are looked up in the `alt'
# text of the standing images on a pirate page.
puzzles = [
        'Swordfighting', 'Bilging', 'Sailing', 'Rigging', 'Navigating',
        'Battle Navigation', 'Gunning', 'Carpentry', 'Rumble',
        'Treasure Haul',
        'Drinking', 'Spades', 'Hearts', 'Treasure Drop', 'Poker',
        'Distilling',
        'Alchemistry', 'Shipwrightery', 'Blacksmithing', 'Foraging',
]

# Standing names; a standing is represented elsewhere in this file by
# its index in this list.
standingvals = [
        'Able', 'Distinguished', 'Respected', 'Master',
        'Renowned', 'Grand-Master', 'Legendary', 'Ultimate',
]

# Matches the href of a link to a pirate's yoweb page.
pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm')

# Column width used for pirate names in formatted output.
max_pirate_namelen = 12
36
37
38 #---------- general utilities ----------
39
def debug(m):
        """Print the message `m' to opts.debug_file if opts.debug is positive."""
        if not opts.debug > 0:
                return
        print >>opts.debug_file, m
43
def sleep(seconds):
        """Sleep for `seconds'.

        When debugging is enabled, opts.debug_file is flushed first so
        that pending debug output is written before the pause.
        """
        debugging = opts.debug > 0
        if debugging:
                opts.debug_file.flush()
        time.sleep(seconds)
48
def format_time_interval(ti):
        """Format the interval `ti' (in seconds) compactly.

        Under 2 minutes:  'M:SS'
        Under 2 hours:    whole minutes, as '%2dm'
        Under 1 day:      whole hours, as '%dh'
        Otherwise:        whole days, as '%dd'
        """
        if ti >= 86400:
                return '%dd' % (ti / 86400)
        if ti >= 7200:
                return '%dh' % (ti / 3600)
        if ti >= 120:
                return '%2dm' % (ti / 60)
        return '%d:%02d' % (ti / 60, ti % 60)
54
55 #---------- caching and rate-limiting data fetcher ----------
56
57 class Fetcher:
58         def __init__(self, ocean, cachedir):
59                 debug('Fetcher init %s' % cachedir)
60                 self.ocean = ocean
61                 self.cachedir = cachedir
62                 try: os.mkdir(cachedir)
63                 except (OSError,IOError), oe:
64                         if oe.errno != errno.EEXIST: raise
65                 self._cache_scan(time.time())
66
67         def default_ocean(self, ocean='ice'):
68                 if self.ocean is None:
69                         self.ocean = ocean
70
71         def _cache_scan(self, now):
72                 # returns list of ages, unsorted
73                 ages = []
74                 debug('Fetcher   scan_cache')
75                 for leaf in os.listdir(self.cachedir):
76                         if not leaf.startswith('#'): continue
77                         path = self.cachedir + '/' + leaf
78                         try: s = os.stat(path)
79                         except (OSError,IOError), oe:
80                                 if oe.errno != errno.ENOENT: raise
81                                 continue
82                         age = now - s.st_mtime
83                         if age > opts.expire_age:
84                                 debug('Fetcher    expire %d %s' % (age, path))
85                                 try: os.remove(path)
86                                 except (OSError,IOError), oe:
87                                         if oe.errno != errno.ENOENT: raise
88                                 continue
89                         ages.append(age)
90                 return ages
91
92         def need_wait(self, now):
93                 ages = self._cache_scan(now)
94                 ages.sort()
95                 debug('Fetcher   ages ' + `ages`)
96                 min_age = 1
97                 need_wait = 0
98                 for age in ages:
99                         if age < min_age and age < 300:
100                                 debug('Fetcher   morewait min=%d age=%d' %
101                                         (min_age, age))
102                                 need_wait = max(need_wait, min_age - age)
103                         min_age += 3
104                         min_age *= 1.25
105                 return need_wait
106
107         def _rate_limit_cache_clean(self, now):
108                 need_wait = self.need_wait(now)
109                 if need_wait > 0:
110                         debug('Fetcher   wait %d' % need_wait)
111                         sleep(need_wait)
112
113         def fetch(self, url, max_age):
114                 debug('Fetcher fetch %s' % url)
115                 cache_corename = urllib.quote_plus(url)
116                 cache_item = "%s/#%s#" % (self.cachedir, cache_corename)
117                 try: f = file(cache_item, 'r')
118                 except (OSError,IOError), oe:
119                         if oe.errno != errno.ENOENT: raise
120                         f = None
121                 now = time.time()
122                 max_age = max(opts.min_max_age, min(max_age, opts.expire_age))
123                 if f is not None:
124                         s = os.fstat(f.fileno())
125                         age = now - s.st_mtime
126                         if age > max_age:
127                                 debug('Fetcher  stale %d < %d'% (max_age, age))
128                                 f = None
129                 if f is not None:
130                         data = f.read()
131                         f.close()
132                         debug('Fetcher  cached %d > %d' % (max_age, age))
133                         return data
134
135                 debug('Fetcher  fetch')
136                 self._rate_limit_cache_clean(now)
137
138                 stream = urllib2.urlopen(url)
139                 data = stream.read()
140                 cache_tmp = "%s/#%s~%d#" % (
141                         self.cachedir, cache_corename, os.getpid())
142                 f = file(cache_tmp, 'w')
143                 f.write(data)
144                 f.close()
145                 os.rename(cache_tmp, cache_item)
146                 debug('Fetcher  stored')
147                 return data
148
149         def yoweb(self, kind, tail, max_age):
150                 self.default_ocean()
151                 url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
152                         self.ocean, kind, tail)
153                 return self.fetch(url, max_age)
154
155 #---------- logging assistance for troubled screenscrapers ----------
156
class SoupLog:
        """Collects messages describing problems met while scraping.

        Public data member:
          msgs    list of message strings, in the order recorded
        """

        def __init__(self):
                self.msgs = list()

        def msg(self, m):
                """Record the message `m'."""
                self.msgs.append(m)

        def soupm(self, obj, m):
                """Record `m' followed by the repr of the object `obj'."""
                annotated = m + '; in ' + repr(obj)
                self.msg(annotated)

        def needs_msgs(self, child_souplog):
                """Take over all the messages held by `child_souplog'.

                They are appended to our own list, and the child is
                left with a fresh empty list.
                """
                self.msgs.extend(child_souplog.msgs)
                child_souplog.msgs = list()
167
def soup_text(obj):
        """Return all the text found by obj.findAll(text=True), joined
        together and with leading and trailing whitespace stripped."""
        pieces = obj.findAll(text=True)
        return ''.join(pieces).strip()
171
class SomethingSoupInfo(SoupLog):
        """Base class for scrapers: fetches one yoweb page and parses it.

        The parsed page is left in self._soup for the subclass to
        examine; scraping problems are recorded via SoupLog.
        """

        def __init__(self, kind, tail, max_age):
                """Fetch yoweb page `kind'+`tail' (see Fetcher.yoweb) through
                the global `fetcher', and parse it with HTML entities
                converted."""
                SoupLog.__init__(self)
                page = fetcher.yoweb(kind, tail, max_age)
                entities = BeautifulSoup.HTML_ENTITIES
                self._soup = BeautifulSoup(page, convertEntities=entities)
179
180 #---------- scraper for pirate pages ----------
181
class PirateInfo(SomethingSoupInfo):
        """Scraper for a pirate's yoweb page."""
        # Public data members:
        #  pi.standings = { 'Treasure Haul': 0 ... }
        #       (values are indices into standingvals; puzzles whose
        #        standing could not be determined are absent)
        #  pi.name = name
        #  pi.crew = (id, name)     or None if not found
        #  pi.flag = (id, name)     or None if not found
        #  pi.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, pirate, max_age=300):
                """Fetch and scrape the page for the pirate named `pirate'."""
                SomethingSoupInfo.__init__(self,
                        'pirate.wm?target=', pirate, max_age)
                self.name = pirate
                self._find_standings()
                self.crew = self._find_crewflag('crew',
                        '^/yoweb/crew/info\\.wm')
                self.flag = self._find_crewflag('flag',
                        '^/yoweb/flag/info\\.wm')

        def _find_standings(self):
                """Fill in self.standings from the standing images.

                Each standing image's `alt' text names the puzzle; the
                standing itself is the text of the table cell following
                the cell which contains the image.  Problems are
                collected separately and only copied into self.msgs if
                some puzzle ends up without a standing.
                """
                imgs = self._soup.findAll('img',
                        src=regexp.compile('/yoweb/images/stat.*'))
                # Either `.../Standing' or `.../... (ocean-wide Standing)';
                # whichever alternative matches, the standing name is
                # the last group that participated in the match.
                standing_re = regexp.compile(
u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
                        )
                found = dict([(skill, []) for skill in puzzles])

                skl = SoupLog()

                for img in imgs:
                        try: puzzle = img['alt']
                        except KeyError: continue

                        if not puzzle in puzzles:
                                skl.soupm(img, 'unknown puzzle: "%s"' % puzzle)
                                continue
                        key = img.findParent('td')
                        if key is None:
                                skl.soupm(img, 'puzzle at root! "%s"' % puzzle)
                                continue
                        valelem = key.findNextSibling('td')
                        if valelem is None:
                                skl.soupm(key, 'puzzle missing sibling "%s"'
                                        % puzzle)
                                continue
                        valstr = soup_text(valelem)
                        match = standing_re.match(valstr)
                        if match is None:
                                skl.soupm(key, ('puzzle "%s" unparseable'+
                                        ' standing "%s"') % (puzzle, valstr))
                                continue
                        standing = match.group(match.lastindex)
                        found[puzzle].append(standing)

                self.standings = { }

                for puzzle in puzzles:
                        sl = found[puzzle]
                        if len(sl) > 1:
                                skl.msg('puzzle "%s" multiple standings %s' %
                                                (puzzle, repr(sl)))
                                continue
                        if not sl:
                                skl.msg('puzzle "%s" no standing found' % puzzle)
                                continue
                        standing = sl[0]
                        # Search the whole of standingvals, including
                        # the last entry.
                        try:
                                self.standings[puzzle] = (
                                        standingvals.index(standing))
                        except ValueError:
                                skl.msg('puzzle "%s" unknown standing "%s"' %
                                        (puzzle, standing))

                # Report the collected problems only if at least one
                # puzzle has no standing.
                for puzzle in puzzles:
                        if not puzzle in self.standings:
                                self.needs_msgs(skl)
                                break

        def _find_crewflag(self, cf, yoweb_re):
                """Find the pirate's crew or flag; return (id, name) or None.

                `cf' is 'crew' or 'flag'; `yoweb_re' is a regexp for
                the href of the link to look for.  There must be
                exactly one such link, and its href must end with
                `<cf>id=<id>'; otherwise a message is recorded and
                None is returned.
                """
                things = self._soup.findAll('a', href=regexp.compile(yoweb_re))
                if len(things) != 1:
                        self.msg('zero or several %s id references found' % cf)
                        return None
                thing = things[0]
                id_re = '\\b%sid\\=(\\w+)$' % cf
                id_haystack = thing['href']
                match = regexp.compile(id_re).search(id_haystack)
                if match is None:
                        self.soupm(thing, ('incomprehensible %s id ref'+
                                ' (%s in %s)') % (cf, id_re, id_haystack))
                        return None
                name = soup_text(thing)
                return (match.group(1), name)

        def __str__(self):
                return repr((self.crew, self.flag, self.standings, self.msgs))
280
281 #---------- scraper for crew pages ----------
282
class CrewInfo(SomethingSoupInfo):
        """Scraper for a crew's yoweb page."""
        # Public data members:
        #  ci.crew = [ ('Captain',        ['Pirate', ...]),
        #              ('Senior Officer', [...]),
        #               ... ]
        #  ci.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, crewid, max_age=300):
                """Fetch and scrape the page for the crew with id `crewid'."""
                SomethingSoupInfo.__init__(self,
                        'crew/info.wm?crewid=', crewid, max_age)
                self._find_crew()

        def _find_crew(self):
                """Fill in self.crew from the crew members table.

                The table is found by starting at the (unique) captain
                image and moving outwards through enclosing tables
                until one contains a link to a pirate.  Its direct
                children are then walked in order: one containing a
                crew rank image starts a new rank, and pirate links in
                any other child are added to the current rank.
                """
                self.crew = []
                captain_imgs = self._soup.findAll('img',
                        src='/yoweb/images/crew-captain.png')
                if len(captain_imgs) != 1:
                        self.msg('crew members: no. of captain images != 1')
                        return

                node = captain_imgs[0]
                while not node.find('a', href=pirate_ref_re):
                        node = node.findParent('table')
                        if not node:
                                self.msg('crew members: cannot find table')
                                return

                members = None
                rank_img_re = regexp.compile('/yoweb/images/crew')
                for row in node.contents:
                        # bare strings among the children are not rows
                        if isinstance(row,basestring):
                                continue

                        if row.find('img', attrs={'src': rank_img_re}):
                                rank = soup_text(row)
                                members = []
                                self.crew.append((rank, members))
                                continue

                        for link in row.findAll('a', href=pirate_ref_re):
                                if members is not None:
                                        members.append(soup_text(link))
                                else:
                                        self.soupm(link, 'crew members: crew'
                                                ' before rank')

        def __str__(self):
                return repr((self.crew, self.msgs))
330
331 #---------- pretty-printer for tables of pirate puzzle standings ----------
332
class StandingsTable:
        """Builds a plain-text table of pirates' puzzle standings.

        Lines are accumulated by headings(), literalline(),
        pirate_dummy() and pirate(); results() returns the text so far.
        """

        def __init__(self, use_puzzles=None, col_width=6):
                """Set up an empty table.

                `use_puzzles' lists the columns; each entry is a puzzle
                name, or a list of names whose best standing is shown
                in one column.  If None, a ship duty puzzle selection
                is used when opts.ship_duty is set, and all puzzles
                otherwise.  `col_width' is the column width including
                the separating space.
                """
                if use_puzzles is None:
                        if opts.ship_duty:
                                use_puzzles=[
                                        'Navigating','Battle Navigation',
                                        'Gunning',
                                        ['Sailing','Rigging'],
                                        'Bilging',
                                        'Carpentry',
                                        'Treasure Haul'
                                ]
                        else:
                                use_puzzles=puzzles
                self._puzzles = use_puzzles
                self.s = ''
                self._cw = col_width-1

        def _pline(self, pirate, puzstrs, extra):
                """Append one table line: the name `pirate', then one
                column (padded or truncated to the column width) per item
                of `puzstrs', then `extra' if it is not empty."""
                self.s += ' %-*s' % (max(max_pirate_namelen, 14), pirate)
                for v in puzstrs:
                        self.s += ' %-*.*s' % (self._cw,self._cw, v)
                if extra:
                        self.s += ' ' + extra
                self.s += '\n'

        def _puzstr(self, pi, puzzle):
                """Return the column text for `puzzle' for the pirate `pi'.

                `puzzle' may be a list of names, in which case the best
                standing among them is used.  Returns '?' if any
                standing is unknown, and '' for the lowest standing
                (index 0).  Otherwise the text is the standing's index
                (for columns wide enough), then one `*' per two levels
                and a `+' for an odd level.
                """
                if not isinstance(puzzle,list): puzzle = [puzzle]
                try: standing = max([pi.standings[p] for p in puzzle])
                except KeyError: return '?'
                if not standing: return ''
                s = ''
                if self._cw > 4:
                        s += repr(standing)
                if self._cw > 5:
                        s += ' '
                s += '*' * (standing // 2)
                s += '+' * (standing % 2)
                return s

        def headings(self):
                """Append the heading line, with abbreviated puzzle names."""
                def puzn_redact(name):
                        # A list of names shares one column, so each
                        # gets at most half the width.
                        if isinstance(name,list):
                                return '/'.join(
                                        ["%.*s" % (self._cw//2, puzn_redact(n))
                                         for n in name])
                        # Two-word names: keep up to 4 characters of
                        # the first word and run the rest on.
                        spc = name.find(' ')
                        if spc < 0: return name
                        return name[0:min(4,spc)] + name[spc+1:]
                self._pline('', [puzn_redact(p) for p in self._puzzles], None)

        def literalline(self, line):
                """Append `line' verbatim, followed by a newline."""
                self.s += line + '\n'

        def pirate_dummy(self, name, standingstring, extra=None):
                """Append a line for `name' with placeholder columns.

                The columns are the items of `standingstring' repeated
                once per puzzle column.
                NOTE(review): this gives exactly one item per column
                only if standingstring has length 1 (a single character
                or a one-element list) -- confirm against the callers.
                """
                self._pline(name, standingstring * len(self._puzzles), extra)

        def pirate(self, pi, extra=None):
                """Append a line showing the standings of the PirateInfo `pi'."""
                puzstrs = [self._puzstr(pi,puz) for puz in self._puzzles]
                self._pline(pi.name, puzstrs, extra)

        def results(self):
                """Return the table text accumulated so far."""
                return self.s
395
396 #---------- chat log parser ----------
397
class PirateAboard:
        """Record of one pirate aboard one vessel."""
        # This is essentially a transparent, dumb, data class.
        #  pa.v                 vessel record the pirate is aboard
        #  pa.name
        #  pa.last_time         time and description of the most
        #  pa.last_event         recent event involving the pirate
        #  pa.gunner
        #  pa.last_chat_time    time and channel of the pirate's most
        #  pa.last_chat_chan     recent chat, or None
        #  pa.pi                PirateInfo, or None if not yet fetched

        def __init__(self, pn, v, time, event):
                self.name = pn
                self.v = v
                self.last_time = time
                self.last_event = event
                self.last_chat_time = None
                self.last_chat_chan = None
                self.gunner = False
                self.pi = None

        def pirate_info(self):
                """Return the pirate's PirateInfo, or None if not available.

                The information is fetched (allowing pages up to 3600
                seconds old) only when the global fetcher reports that
                no rate-limiting wait is needed right now.
                """
                if self.pi:
                        return self.pi
                if not fetcher.need_wait(time.time()):
                        self.pi = PirateInfo(self.name, 3600)
                return self.pi
423
424 class ChatLogTracker:
425         # This is quite complex so we make it opaque.  Use the
426         # official invokers, accessors etc.
427
428         def __init__(self, myself_pi, logfn):
429                 self._pl = {}   # self._pl['Pirate'] =
430                 self._vl = {}   #   self._vl['Vessel']['Pirate'] = PirateAboard
431                                 # self._vl['Vessel']['#lastinfo']
432                                 # self._vl['Vessel']['#name']
433                                 # self._v = self._vl[self._vessel]
434                 self._date = None
435                 self._myself = myself_pi
436                 self._f = file(logfn)
437                 self._lbuf = ''
438                 self._progress = [0, os.fstat(self._f.fileno()).st_size]
439                 self._disembark_myself()
440                 self._need_redisplay = False
441                 self._lastvessel = None
442
443         def _disembark_myself(self):
444                 self._v = None
445                 self._vessel = None
446                 self.force_redisplay()
447
448         def force_redisplay(self):
449                 self._need_redisplay = True
450
451         def _vessel_updated(self, v, timestamp):
452                 v['#lastinfo'] = timestamp
453                 self.force_redisplay()
454
455         def _onboard_event(self,v,timestamp,pirate,event):
456                 pa = self._pl.get(pirate, None)
457                 if pa is not None and pa.v is v:
458                         pa.last_time = timestamp
459                         pa.last_event = event
460                 else:
461                         if pa is not None: del pa.v[pirate]
462                         pa = PirateAboard(pirate, v, timestamp, event)
463                         self._pl[pirate] = pa
464                         v[pirate] = pa
465                 self._vessel_updated(v, timestamp)
466                 return pa
467
468         def _trash_vessel(self, v):
469                 for pn in v:
470                         if pn.startswith('#'): continue
471                         del self._pl[pn]
472                 vn = v['#name']
473                 del self._vl[vn]
474                 if v is self._v: self._disembark_myself()
475                 self.force_redisplay()
476
477         def _vessel_stale(self, v, timestamp):
478                 return timestamp - v['#lastinfo'] > opts.ship_reboard_clearout
479
480         def _vessel_check_expire(self, v, timestamp):
481                 if not self._vessel_stale(v, timestamp):
482                         return v
483                 self._debug_line_disposition(timestamp,'',
484                         'stale-reset ' + v['#name'])
485                 self._trash_vessel(v)
486                 return None
487
488         def expire_garbage(self, timestamp):
489                 for v in self._vl.values():
490                         self._vessel_check_expire(v, timestamp)
491
492         def _vessel_lookup(self, vn, timestamp, dml=[], create=False):
493                 v = self._vl.get(vn, None)
494                 if v is not None:
495                         v = self._vessel_check_expire(v, timestamp)
496                 if v is not None:
497                         dml.append('found')
498                         return v
499                 if not create:
500                         dml.append('no')
501                 dml.append('new')
502                 self._vl[vn] = v = { '#name': vn }
503                 self._vessel_updated(v, timestamp)
504                 return v
505
506         def _find_matching_vessel(self, pattern, timestamp, cmdr,
507                                         dml=[], create=False):
508                 # use when a commander pirate `cmdr' specified a vessel
509                 #  by name `pattern' (either may be None)
510                 # if create is true, will create the vessel
511                 #  record if an exact name is specified
512
513                 if (pattern is not None and
514                     not '*' in pattern
515                     and len(pattern.split(' ')) == 2):
516                         vn = pattern.title()
517                         dml.append('exact')
518                         return self._vessel_lookup(
519                                 vn, timestamp, dml=dml, create=create)
520
521                 if pattern is None:
522                         pattern_check = lambda vn: True
523                 else:
524                         re = '(?:.* )?%s$' % pattern.lower().replace('*','.*')
525                         pattern_check = regexp.compile(re, regexp.I).match
526
527                 tries = []
528
529                 cmdr_pa = self._pl.get(cmdr, None)
530                 if cmdr_pa: tries.append((cmdr_pa.v, 'cmdr'))
531
532                 tries.append((self._v, 'here'))
533                 tried_vns = []
534
535                 for (v, dm) in tries:
536                         if v is None: dml.append(dm+'?'); continue
537                         
538                         vn = v['#name']
539                         if not pattern_check(vn):
540                                 tried_vns.append(vn)
541                                 dml.append(dm+'#')
542                                 continue
543
544                         dml.append(dm+'!')
545                         return v
546
547                 if pattern is not None and '*' in pattern:
548                         search = [
549                                 (vn,v)
550                                 for (vn,v) in self._vl.iteritems()
551                                 if not self._vessel_stale(v, timestamp)
552                                 if pattern_check(vn)
553                                 ]
554                         #debug('CLT-RE /%s/ wanted (%s) searched (%s)' % (
555                         #       re,
556                         #       '/'.join(tried_vns),
557                         #       '/'.join([vn for (vn,v) in search])))
558
559                         if len(search)==1:
560                                 dml.append('one')
561                                 return search[0][1]
562                         elif search:
563                                 dml.append('many')
564                         else:
565                                 dml.append('none')
566
567         def _debug_line_disposition(self,timestamp,l,m):
568                 debug('CLT %13s %-40s %s' % (timestamp,m,l))
569
        def chatline(self,l):
                # Process one line `l' of the chat log, updating our
                # idea of who is aboard which vessel.
                #
                # The line is tried against a series of patterns in
                # order; the first which matches decides what happens.
                # Every path ends by reporting the line's disposition
                # through d(), so the return value is whatever
                # _debug_line_disposition returns.

                # rm matches a regexp against the start of the current
                # value of `l'; d reports the disposition `m' of the
                # line.  Both look up `l' and `timestamp' when called,
                # so they see the later reassignments below.
                rm = lambda re: regexp.match(re,l)
                d = lambda m: self._debug_line_disposition(timestamp,l,m)
                timestamp = None

                # Date header line: three numbers separated by `/'.
                # They are used below as the first three fields given
                # to time.mktime, ie year, month, day.
                m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$')
                if m:
                        self._date = [int(x) for x in m.groups()]
                        self._previous_timestamp = None
                        return d('date '+`self._date`)

                # Nothing can be timestamped until a date has been seen.
                if self._date is None:
                        return d('date unset')

                # Every other interesting line starts `[HH:MM:SS] '.
                m = rm('\\[(\d\d):(\d\d):(\d\d)\\] ')
                if not m:
                        return d('no timestamp')

                # Combine the date with the time of day.  If that is
                # earlier than the previous line's time, advance the
                # day and try again (presumably the log has run past
                # midnight without a new date header).
                while True:
                        time_tuple = (self._date +
                                      [int(x) for x in m.groups()] +
                                      [-1,-1,-1])
                        timestamp = time.mktime(time_tuple)
                        if timestamp >= self._previous_timestamp: break
                        self._date[2] += 1
                        self._debug_line_disposition(timestamp,'',
                                'new date '+`self._date`)

                self._previous_timestamp = timestamp

                # Strip everything up to and including the first
                # space, ie the timestamp; rm now matches the rest.
                l = l[l.find(' ')+1:]

                # Helpers.  ob_x records an event for a pirate aboard
                # our current vessel; ob1 does so for the pirate in
                # group 1 of the latest match `m'; oba also appends
                # group 2 to the event description.
                def ob_x(pirate,event):
                        return self._onboard_event(
                                        self._v, timestamp, pirate, event)
                def ob1(did): ob_x(m.group(1), did); return d(did)
                def oba(did): return ob1('%s %s' % (did, m.group(2)))

                # Record that `pirate' left vessel `v', then forget
                # the pirate.  (Going via _onboard_event first ensures
                # that the pirate is entered in `v' and self._pl, so
                # the deletions cannot fail.)
                def disembark(v, timestamp, pirate, event):
                        self._onboard_event(
                                        v, timestamp, pirate, 'leaving '+event)
                        del v[pirate]
                        del self._pl[pirate]

                def disembark_me(why):
                        self._disembark_myself()
                        return d('disembark-me '+why)

                # We board a vessel: it becomes the current vessel
                # (its record being created if necessary), and stale
                # vessels are cleared out.
                m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$')
                if m:
                        dm = ['boarding']
                        pn = self._myself.name
                        vn = m.group(1)
                        v = self._vessel_lookup(vn, timestamp, dm, create=True)
                        self._lastvessel = self._vessel = vn
                        self._v = v
                        ob_x(pn, 'we boarded')
                        self.expire_garbage(timestamp)
                        return d(' '.join(dm))

                # All the remaining patterns need a current vessel.
                if self._v is None:
                        return d('no vessel')

                m = rm('(\\w+) has come aboard\\.$')
                if m: return ob1('boarded');

                m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$')
                if m:
                        (who,what) = m.groups()
                        pa = ob_x(who,'ord '+what)
                        if what == 'Gunning':
                                pa.gunner = True
                        return d('duty order')

                m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$')
                if m: oba('stopped'); return d("end")

                # Note that `speaker' chatted on channel `chan'.
                # Returns a word saying where the speaker is: 'here'
                # (aboard our vessel; chat time and channel recorded),
                # 'elsewhere' (aboard another), or 'mystery' (unknown).
                def chat_core(speaker, chan):
                        try: pa = self._pl[speaker]
                        except KeyError: return 'mystery'
                        if pa.v is not self._v: return 'elsewhere'
                        pa.last_chat_time = timestamp
                        pa.last_chat_chan = chan
                        self.force_redisplay()
                        return 'here'

                def chat(chan):
                        speaker = m.group(1)
                        dm = chat_core(speaker, chan)
                        return d('chat %s %s' % (chan, dm))

                # Chat which may contain a command to us, of the form
                #   /a [<vessel pattern>:] <pirate> ...
                #   /d [<vessel pattern>:] <pirate> ...
                # where /a records the pirates as aboard the vessel
                # and /d records them as having left.  Group 1 of `m'
                # is taken as the commander and group 2 as the chat
                # text.  Anything else is treated as ordinary chat.
                def chat_metacmd(chan):
                        (cmdr, metacmd) = m.groups()
                        metacmd = regexp.sub('\\s+', ' ', metacmd).strip()
                        m2 = regexp.match(
                                '/([ad]) (?:([A-Za-z* ]+)\\s*:)?([A-Za-z ]+)$',
                                metacmd)
                        if not m2: return chat(chan)

                        (cmd, pattern, targets) = m2.groups()
                        dml = ['cmd', chan, cmd]

                        if cmd == 'a': each = self._onboard_event
                        else: each = disembark

                        if cmdr == self._myself.name:
                                dml.append('self')
                                how = 'cmd: %s' % cmd
                        else:
                                dml.append('other')
                                how = 'cmd: %s %s' % (cmd,cmdr)

                        v = self._find_matching_vessel(
                                pattern, timestamp, cmdr, dml, create=True)

                        if v is not None:
                                targets = targets.strip().split(' ')
                                dml.append(`len(targets)`)
                                for target in targets:
                                        each(v, timestamp, target.title(), how)
                                self._vessel_updated(v, timestamp)

                        dm = ' '.join(dml)
                        chat_core(cmdr, 'cmd '+chan)
                        return d(dm)

                m = rm('(\\w+) (?:issued an order|ordered everyone) "')
                if m: return ob1('general order');

                m = rm('(\\w+) says, "')
                if m: return chat('public')

                m = rm('(\\w+) tells ye, "')
                if m: return chat('private')

                m = rm('Ye told (\\w+), "(.*)"$')
                if m: return chat_metacmd('private')

                m = rm('(\\w+) flag officer chats, "')
                if m: return chat('flag officer')

                m = rm('(\\w+) officer chats, "(.*)"$')
                if m: return chat_metacmd('officer')

                # Ways in which we ourselves leave the vessel.
                m = rm('Ye accepted the offer to job with ')
                if m: return disembark_me('jobbing')

                m = rm('Ye hop on the ferry and are whisked away ')
                if m: return disembark_me('ferry')

                m = rm('Whisking away to yer home on the magical winds')
                if m: return disembark_me('home')

                # End of a fray: if we are among the winners, the
                # other winners (single-word names only) are aboard.
                m = rm('Game over\\.  Winners: ([A-Za-z, ]+)\\.$')
                if m:
                        pl = m.group(1).split(', ')
                        if not self._myself.name in pl:
                                return d('lost melee')
                        for pn in pl:
                                if ' ' in pn: continue
                                ob_x(pn,'won melee')
                        return d('won melee')

                m = rm('(\\w+) is eliminated\\!')
                if m: return ob1('eliminated in fray');

                m = rm('(\\w+) has driven \w+ from the ship\\!')
                if m: return ob1('boarder repelled');

                m = rm('\w+ has bested (\\w+), and turns'+
                        ' to the rest of the ship\\.')
                if m: return ob1('boarder unrepelled');

                m = rm('(\\w+) has left the vessel\.')
                if m:
                        pirate = m.group(1)
                        disembark(self._v, timestamp, pirate, 'disembarked')
                        return d('disembarked')

                return d('not-matched')
750
751         def _str_vessel(self, vn, v):
752                 s = ' vessel %s\n' % vn
753                 s += ' '*20 + "%-*s   %13s\n" % (
754                                 max_pirate_namelen, '#lastinfo',
755                                 v['#lastinfo'])
756                 assert v['#name'] == vn
757                 for pn in sorted(v.keys()):
758                         if pn.startswith('#'): continue
759                         pa = v[pn]
760                         assert pa.v == v
761                         assert self._pl[pn] == pa
762                         s += ' '*20 + "%s %-*s %13s %-30s %13s %s\n" % (
763                                 (' ','G')[pa.gunner],
764                                 max_pirate_namelen, pn,
765                                 pa.last_time, pa.last_event,
766                                 pa.last_chat_time, pa.last_chat_chan)
767                 return s
768
769         def __str__(self):
770                 s = '''<ChatLogTracker
771  myself %s
772  vessel %s
773 '''                     % (self._myself.name, self._vessel)
774                 assert ((self._v is None and self._vessel is None) or
775                         (self._v is self._vl[self._vessel]))
776                 if self._vessel is not None:
777                         s += self._str_vessel(self._vessel, self._v)
778                 for vn in sorted(self._vl.keys()):
779                         if vn == self._vessel: continue
780                         s += self._str_vessel(vn, self._vl[vn])
781                 for p in self._pl:
782                         pa = self._pl[p]
783                         assert pa.v[p] is pa
784                         assert pa.v in self._vl.values()
785                 s += '>\n'
786                 return s
787
788         def catchup(self, progress=None):
789                 while True:
790                         more = self._f.readline()
791                         if not more: break
792
793                         self._progress[0] += len(more)
794                         if progress: progress.progress(*self._progress)
795
796                         self._lbuf += more
797                         if self._lbuf.endswith('\n'):
798                                 self.chatline(self._lbuf.rstrip())
799                                 self._lbuf = ''
800                                 if opts.debug >= 2:
801                                         debug(self.__str__())
802                 if progress: progress.caughtup()
803
804         def changed(self):
805                 rv = self._need_redisplay
806                 self._need_redisplay = False
807                 return rv
808         def myname(self):
809                 # returns our pirate name
810                 return self._myself.name
811         def vesselname(self):
812                 # returns the vessel name we're aboard or None
813                 return self._vessel
814         def lastvesselname(self):
815                 # returns the last vessel name we were aboard or None
816                 return self._lastvessel
817         def aboard(self, vesselname=True):
818                 # returns a list of PirateAboard the vessel
819                 #  sorted by pirate name
820                 #  you can pass this None and you'll get []
821                 #  or True for the current vessel (which is the default)
822                 if vesselname is True: v = self._v
823                 else: v = self._vl.get(vesselname.title())
824                 if v is None: return []
825                 return [ v[pn]
826                          for pn in sorted(v.keys())
827                          if not pn.startswith('#') ]
828
829 #---------- implementations of actual operation modes ----------
830
def do_pirate(pirates, bu):
        """Print a brace-delimited listing with one PirateInfo per name.

        Each entry is printed as soon as its PirateInfo has been built.
        bu is not used by this mode.
        """
        print('{')
        for name in pirates:
                entry = '%s: %s,' % (repr(name), PirateInfo(name))
                print(entry)
        print('}')
837
def prep_crew_of(args, bu, max_age=300):
        """Build the CrewInfo for the crew of the single pirate in args.

        bu is called with a message unless args has exactly one
        element.  max_age is passed on to both PirateInfo and CrewInfo.
        Returns None when the pirate's crew attribute is None.
        """
        if len(args) != 1:
                bu('crew-of takes one pirate name')
        crew = PirateInfo(args[0], max_age).crew
        if crew is None:
                return None
        return CrewInfo(crew[0], max_age)
843
def do_crew_of(args, bu):
        """Print what prep_crew_of() returns for args (a CrewInfo or None)."""
        print(prep_crew_of(args, bu))
847
def do_standings_crew_of(args, bu):
        """Print a standings table for every member of a pirate's crew.

        The crew is looked up via prep_crew_of() with a max age of 60.
        Ranks with no members are omitted; each remaining rank gets a
        heading line followed by one table row per member.

        Exits with a message on stderr when no crew could be found for
        the pirate (prep_crew_of() returned None); previously this case
        died with an AttributeError on None.
        """
        ci = prep_crew_of(args, bu, 60)
        if ci is None:
                sys.exit('no crew information for pirate %s' % args[0])

        tab = StandingsTable()
        tab.headings()
        for (rank, members) in ci.crew:
                if not members:
                        continue
                tab.literalline('%s:' % rank)
                for p in members:
                        # the max age is randomised per pirate --
                        # presumably so that cached pages do not all
                        # expire together; TODO confirm
                        pi = PirateInfo(p, random.randint(900,1800))
                        tab.pirate(pi)
        print(tab.results())
859
class ProgressPrintPercentage:
        """Progress reporter which writes a percentage to a file object.

        Each update ends in a carriage return rather than a newline, so
        successive updates land on the same output line.
        """
        def __init__(self, f=sys.stdout):
                # f: file-like object that receives all output
                self._f = f
        def progress_string(self,done,total):
                """Return the status text for done out of total."""
                if total > 0:
                        percent = (done*100) // total
                else:
                        # nothing to scan at all (e.g. an empty log):
                        # report completion instead of dividing by zero
                        percent = 100
                return "scan chat logs %3d%%\r" % percent
        def progress(self,*a):
                """Write and flush the status text for progress_string(*a)."""
                self._f.write(self.progress_string(*a))
                self._f.flush()
        def show_init(self, pirate, ocean):
                """Write the startup banner naming the pirate and ocean."""
                self._f.write('Starting up, %s on the %s ocean\n' % (
                        pirate, ocean))
        def caughtup(self):
                """Blank out the status text once scanning has finished."""
                self._f.write('                   \r')
                self._f.flush()
874
875 #----- modes which use the chat log parser are quite complex -----
876
def prep_chat_log(args, bu,
                progress=ProgressPrintPercentage(),
                max_myself_age=3600):
        """Open a chat log and replay its existing contents.

        args must hold exactly one element, the chat log filename,
        whose last path component starts PIRATE_OCEAN_; bu is called
        with a message otherwise.  The ocean from the filename is handed
        to fetcher.default_ocean().

        progress receives show_init() and is then passed to the
        tracker's catchup().  max_myself_age is the max age used for
        our own PirateInfo.

        Returns (myself, track): our PirateInfo and the ChatLogTracker.
        """
        if len(args) != 1: bu('this action takes only chat log filename')
        logfn = args[0]
        logfn_re = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_'
        match = regexp.match(logfn_re, logfn)
        if not match: bu('chat log filename is not in expected format')
        (pirate, ocean) = match.groups()
        fetcher.default_ocean(ocean)

        progress.show_init(pirate, fetcher.ocean)
        myself = PirateInfo(pirate,max_myself_age)
        track = ChatLogTracker(myself, logfn)

        # Lower the debug level while replaying the existing log, and
        # make sure it is restored even if the replay raises.
        opts.debug -= 2
        try:
                track.catchup(progress)
        finally:
                opts.debug += 2

        track.force_redisplay()

        return (myself, track)
899
def do_track_chat_log(args, bu):
        """Follow a chat log forever, printing the tracker when it changes.

        Once a second the tracker is brought up to date; its dump is
        printed only if changed() reports a pending redisplay.
        """
        track = prep_chat_log(args, bu)[1]
        while True:
                track.catchup()
                redisplay = track.changed()
                if redisplay:
                        print(track)
                sleep(1)
907
908 #----- ship management aid -----
909
class Display_dumb(ProgressPrintPercentage):
        """Displayer which just writes each new screenful to stdout.

        Every screenful is preceded by two newlines and followed by
        one; no terminal control sequences are used.
        """
        def __init__(self):
                ProgressPrintPercentage.__init__(self)

        def show(self, s):
                """Write s to stdout, set off by blank lines."""
                sys.stdout.write('\n\n%s\n' % (s,))

        def realstart(self):
                """Nothing to prepare for this display mode."""
                return None
917
class Display_overwrite(ProgressPrintPercentage):
        """Displayer which redraws in place using terminfo sequences.

        Three levels of capability are handled:
          * home + clear-to-end-of-line + clear-to-end-of-screen:
            each screenful is drawn over the previous one;
          * only 'clear': the screen is cleared before each screenful;
          * not even 'clear': output degrades to the same plain
            scrolling format that Display_dumb.show() produces.

        self._t maps 'ho', 'el' and 'ed' to the strings to emit (empty
        when unused); it is None in the fully degraded case.
        """
        def __init__(self):
                ProgressPrintPercentage.__init__(self)

                # None means "no terminal control available"; see show()
                self._t = None

                # NOTE(review): curses is initialised on /dev/null
                # rather than the real terminal -- presumably because
                # only the capability strings are wanted; confirm.
                null = open('/dev/null','w')
                curses.setupterm(fd=null.fileno())

                self._clear = curses.tigetstr('clear')
                if not self._clear:
                        self._debug('missing clear!')
                        # keep realstart() harmless; show() checks _t
                        self._clear = ''
                        return

                self._t = {'el':'', 'ed':''}
                if not self._init_sophisticated():
                        # fall back to clearing the screen every time
                        for k in list(self._t.keys()): self._t[k] = ''
                        self._t['ho'] = self._clear

        def _debug(self,m): debug('display overwrite: '+m)

        def _init_sophisticated(self):
                """Fill self._t from terminfo; return 1 if complete, else 0."""
                for k in list(self._t.keys()):
                        self._t[k] = curses.tigetstr(k)
                # 'home' is the terminfo capname for cursor_home ('ho'
                # is only its termcap code, which tigetstr does not
                # know).  Without it, synthesise home from cup(0,0).
                home = curses.tigetstr('home')
                if not home:
                        cup = curses.tigetstr('cup')
                        if cup:
                                home = curses.tparm(cup,0,0)
                self._t['ho'] = home
                missing = [k for k in sorted(self._t.keys())
                           if not self._t[k]]
                if missing:
                        self._debug('missing '+(' '.join(missing)))
                        return 0
                return 1

        def show(self, s):
                """Display the multi-line string s, replacing the previous one."""
                w = sys.stdout.write

                if self._t is None:
                        # no terminal control at all: scroll, producing
                        # the same output as Display_dumb.show()
                        w('\n\n%s\n' % (s,))
                        return

                def wti(k): w(self._t[k])

                wti('ho')
                nl = ''
                for l in s.rstrip().split('\n'):
                        w(nl)
                        w(l)
                        wti('el')       # wipe the rest of the old line
                        nl = '\r\n'
                wti('ed')               # wipe any leftover old lines
                w(' ')
                sys.stdout.flush()

        def realstart(self):
                """Clear the screen (if possible) before the first screenful."""
                sys.stdout.write(self._clear)
                sys.stdout.flush()
970                         
971
def do_ship_aid(args, bu):
        """Run the ship-aid display: a once-a-second roster of a vessel.

        The displayer class is chosen by opts.display and doubles as
        the progress reporter while the chat log is first scanned.
        Each cycle shows who we are, which vessel we are on (or were
        last on), the time, and a standings table with one row per
        pirate aboard, annotated with gunner flag, last event and last
        chat.  The loop never terminates by itself.
        """
        if opts.ship_duty is None:
                opts.ship_duty = True

        displayer = globals()['Display_'+opts.display]()
        spinner = '/-\\'

        track = prep_chat_log(args, bu, progress=displayer)[1]

        def timeevent(now, t, e):
                # age and description of an event; blank filler of the
                # same minimum width (22) when there is no event
                if t is not None:
                        return " %-4s %-16s" % (
                                format_time_interval(now - t), e)
                return ' ' * 22

        displayer.realstart()

        def find_vessel():
                # -> (vessel name or None, phrase describing where we are)
                current = track.vesselname()
                if current:
                        return (current, " on board the %s" % current)
                previous = track.lastvesselname()
                if previous:
                        return (previous, " ashore from the %s" % previous)
                return (None, " not on a vessel")

        displayer.show(track.myname() + find_vessel()[1] + '...')

        while True:
                track.catchup()
                now = time.time()

                (vn, where) = find_vessel()
                text = track.myname() + where
                text += " at %s\n" % time.strftime("%Y-%m-%d %H:%M:%S")

                tbl = StandingsTable()
                tbl.headings()

                for pa in track.aboard(vn):
                        pi = pa.pirate_info()

                        if pa.gunner:
                                flag = 'G '
                        else:
                                flag = '  '
                        extra = ''.join([
                                flag,
                                timeevent(now, pa.last_time, pa.last_event),
                                timeevent(now, pa.last_chat_time,
                                          pa.last_chat_chan),
                        ])

                        if pi is not None:
                                tbl.pirate(pi, extra)
                        else:
                                # no info yet: show the spinner instead
                                tbl.pirate_dummy(pa.name, spinner[0], extra)

                text += tbl.results()

                displayer.show(text)
                sleep(1)
                spinner = spinner[1:] + spinner[:1]
1027
1028 #---------- main program ----------
1029
def main():
        """Parse the command line and run the requested mode.

        Sets the globals opts (the parsed options, plus a few fixed
        parameters and the debug output file) and fetcher, then calls
        the do_<mode> function with the remaining arguments and the
        option parser's error function.
        """
        global opts, fetcher

        pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
actions:
 yoweb-scrape [--ocean OCEAN ...] pirate PIRATE
 yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
 yoweb-scrape [options] ship-aid CHAT-LOG  (must be .../PIRATE_OCEAN_chat-log*)

display modes (for --display) apply to ship-aid:
 --display=dumb       just print new information, scrolling the screen
 --display=overwrite  use cursor motion, selective clear, etc. to redraw at top
''')
        ao = pa.add_option
        ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None,
                help='select ocean OCEAN')
        ao('--cache-dir', dest='cache_dir', metavar='DIR',
                default='~/.yoweb-scrape-cache',
                help='cache yoweb pages in DIR')
        ao('-D','--debug', action='count', dest='debug', default=0,
                help='enable debugging output')
        ao('--debug-fd', type='int', dest='debug_fd',
                help='write any debugging output to specified fd')
        ao('-q','--quiet', action='store_true', dest='quiet',
                help='suppress warning output')
        ao('--display', action='store', dest='display',
                type='choice', choices=['dumb','overwrite'],
                help='how to display ship aid')

        # --ship-duty and --all-puzzles share one destination; it stays
        # None when neither option is given
        ao('--ship-duty', action='store_true', dest='ship_duty',
                help='show ship duty station puzzles')
        ao('--all-puzzles', action='store_false', dest='ship_duty',
                help='show all puzzles, not just ship duty stations')

        (opts,args) = pa.parse_args()
        random.seed()

        if len(args) < 1:
                pa.error('need a mode argument')

        if opts.debug_fd is not None:
                opts.debug_file = os.fdopen(opts.debug_fd, 'w')
        else:
                opts.debug_file = sys.stdout

        # Map the mode to its do_* function: '-' becomes '_'.  A
        # literal '_' is first turned into '#', which no function name
        # contains, so only the hyphenated spelling is accepted.
        mode = args[0]
        mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_')
        try: mode_fn = globals()[mode_fn_name]
        except KeyError: pa.error('unknown mode "%s"' % mode)

        # fixed parameters
        opts.min_max_age = 60
        opts.expire_age = 3600
        opts.ship_reboard_clearout = 3600

        if opts.cache_dir.startswith('~/'):
                # expanduser() still works when $HOME is unset, where
                # os.getenv('HOME') + ... raised TypeError
                opts.cache_dir = os.path.expanduser(opts.cache_dir)

        if opts.display is None:
                # Use the plain display when debugging output shares
                # stdout or when stdout is not a terminal.
                if ((opts.debug > 0 and opts.debug_fd is None)
                    or not os.isatty(sys.stdout.fileno())):
                        opts.display = 'dumb'
                else:
                        opts.display = 'overwrite'

        fetcher = Fetcher(opts.ocean, opts.cache_dir)

        mode_fn(args[1:], pa.error)
1101
# Run only when executed as a script, so that loading this file as a
# module does not start parsing the command line.
if __name__ == '__main__':
        main()