chiark / gitweb /
947e5e4c05d3089ee441a1cbf694c5c5e69d15f3
[ypp-sc-tools.db-live.git] / yoweb-scrape
1 #!/usr/bin/python
2
3 #---------- setup ----------
4
5 import signal
6 signal.signal(signal.SIGINT, signal.SIG_DFL)
7
8 import os
9 import time
10 import urllib
11 import urllib2
12 import errno
13 import sys
14 import re as regexp
15 import random
16 import curses
17 from optparse import OptionParser
18
19 from BeautifulSoup import BeautifulSoup
20
21 opts = None
22
23 #---------- YPP parameters and arrays ----------
24
# Names of all puzzles that can appear on a pirate page, in display order.
puzzles = [
        'Swordfighting', 'Bilging', 'Sailing', 'Rigging', 'Navigating',
        'Battle Navigation', 'Gunning', 'Carpentry', 'Rumble',
        'Treasure Haul',
        'Drinking', 'Spades', 'Hearts', 'Treasure Drop', 'Poker',
        'Distilling',
        'Alchemistry', 'Shipwrightery', 'Blacksmithing', 'Foraging',
]

# Standing names, lowest first; a standing is stored as an index into this.
standingvals = [
        'Able', 'Distinguished', 'Respected', 'Master',
        'Renowned', 'Grand-Master', 'Legendary', 'Ultimate',
]

# Matches hrefs that point at a pirate information page.
pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm')

# Column width used when laying out pirate names.
max_pirate_namelen = 12
36
37
38 #---------- general utilities ----------
39
def debug(m):
        """Write message `m' to the debug file if debugging is enabled."""
        if not (opts.debug > 0):
                return
        print >>opts.debug_file, m
43
def sleep(seconds):
        """Sleep for `seconds', flushing the debug file first when debugging."""
        debugging = opts.debug > 0
        if debugging:
                # Make sure everything logged so far is visible while we wait.
                opts.debug_file.flush()
        time.sleep(seconds)
48
def format_time_interval(ti):
        """Render an interval of `ti' seconds as a short string.

        Under two minutes gives 'M:SS'; under two hours gives minutes
        ('NNm'); under a day gives hours ('Nh'); anything longer gives
        days ('Nd').
        """
        if ti < 120:
                minutes = ti / 60
                seconds = ti % 60
                return '%d:%02d' % (minutes, seconds)
        # (exclusive upper bound, seconds per unit, output format)
        for (limit, unit, fmt) in ((7200, 60, '%2dm'), (86400, 3600, '%dh')):
                if ti < limit:
                        return fmt % (ti / unit)
        return '%dd' % (ti / 86400)
54
55 #---------- caching and rate-limiting data fetcher ----------
56
class Fetcher:
        """Caching and rate-limiting fetcher for yoweb pages.

        Fetched data is stored in `cachedir' in files named
        '#<quoted url>#'.  The modification times of those files are also
        what need_wait() inspects to decide how long to hold off before
        the next real fetch.
        """

        def __init__(self, ocean, cachedir):
                """Remember `ocean', create `cachedir' if needed, expire old files."""
                debug('Fetcher init %s' % cachedir)
                self.ocean = ocean
                self.cachedir = cachedir
                try:
                        os.mkdir(cachedir)
                except (OSError, IOError) as oe:
                        # An already-existing cache directory is fine.
                        if oe.errno != errno.EEXIST: raise
                self._cache_scan(time.time())

        def default_ocean(self, ocean='ice'):
                """Set the ocean to `ocean' unless one has already been chosen."""
                if self.ocean is None:
                        self.ocean = ocean

        def _cache_scan(self, now):
                """Expire old cache files; return the ages of the rest.

                Only files whose names start with '#' are considered.  Files
                older than opts.expire_age are removed.  The returned list of
                ages (seconds relative to `now') is unsorted.
                """
                ages = []
                debug('Fetcher   scan_cache')
                for leaf in os.listdir(self.cachedir):
                        if not leaf.startswith('#'): continue
                        path = self.cachedir + '/' + leaf
                        try:
                                s = os.stat(path)
                        except (OSError, IOError) as oe:
                                # The file vanished between listdir and stat.
                                if oe.errno != errno.ENOENT: raise
                                continue
                        age = now - s.st_mtime
                        if age > opts.expire_age:
                                debug('Fetcher    expire %d %s' % (age, path))
                                try:
                                        os.remove(path)
                                except (OSError, IOError) as oe:
                                        if oe.errno != errno.ENOENT: raise
                                continue
                        ages.append(age)
                return ages

        def need_wait(self, now):
                """Return how many seconds to wait before the next fetch.

                The k'th most recent cache file must be at least min_age
                seconds old, where min_age starts at 1 and grows
                (add 3, then multiply by 1.25) for each successive file.
                Files 300 or more seconds old never cause a wait.
                Returns 0 when no wait is needed.
                """
                ages = self._cache_scan(now)
                ages.sort()
                debug('Fetcher   ages ' + repr(ages))
                min_age = 1
                need_wait = 0
                for age in ages:
                        if age < min_age and age < 300:
                                debug('Fetcher   morewait min=%d age=%d' %
                                        (min_age, age))
                                need_wait = max(need_wait, min_age - age)
                        min_age += 3
                        min_age *= 1.25
                return need_wait

        def _rate_limit_cache_clean(self, now):
                """Sleep for as long as need_wait() says is necessary."""
                need_wait = self.need_wait(now)
                if need_wait > 0:
                        debug('Fetcher   wait %d' % need_wait)
                        sleep(need_wait)

        def fetch(self, url, max_age):
                """Return the data at `url', from the cache if fresh enough.

                `max_age' is clamped to lie between opts.min_max_age and
                opts.expire_age.  A cached copy no older than that is
                returned directly; otherwise the URL is fetched (after any
                rate-limiting delay), stored in the cache and returned.
                """
                debug('Fetcher fetch %s' % url)
                cache_corename = urllib.quote_plus(url)
                cache_item = "%s/#%s#" % (self.cachedir, cache_corename)
                try:
                        f = open(cache_item, 'r')
                except (OSError, IOError) as oe:
                        if oe.errno != errno.ENOENT: raise
                        f = None
                now = time.time()
                max_age = max(opts.min_max_age, min(max_age, opts.expire_age))
                if f is not None:
                        # Always close the cache file, whether or not we use it.
                        try:
                                age = now - os.fstat(f.fileno()).st_mtime
                                fresh = not (age > max_age)
                                if fresh:
                                        data = f.read()
                        finally:
                                f.close()
                        if fresh:
                                debug('Fetcher  cached %d > %d' % (max_age, age))
                                return data
                        debug('Fetcher  stale %d < %d'% (max_age, age))

                debug('Fetcher  fetch')
                self._rate_limit_cache_clean(now)

                stream = urllib2.urlopen(url)
                try:
                        data = stream.read()
                finally:
                        stream.close()
                # Write to a per-process temporary name and rename into
                # place, so the cache entry is replaced in a single step.
                cache_tmp = "%s/#%s~%d#" % (
                        self.cachedir, cache_corename, os.getpid())
                f = open(cache_tmp, 'w')
                try:
                        f.write(data)
                finally:
                        f.close()
                os.rename(cache_tmp, cache_item)
                debug('Fetcher  stored')
                return data

        def yoweb(self, kind, tail, max_age):
                """Fetch the yoweb page `kind'+`tail' for the current ocean."""
                self.default_ocean()
                url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
                        self.ocean, kind, tail)
                return self.fetch(url, max_age)
154
155 #---------- logging assistance for troubled screenscrapers ----------
156
class SoupLog:
        """Collects messages describing problems found while scraping."""

        def __init__(self):
                self.msgs = []

        def msg(self, m):
                """Record message `m'."""
                self.msgs.append(m)

        def soupm(self, obj, m):
                """Record `m' together with the repr of the offending `obj'."""
                where = repr(obj)
                self.msg(m + '; in ' + where)

        def needs_msgs(self, child_souplog):
                """Take over all messages from `child_souplog', emptying it."""
                self.msgs.extend(child_souplog.msgs)
                child_souplog.msgs = []
167
def soup_text(obj):
        """Return all text found beneath `obj', joined and stripped."""
        pieces = obj.findAll(text=True)
        return ''.join(pieces).strip()
171
class SomethingSoupInfo(SoupLog):
        """Base for scrapers: fetches a yoweb page and parses it into self._soup."""

        def __init__(self, kind, tail, max_age):
                SoupLog.__init__(self)
                page = fetcher.yoweb(kind, tail, max_age)
                entity_mode = BeautifulSoup.HTML_ENTITIES
                self._soup = BeautifulSoup(page, convertEntities=entity_mode)
179
180 #---------- scraper for pirate pages ----------
181
class PirateInfo(SomethingSoupInfo):
        # Scraper for a single pirate's yoweb page.
        #
        # Public data members:
        #  pi.standings = { 'Treasure Haul': 0 ... }
        #                       (values are indices into standingvals;
        #                        puzzles that could not be parsed are absent)
        #  pi.name = name
        #  pi.crew = (id, name)     or None if not found
        #  pi.flag = (id, name)     or None if not found
        #  pi.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, pirate, max_age=300):
                SomethingSoupInfo.__init__(self,
                        'pirate.wm?target=', pirate, max_age)
                self.name = pirate
                self._find_standings()
                self.crew = self._find_crewflag('crew',
                        '^/yoweb/crew/info\\.wm')
                self.flag = self._find_crewflag('flag',
                        '^/yoweb/flag/info\\.wm')

        def _find_standings(self):
                # Fills in self.standings from the puzzle images on the page.
                # Problems are collected in a private log and only copied
                # into self.msgs if at least one puzzle ends up without a
                # standing.
                imgs = self._soup.findAll('img',
                        src=regexp.compile('/yoweb/images/stat.*'))
                # Either '<something>/<Standing>' or
                # '<something>/<something> (ocean-wide <Standing>)'.
                standing_re = regexp.compile(
u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
                        )
                standings = { }

                for skill in puzzles:
                        standings[skill] = [ ]

                skl = SoupLog()

                for img in imgs:
                        try: puzzle = img['alt']
                        except KeyError: continue

                        if puzzle not in puzzles:
                                skl.soupm(img, 'unknown puzzle: "%s"' % puzzle)
                                continue
                        key = img.findParent('td')
                        if key is None:
                                skl.soupm(img, 'puzzle at root! "%s"' % puzzle)
                                continue
                        valelem = key.findNextSibling('td')
                        if valelem is None:
                                skl.soupm(key, 'puzzle missing sibling "%s"'
                                        % puzzle)
                                continue
                        valstr = soup_text(valelem)
                        match = standing_re.match(valstr)
                        if match is None:
                                skl.soupm(key, ('puzzle "%s" unparseable'+
                                        ' standing "%s"') % (puzzle, valstr))
                                continue
                        # Whichever alternative matched, its group is the
                        # last (and only) one that participated.
                        standing = match.group(match.lastindex)
                        standings[puzzle].append(standing)

                self.standings = { }

                for puzzle in puzzles:
                        sl = standings[puzzle]
                        if len(sl) > 1:
                                skl.msg('puzzle "%s" multiple standings %s' %
                                                (puzzle, repr(sl)))
                                continue
                        if not sl:
                                skl.msg('puzzle "%s" no standing found' % puzzle)
                                continue
                        standing = sl[0]
                        # Look the name up across ALL of standingvals,
                        # including the final entry.
                        try:
                                self.standings[puzzle] = \
                                        standingvals.index(standing)
                        except ValueError:
                                skl.msg('puzzle "%s" unknown standing "%s"' %
                                        (puzzle, standing))

                for puzzle in puzzles:
                        if puzzle not in self.standings:
                                # Something is missing: surface everything
                                # we noted.  Once is enough, since
                                # needs_msgs empties skl.
                                self.needs_msgs(skl)
                                break

        def _find_crewflag(self, cf, yoweb_re):
                # Returns (id, name) for the unique link whose href matches
                # yoweb_re, where cf is 'crew' or 'flag'; returns None (and
                # logs a message) if there is not exactly one such link or
                # its id cannot be extracted.
                things = self._soup.findAll('a', href=regexp.compile(yoweb_re))
                if len(things) != 1:
                        self.msg('zero or several %s id references found' % cf)
                        return None
                thing = things[0]
                id_re = '\\b%sid\\=(\\w+)$' % cf
                id_haystack = thing['href']
                match = regexp.compile(id_re).search(id_haystack)
                if match is None:
                        self.soupm(thing, ('incomprehensible %s id ref'+
                                ' (%s in %s)') % (cf, id_re, id_haystack))
                        return None
                name = soup_text(thing)
                return (match.group(1), name)

        def __str__(self):
                return repr((self.crew, self.flag, self.standings, self.msgs))
280
281 #---------- scraper for crew pages ----------
282
class CrewInfo(SomethingSoupInfo):
        # Scraper for a crew's yoweb page.
        #
        # Public data members:
        #  ci.crew = [ ('Captain',        ['Pirate', ...]),
        #              ('Senior Officer', [...]),
        #               ... ]
        #  ci.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, crewid, max_age=300):
                SomethingSoupInfo.__init__(self,
                        'crew/info.wm?crewid=', crewid, max_age)
                self._find_crew()

        def _find_crew(self):
                # Locate the member table by walking up from the captain
                # image, then read it row by row: a row containing a crew
                # rank image starts a new rank, other rows contribute the
                # pirates linked from them.
                self.crew = []
                captain_imgs = self._soup.findAll('img',
                        src='/yoweb/images/crew-captain.png')
                if len(captain_imgs) != 1:
                        self.msg('crew members: no. of captain images != 1')
                        return
                node = captain_imgs[0]
                while not node.find('a', href=pirate_ref_re):
                        node = node.findParent('table')
                        if not node:
                                self.msg('crew members: cannot find table')
                                return
                rank_img_re = regexp.compile('/yoweb/images/crew')
                members = None  # list for the rank currently being read
                for row in node.contents:
                        if isinstance(row, basestring):
                                # bare text between elements
                                continue

                        if row.find('img', attrs={'src': rank_img_re}):
                                members = []
                                self.crew.append((soup_text(row), members))
                                continue

                        for link in row.findAll('a', href=pirate_ref_re):
                                if members is None:
                                        self.soupm(link,
                                                'crew members: crew before rank')
                                        continue
                                members.append(soup_text(link))

        def __str__(self):
                return repr((self.crew, self.msgs))
330
331 #---------- pretty-printer for tables of pirate puzzle standings ----------
332
class StandingsTable:
        """Builds a text table of pirates' puzzle standings.

        Lines accumulate in self.s; results() returns the text so far.
        Each entry of `use_puzzles' is either a puzzle name or a list of
        names (shown as one column holding the best of those standings).
        """

        def __init__(self, use_puzzles=None, col_width=6):
                if use_puzzles is None:
                        if opts.ship_duty:
                                use_puzzles=[
                                        'Navigating','Battle Navigation',
                                        'Gunning',
                                        ['Sailing','Rigging'],
                                        'Bilging',
                                        'Carpentry',
                                        'Treasure Haul'
                                ]
                        else:
                                use_puzzles=puzzles
                self._puzzles = use_puzzles
                self.s = ''
                # One column of each cell is the separating space.
                self._cw = col_width-1

        def _pline(self, pirate, puzstrs, extra):
                """Append one row: name, one cell per item of puzstrs, extra."""
                self.s += ' %-*s' % (max(max_pirate_namelen, 14), pirate)
                for v in puzstrs:
                        # pad and truncate each cell to exactly _cw characters
                        self.s += ' %-*.*s' % (self._cw,self._cw, v)
                if extra:
                        self.s += ' ' + extra
                self.s += '\n'

        def _puzstr(self, pi, puzzle):
                """Return the cell text for `puzzle' (a name or list of names).

                Gives '?' if any standing is unknown and '' for standing 0.
                Otherwise the text is the standing number (if the column is
                wide enough), then one '*' per two levels and a '+' for an
                odd level.
                """
                if not isinstance(puzzle,list): puzzle = [puzzle]
                try: standing = max([pi.standings[p] for p in puzzle])
                except KeyError: return '?'
                if not standing: return ''
                s = ''
                if self._cw > 4:
                        s += repr(standing)
                if self._cw > 5:
                        s += ' '
                # explicit floor division: these are repeat counts
                s += '*' * (standing // 2)
                s += '+' * (standing % 2)
                return s

        def headings(self):
                """Append the heading row with abbreviated puzzle names."""
                def puzn_redact(name):
                        if isinstance(name,list):
                                return '/'.join(
                                        ["%.*s" % (self._cw // 2, puzn_redact(n))
                                         for n in name])
                        spc = name.find(' ')
                        if spc < 0: return name
                        # up to four letters of the first word, then the rest
                        return name[0:min(4,spc)] + name[spc+1:]
                self._pline('', [puzn_redact(p) for p in self._puzzles], None)

        def literalline(self, line):
                """Append `line' verbatim as a row of its own."""
                self.s += line + '\n'

        def pirate_dummy(self, name, standingstring, extra=None):
                """Append a row for `name' with standingstring repeated once per column."""
                self._pline(name, standingstring * len(self._puzzles), extra)

        def pirate(self, pi, extra=None):
                """Append a row for the pirate described by `pi'."""
                puzstrs = [self._puzstr(pi,puz) for puz in self._puzzles]
                self._pline(pi.name, puzstrs, extra)

        def results(self):
                """Return the table text accumulated so far."""
                return self.s
395
396 #---------- chat log parser ----------
397
class PirateAboard:
        # A plain record describing one pirate known to be on a vessel.
        #  pa.v                 vessel dict this pirate is aboard
        #  pa.name              pirate name
        #  pa.last_time         timestamp of the latest event
        #  pa.last_event        description of the latest event
        #  pa.gunner            True once ordered to gun
        #  pa.last_chat_time    timestamp of latest chat, or None
        #  pa.last_chat_chan    channel of latest chat, or None
        #  pa.pi                PirateInfo, fetched lazily, or None

        def __init__(pa, pn, v, time, event):
                # identity and location
                pa.name = pn
                pa.v = v
                # most recent activity
                pa.last_time = time
                pa.last_event = event
                # nothing known yet about chat, duty or standings
                pa.last_chat_time = pa.last_chat_chan = None
                pa.gunner = False
                pa.pi = None

        def pirate_info(pa):
                # Returns the PirateInfo if we have (or can now get) one.
                # It is only fetched when the fetcher would not have to
                # wait; otherwise this returns None for now.
                if pa.pi:
                        return pa.pi
                if not fetcher.need_wait(time.time()):
                        pa.pi = PirateInfo(pa.name, 3600)
                return pa.pi
423
424 class ChatLogTracker:
425         # This is quite complex so we make it opaque.  Use the
426         # official invokers, accessors etc.
427
428         def __init__(self, myself_pi, logfn):
429                 self._pl = {}   # self._pl['Pirate'] =
430                 self._vl = {}   #   self._vl['Vessel']['Pirate'] = PirateAboard
431                                 # self._vl['Vessel']['#lastinfo']
432                                 # self._vl['Vessel']['#name']
433                                 # self._v = self._vl[self._vessel]
434                 self._date = None
435                 self._myself = myself_pi
436                 self._f = file(logfn)
437                 self._lbuf = ''
438                 self._progress = [0, os.fstat(self._f.fileno()).st_size]
439                 self._disembark_myself()
440                 self._need_redisplay = False
441
442         def _disembark_myself(self):
443                 self._v = None
444                 self._vessel = None
445                 self.force_redisplay()
446
        def force_redisplay(self):
                """Flag that the tracked state changed and should be redrawn."""
                self._need_redisplay = True
449
        def _vessel_updated(self, v, timestamp):
                """Stamp vessel `v' with `timestamp' as its latest information."""
                # '#lastinfo' is what _vessel_stale compares against.
                v['#lastinfo'] = timestamp
                self.force_redisplay()
453
454         def _onboard_event(self,v,timestamp,pirate,event):
455                 pa = self._pl.get(pirate, None)
456                 if pa is not None and pa.v is v:
457                         pa.last_time = timestamp
458                         pa.last_event = event
459                 else:
460                         if pa is not None: del pa.v[pirate]
461                         pa = PirateAboard(pirate, v, timestamp, event)
462                         self._pl[pirate] = pa
463                         v[pirate] = pa
464                 self._vessel_updated(v, timestamp)
465                 return pa
466
467         def _trash_vessel(self, v):
468                 for pn in v:
469                         if pn.startswith('#'): continue
470                         del self._pl[pn]
471                 vn = v['#name']
472                 del self._vl[vn]
473                 if v is self._v: self._disembark_myself()
474                 self.force_redisplay()
475
476         def _vessel_stale(self, v, timestamp):
477                 return timestamp - v['#lastinfo'] > opts.ship_reboard_clearout
478
479         def _vessel_check_expire(self, v, timestamp):
480                 if not self._vessel_stale(v, timestamp):
481                         return v
482                 self._debug_line_disposition(timestamp,'',
483                         'stale-reset ' + v['#name'])
484                 self._trash_vessel(v)
485                 return None
486
487         def expire_garbage(self, timestamp):
488                 for v in self._vl.values():
489                         self._vessel_check_expire(v, timestamp)
490
491         def _vessel_lookup(self, vn, timestamp, dml=[], create=False):
492                 v = self._vl.get(vn, None)
493                 if v is not None:
494                         v = self._vessel_check_expire(v, timestamp)
495                 if v is not None:
496                         dml.append('found')
497                         return v
498                 if not create:
499                         dml.append('no')
500                 dml.append('new')
501                 self._vl[vn] = v = { '#name': vn }
502                 self._vessel_updated(v, timestamp)
503                 return v
504
505         def _find_matching_vessel(self, pattern, timestamp, cmdr,
506                                         dml=[], create=False):
507                 # use when a commander pirate `cmdr' specified a vessel
508                 #  by name `pattern' (either may be None)
509                 # if create is true, will create the vessel
510                 #  record if an exact name is specified
511
512                 if (pattern is not None and
513                     not '*' in pattern
514                     and len(pattern.split(' ')) == 2):
515                         vn = pattern.title()
516                         dml.append('exact')
517                         return self._vessel_lookup(
518                                 vn, timestamp, dml=dml, create=create)
519
520                 if pattern is None:
521                         pattern_check = lambda vn: True
522                 else:
523                         re = '(?:.* )?%s$' % pattern.lower().replace('*','.*')
524                         pattern_check = regexp.compile(re, regexp.I).match
525
526                 tries = []
527
528                 cmdr_pa = self._pl.get(cmdr, None)
529                 if cmdr_pa: tries.append((cmdr_pa.v, 'cmdr'))
530
531                 tries.append((self._v, 'here'))
532                 tried_vns = []
533
534                 for (v, dm) in tries:
535                         if v is None: dml.append(dm+'?'); continue
536                         
537                         vn = v['#name']
538                         if not pattern_check(vn):
539                                 tried_vns.append(vn)
540                                 dml.append(dm+'#')
541                                 continue
542
543                         dml.append(dm+'!')
544                         return v
545
546                 if pattern is not None and '*' in pattern:
547                         search = [
548                                 (vn,v)
549                                 for (vn,v) in self._vl.iteritems()
550                                 if not self._vessel_stale(v, timestamp)
551                                 if pattern_check(vn)
552                                 ]
553                         #debug('CLT-RE /%s/ wanted (%s) searched (%s)' % (
554                         #       re,
555                         #       '/'.join(tried_vns),
556                         #       '/'.join([vn for (vn,v) in search])))
557
558                         if len(search)==1:
559                                 dml.append('one')
560                                 return search[0][1]
561                         elif search:
562                                 dml.append('many')
563                         else:
564                                 dml.append('none')
565
566         def _debug_line_disposition(self,timestamp,l,m):
567                 debug('CLT %13s %-40s %s' % (timestamp,m,l))
568
        def chatline(self,l):
                """Process one line `l' of the chat log, updating tracked state.

                The line is tried against a series of patterns in a fixed
                order.  Every exit goes through d(), which records the
                line's disposition with _debug_line_disposition().
                """
                # rm and d are closures: they see the values of l and
                # timestamp current when they are called, so rm matches
                # against the stripped line once l is reassigned below.
                rm = lambda re: regexp.match(re,l)
                d = lambda m: self._debug_line_disposition(timestamp,l,m)
                timestamp = None

                # Date separator line: remember the date for later lines.
                m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$')
                if m:
                        self._date = [int(x) for x in m.groups()]
                        self._previous_timestamp = None
                        return d('date '+`self._date`)

                if self._date is None:
                        return d('date unset')

                m = rm('\\[(\d\d):(\d\d):(\d\d)\\] ')
                if not m:
                        return d('no timestamp')

                # Combine the remembered date with this line's time of day.
                # If the result is earlier than the previous line's
                # timestamp, advance the third date field and try again.
                # NOTE(review): presumably that field is the day of the
                # month and this handles logs crossing midnight -- confirm.
                while True:
                        time_tuple = (self._date +
                                      [int(x) for x in m.groups()] +
                                      [-1,-1,-1])
                        timestamp = time.mktime(time_tuple)
                        if timestamp >= self._previous_timestamp: break
                        self._date[2] += 1
                        self._debug_line_disposition(timestamp,'',
                                'new date '+`self._date`)

                self._previous_timestamp = timestamp

                # Drop everything up to and including the first space
                # (the '[hh:mm:ss] ' prefix matched above).
                l = l[l.find(' ')+1:]

                # Helpers for events on the vessel we are aboard.  ob1 and
                # oba read the enclosing `m' at the time they are called,
                # i.e. the most recent match.
                def ob_x(pirate,event):
                        return self._onboard_event(
                                        self._v, timestamp, pirate, event)
                def ob1(did): ob_x(m.group(1), did); return d(did)
                def oba(did): return ob1('%s %s' % (did, m.group(2)))

                def disembark(v, timestamp, pirate, event):
                        # Record the departure as an event (which also
                        # refreshes the vessel), then drop the pirate.
                        self._onboard_event(
                                        v, timestamp, pirate, 'leaving '+event)
                        del v[pirate]
                        del self._pl[pirate]

                m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$')
                if m:
                        dm = ['boarding']
                        pn = self._myself.name
                        vn = m.group(1)
                        v = self._vessel_lookup(vn, timestamp, dm, create=True)
                        self._vessel = vn
                        self._v = v
                        ob_x(pn, 'we boarded')
                        self.expire_garbage(timestamp)
                        return d(' '.join(dm))

                # Everything below concerns the vessel we are aboard.
                if self._v is None:
                        return d('no vessel')

                m = rm('(\\w+) has come aboard\\.$')
                if m: return ob1('boarded');

                m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$')
                if m:
                        (who,what) = m.groups()
                        pa = ob_x(who,'ord '+what)
                        if what == 'Gunning':
                                pa.gunner = True
                        return d('duty order')

                m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$')
                if m: oba('stopped'); return d("end")

                def chat_core(speaker, chan):
                        # Note chat by `speaker' on `chan'; returns a word
                        # saying where the speaker is relative to us.
                        try: pa = self._pl[speaker]
                        except KeyError: return 'mystery'
                        if pa.v is not self._v: return 'elsewhere'
                        pa.last_chat_time = timestamp
                        pa.last_chat_chan = chan
                        self.force_redisplay()
                        return 'here'

                def chat(chan):
                        speaker = m.group(1)
                        dm = chat_core(speaker, chan)
                        return d('chat %s %s' % (chan, dm))

                def chat_metacmd(chan):
                        # Chat text of the form '/a [vessel:] names' or
                        # '/d [vessel:] names' is a command: /a records the
                        # named pirates as aboard the vessel, /d removes
                        # them.  Anything else is treated as ordinary chat.
                        (cmdr, metacmd) = m.groups()
                        metacmd = regexp.sub('\\s+', ' ', metacmd).strip()
                        m2 = regexp.match(
                                '/([ad]) (?:([A-Za-z* ]+)\\s*:)?([A-Za-z ]+)$',
                                metacmd)
                        if not m2: return chat(chan)

                        (cmd, pattern, targets) = m2.groups()
                        dml = ['cmd', chan, cmd]

                        if cmd == 'a': each = self._onboard_event
                        else: each = disembark

                        if cmdr == self._myself.name:
                                dml.append('self')
                                how = 'cmd: %s' % cmd
                        else:
                                dml.append('other')
                                how = 'cmd: %s %s' % (cmd,cmdr)

                        v = self._find_matching_vessel(
                                pattern, timestamp, cmdr, dml, create=True)

                        if v is not None:
                                targets = targets.strip().split(' ')
                                dml.append(`len(targets)`)
                                for target in targets:
                                        each(v, timestamp, target.title(), how)
                                self._vessel_updated(v, timestamp)

                        dm = ' '.join(dml)
                        chat_core(cmdr, 'cmd '+chan)
                        return d(dm)

                m = rm('(\\w+) (?:issued an order|ordered everyone) "')
                if m: return ob1('general order');

                m = rm('(\\w+) says, "')
                if m: return chat('public')

                m = rm('(\\w+) tells ye, "')
                if m: return chat('private')

                m = rm('Ye told (\\w+), "(.*)"$')
                if m: return chat_metacmd('private')

                # Must be tried before the plain 'officer chats' pattern,
                # which would match these lines at a different word.
                m = rm('(\\w+) flag officer chats, "')
                if m: return chat('flag officer')

                m = rm('(\\w+) officer chats, "(.*)"$')
                if m: return chat_metacmd('officer')

                m = rm('Ye accepted the offer to job with ')
                if m:
                        self._disembark_myself()
                        return d('taking-job')

                m = rm('Game over\\.  Winners: ([A-Za-z, ]+)\\.$')
                if m:
                        pl = m.group(1).split(', ')
                        if not self._myself.name in pl:
                                return d('lost boarding battle')
                        for pn in pl:
                                # skip entries that are not a single word
                                if ' ' in pn: continue
                                ob_x(pn,'won boarding battle')
                        return d('won boarding battle')

                m = rm('(\\w+) is eliminated\\!')
                if m: return ob1('eliminated in fray');

                m = rm('(\\w+) has left the vessel\.')
                if m:
                        pirate = m.group(1)
                        disembark(self._v, timestamp, pirate, 'disembarked')
                        return d('disembarked')

                return d('not-matched')
734
735         def _str_vessel(self, vn, v):
736                 s = ' vessel %s\n' % vn
737                 s += ' '*20 + "%-*s   %13s\n" % (
738                                 max_pirate_namelen, '#lastinfo',
739                                 v['#lastinfo'])
740                 assert v['#name'] == vn
741                 for pn in sorted(v.keys()):
742                         if pn.startswith('#'): continue
743                         pa = v[pn]
744                         assert pa.v == v
745                         assert self._pl[pn] == pa
746                         s += ' '*20 + "%s %-*s %13s %-30s %13s %s\n" % (
747                                 (' ','G')[pa.gunner],
748                                 max_pirate_namelen, pn,
749                                 pa.last_time, pa.last_event,
750                                 pa.last_chat_time, pa.last_chat_chan)
751                 return s
752
753         def __str__(self):
754                 s = '''<ChatLogTracker
755  myself %s
756  vessel %s
757 '''                     % (self._myself.name, self._vessel)
758                 assert ((self._v is None and self._vessel is None) or
759                         (self._v is self._vl[self._vessel]))
760                 if self._vessel is not None:
761                         s += self._str_vessel(self._vessel, self._v)
762                 for vn in sorted(self._vl.keys()):
763                         if vn == self._vessel: continue
764                         s += self._str_vessel(vn, self._vl[vn])
765                 for p in self._pl:
766                         pa = self._pl[p]
767                         assert pa.v[p] is pa
768                         assert pa.v in self._vl.values()
769                 s += '>\n'
770                 return s
771
772         def catchup(self, progress=None):
773                 while True:
774                         more = self._f.readline()
775                         if not more: break
776
777                         self._progress[0] += len(more)
778                         if progress: progress.progress(*self._progress)
779
780                         self._lbuf += more
781                         if self._lbuf.endswith('\n'):
782                                 self.chatline(self._lbuf.rstrip())
783                                 self._lbuf = ''
784                                 if opts.debug >= 2:
785                                         debug(self.__str__())
786                 if progress: progress.caughtup()
787
788         def changed(self):
789                 rv = self._need_redisplay
790                 self._need_redisplay = False
791                 return rv
792         def myname(self):
793                 # returns our pirate name
794                 return self._myself.name
795         def vessel(self):
796                 # returns the vessel we're aboard or None
797                 return self._vessel
798         def aboard(self):
799                 # returns a list of PirateAboard sorted by name
800                 if self._v is None: return []
801                 return [ self._v[pn]
802                          for pn in sorted(self._v.keys())
803                          if not pn.startswith('#') ]
804
805 #---------- implementations of actual operation modes ----------
806
def do_pirate(pirates, bu):
        # 'pirate' mode: print a brace-delimited listing with one
        # "REPR-OF-NAME: INFO," line for each name in pirates.
        # bu (the bad-usage callback) is accepted but not used here.
        print('{')
        for name in pirates:
                print('%s: %s,' % (repr(name), PirateInfo(name)))
        print('}')
813
def prep_crew_of(args, bu, max_age=300):
        # Find the crew of the one pirate named in args.
        #  args     should hold exactly one pirate name; otherwise
        #           bu() is called with an explanatory message
        #  bu       bad-usage callback
        #  max_age  handed to both PirateInfo and CrewInfo
        # Returns a CrewInfo built from the first element of the
        # pirate's crew entry, or None when that entry is None.
        if len(args) != 1:
                bu('crew-of takes one pirate name')
        pirate = PirateInfo(args[0], max_age)
        crew = pirate.crew
        if crew is None:
                return None
        return CrewInfo(crew[0], max_age)
819
def do_crew_of(args, bu):
        # 'crew-of' mode: print whatever prep_crew_of() returns for
        # the pirate named in args
        print(prep_crew_of(args, bu))
823
def do_standings_crew_of(args, bu):
        # 'standings-crew-of' mode: print a standings table covering
        # every member of the named pirate's crew, grouped by rank.
        #  args  exactly one pirate name (checked by prep_crew_of)
        #  bu    bad-usage callback, passed on to prep_crew_of
        ci = prep_crew_of(args, bu, 60)
        if ci is None:
                # prep_crew_of returns None when the pirate has no crew
                # entry; report that instead of failing on ci.crew.
                sys.stderr.write('%s is not in a crew\n' % args[0])
                return
        tab = StandingsTable()
        tab.headings()
        for (rank, members) in ci.crew:
                if not members: continue
                tab.literalline('%s:' % rank)
                for p in members:
                        # each member gets its own random maximum age
                        # between 900 and 1800
                        pi = PirateInfo(p, random.randint(900,1800))
                        tab.pirate(pi)
        print(tab.results())
835
class ProgressPrintPercentage:
        # Minimal progress reporter.  It writes a one-line percentage
        # ending in '\r' to a file object (sys.stdout unless another
        # is supplied), so that each update lands on the same line.

        def __init__(self, f=sys.stdout):
                self._f = f

        def progress_string(self,done,total):
                # the text shown for "done out of total"
                percent = (done*100) / total
                return "scan chat logs %3d%%\r" % percent

        def progress(self,*a):
                # a is (done, total), handed on to progress_string()
                out = self._f
                out.write(self.progress_string(*a))
                out.flush()

        def show_init(self, pirate, ocean):
                # one-off start-up banner
                banner = 'Starting up, %s on the %s ocean' % (
                        pirate, ocean)
                print >>self._f, banner

        def caughtup(self):
                # blank out the progress text now the scan is complete
                out = self._f
                out.write('                   \r')
                out.flush()
850
851 #----- modes which use the chat log parser are quite complex -----
852
def prep_chat_log(args, bu,
                progress=ProgressPrintPercentage(),
                max_myself_age=3600):
        # Common start-up for the modes driven by a chat log.
        #  args            must be exactly [chat log filename]; the
        #                  last path component has to begin
        #                  Pirate_ocean_ (see logfn_re below)
        #  bu              bad-usage callback, called with a message
        #  progress        reporter: gets show_init() here and is
        #                  passed to the tracker's first catchup()
        #  max_myself_age  maximum age given to PirateInfo when
        #                  looking up our own pirate
        # Returns (myself, track): our own PirateInfo and a
        # ChatLogTracker which has already read the existing log.
        if len(args) != 1: bu('this action takes only chat log filename')
        logfn = args[0]
        logfn_re = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_'
        match = regexp.match(logfn_re, logfn)
        if not match: bu('chat log filename is not in expected format')
        (pirate, ocean) = match.groups()
        fetcher.default_ocean(ocean)

        myself = PirateInfo(pirate,max_myself_age)
        progress.show_init(pirate, fetcher.ocean)
        track = ChatLogTracker(myself, logfn)

        # The debug level is lowered by 2 for the initial scan
        # (presumably to keep the replay of old log lines quiet).
        # try/finally makes sure the global level is put back even
        # if the scan raises.
        opts.debug -= 2
        try:
                track.catchup(progress)
        finally:
                opts.debug += 2

        track.force_redisplay()

        return (myself, track)
875
def do_track_chat_log(args, bu):
        # 'track-chat-log' mode: follow the chat log named in args
        # indefinitely, printing the tracker's state each time it
        # reports a change.  Polls once a second and never returns.
        tracker = prep_chat_log(args, bu)[1]
        while True:
                tracker.catchup()
                if tracker.changed():
                        print(tracker)
                sleep(1)
883
884 #----- ship management aid -----
885
class Display_dumb(ProgressPrintPercentage):
        # Simplest ship-aid display: each update is just printed,
        # preceded by two newlines, with no terminal control at all.

        def __init__(self):
                ProgressPrintPercentage.__init__(self)

        def realstart(self):
                # nothing to prepare before the first update
                pass

        def show(self, s):
                print('\n\n%s' % (s,))
893
class Display_overwrite(ProgressPrintPercentage):
        # Ship-aid display which redraws in place: each update homes
        # the cursor, rewrites every line followed by the 'el' string
        # and finishes with the 'ed' string.
        #
        # State:
        #  self._clear  the terminal's 'clear' string ('' if it has none)
        #  self._t      dict of control strings keyed 'ho', 'el', 'ed';
        #               None when there is no 'clear' capability, in
        #               which case show() simply prints, like
        #               Display_dumb does

        def __init__(self):
                ProgressPrintPercentage.__init__(self)

                # curses is set up against /dev/null; only the
                # capability strings are fetched from it below
                null = open('/dev/null','w')
                curses.setupterm(fd=null.fileno())

                self._t = None
                self._clear = curses.tigetstr('clear')
                if not self._clear:
                        self._debug('missing clear!')
                        # Fall back to plain printing.  (Assigning
                        # Display_dumb.show to the instance does not
                        # work: it is an unbound method of another
                        # class.)  '' keeps realstart() harmless.
                        self._clear = ''
                        return

                self._t = {'el':'', 'ed':''}
                if not self._init_sophisticated():
                        # no selective clearing available: clear the
                        # whole screen on every update instead
                        for k in self._t.keys(): self._t[k] = ''
                        self._t['ho'] = self._clear

        def _debug(self,m): debug('display overwrite: '+m)

        def _init_sophisticated(self):
                # Look up 'el', 'ed' and a cursor-home string ('ho', or
                # failing that 'cup' applied to 0,0) into self._t.
                # Returns 1 if all were found, 0 (after a debug message
                # naming the missing ones) otherwise.
                for k in self._t.keys():
                        self._t[k] = curses.tigetstr(k)
                self._t['ho'] = curses.tigetstr('ho')
                if not self._t['ho']:
                        cup = curses.tigetstr('cup')
                        # tparm cannot take a missing capability
                        if cup:
                                self._t['ho'] = curses.tparm(cup,0,0)
                missing = [k for k in self._t.keys() if not self._t[k]]
                if missing:
                        self._debug('missing '+(' '.join(missing)))
                        return 0
                return 1

        def show(self, s):
                # Redraw the screen with the multi-line string s.
                if self._t is None:
                        # no terminal capabilities: scroll instead
                        print('\n\n%s' % (s,))
                        return

                w = sys.stdout.write
                def wti(k): w(self._t[k])

                wti('ho')
                nl = ''
                for l in s.rstrip().split('\n'):
                        w(nl)
                        w(l)
                        wti('el')
                        nl = '\r\n'
                wti('ed')
                w(' ')
                sys.stdout.flush()

        def realstart(self):
                # clear the screen once before the first update
                if self._clear:
                        sys.stdout.write(self._clear)
                sys.stdout.flush()
946                         
947
def do_ship_aid(args, bu):
        # 'ship-aid' mode: follow the chat log named in args and, once
        # a second, redisplay who is aboard our vessel together with
        # their standings, last event and last chat.  Never returns.
        #  args  [chat log filename], checked by prep_chat_log
        #  bu    bad-usage callback
        if opts.ship_duty is None: opts.ship_duty = True

        displayer = globals()['Display_'+opts.display]()
        spinner = '/-\\'

        track = prep_chat_log(args, bu, progress=displayer)[1]

        def timeevent(when, what):
                # " AGE EVENT" column, or 22 blanks if there is no
                # timestamp.  'now' is the timestamp the loop below
                # takes at the start of each redraw.
                if when is None:
                        return ' ' * 22
                age = format_time_interval(now - when)
                return " %-4s %-16s" % (age, what)

        displayer.realstart()

        while True:
                track.catchup()
                now = time.time()

                s = "%s" % track.myname()

                vn = track.vessel()
                if vn is not None:
                        s += " on board the %s" % vn
                else:
                        s += " not on a vessel"
                s += " at %s\n" % time.strftime("%Y-%m-%d %H:%M:%S")

                tbl = StandingsTable()
                tbl.headings()

                for pa in track.aboard():
                        pi = pa.pirate_info()

                        if pa.gunner:
                                xs = 'G '
                        else:
                                xs = '  '
                        xs += timeevent(pa.last_time, pa.last_event)
                        xs += timeevent(pa.last_chat_time,
                                        pa.last_chat_chan)

                        if pi is not None:
                                tbl.pirate(pi, xs)
                        else:
                                # no info for this pirate: show the
                                # current spinner character instead
                                tbl.pirate_dummy(pa.name, spinner[0], xs)

                s += tbl.results()

                displayer.show(s)
                sleep(1)
                # advance the spinner by one position
                spinner = spinner[1:] + spinner[:1]
995
996 #---------- main program ----------
997
def main():
        # Program entry point: parse the command line, fill in the
        # global opts, create the global fetcher and run the function
        # which implements the requested mode.
        #
        # The mode is the first non-option argument.  It is mapped to
        # a global function name by prefixing 'do_' and turning '-'
        # into '_'; any '_' typed by the user is first turned into '#'
        # so that such a name cannot match a function.  The function
        # is called with the remaining arguments and the parser's
        # error() method as its bad-usage callback.
        global opts, fetcher

        pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
actions:
 yoweb-scrape [--ocean OCEAN ...] pirate PIRATE
 yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
 yoweb-scrape [options] ship-aid CHAT-LOG  (must be .../PIRATE_OCEAN_chat-log*)

display modes (for --display) apply to ship-aid:
 --display=dumb       just print new information, scrolling the screen
 --display=overwrite  use cursor motion, selective clear, etc. to redraw at top
''')
        ao = pa.add_option
        ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None,
                help='select ocean OCEAN')
        ao('--cache-dir', dest='cache_dir', metavar='DIR',
                default='~/.yoweb-scrape-cache',
                help='cache yoweb pages in DIR')
        ao('-D','--debug', action='count', dest='debug', default=0,
                help='enable debugging output')
        ao('--debug-fd', type='int', dest='debug_fd',
                help='write any debugging output to specified fd')
        ao('-q','--quiet', action='store_true', dest='quiet',
                help='suppress warning output')
        ao('--display', action='store', dest='display',
                type='choice', choices=['dumb','overwrite'],
                help='how to display ship aid')

        # both options share dest='ship_duty'; it stays None if
        # neither is given
        ao('--ship-duty', action='store_true', dest='ship_duty',
                help='show ship duty station puzzles')
        ao('--all-puzzles', action='store_false', dest='ship_duty',
                help='show all puzzles, not just ship duty stations')

        (opts,args) = pa.parse_args()
        random.seed()

        if len(args) < 1:
                pa.error('need a mode argument')

        # debugging output goes to --debug-fd if given, else stdout
        if opts.debug_fd is not None:
                opts.debug_file = os.fdopen(opts.debug_fd, 'w')
        else:
                opts.debug_file = sys.stdout

        mode = args[0]
        mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_')
        try: mode_fn = globals()[mode_fn_name]
        except KeyError: pa.error('unknown mode "%s"' % mode)

        # fixed parameters
        opts.min_max_age = 60
        opts.expire_age = 3600
        opts.ship_reboard_clearout = 3600

        if opts.cache_dir.startswith('~/'):
                # expanduser still works when HOME is not set in the
                # environment, where os.getenv('HOME') + ... would
                # raise TypeError
                opts.cache_dir = os.path.expanduser(opts.cache_dir)

        # default display: plain scrolling output if debug output is
        # going to stdout or stdout is not a terminal, else redraw
        if opts.display is None:
                if ((opts.debug > 0 and opts.debug_fd is None)
                    or not os.isatty(sys.stdout.fileno())):
                        opts.display = 'dumb'
                else:
                        opts.display = 'overwrite'

        fetcher = Fetcher(opts.ocean, opts.cache_dir)

        mode_fn(args[1:], pa.error)
1069
# module top level: parse the command line and run the requested mode
main()