chiark / gitweb /
4369cab5a99432ede79e1ddc4b02c445785f7837
[ypp-sc-tools.main.git] / yoweb-scrape
1 #!/usr/bin/python
2
3 #---------- setup ----------
4
5 import signal
6 signal.signal(signal.SIGINT, signal.SIG_DFL)
7
8 import os
9 import time
10 import urllib
11 import urllib2
12 import errno
13 import sys
14 import re as regexp
15 import random
16 import curses
17 import termios
18 from optparse import OptionParser
19
20 from BeautifulSoup import BeautifulSoup
21
22 opts = None
23
24 #---------- YPP parameters and arrays ----------
25
# Names of all puzzles for which a standing is looked up on a pirate page.
# When no ship-duty subset is selected, StandingsTable uses this list (in
# this order) as its columns.
puzzles = [
        'Swordfighting', 'Bilging', 'Sailing', 'Rigging', 'Navigating',
        'Battle Navigation', 'Gunning', 'Carpentry', 'Rumble',
        'Treasure Haul', 'Drinking', 'Spades', 'Hearts', 'Treasure Drop',
        'Poker', 'Distilling', 'Alchemistry', 'Shipwrightery',
        'Blacksmithing', 'Foraging',
        ]

# An entry which is itself a list is a group of puzzles sharing one
# table column: StandingsTable shows the best standing of the group.
core_duty_puzzles = [
        'Gunning',
        ['Sailing', 'Rigging'],
        'Bilging',
        'Carpentry',
        ]

duty_puzzles = ['Navigating', 'Battle Navigation']
duty_puzzles.extend(core_duty_puzzles)
duty_puzzles.append('Treasure Haul')

# Standing names, weakest first.  A standing is represented numerically
# elsewhere by its index in this list.
standingvals = [
        'Able', 'Distinguished', 'Respected', 'Master',
        'Renowned', 'Grand-Master', 'Legendary', 'Ultimate',
        ]
standing_limit = len(standingvals)

# Matches the href of a link to a pirate's yoweb page.
pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm')

max_pirate_namelen = 12
49
50
51 #---------- general utilities ----------
52
def debug(m):
        """Print message m to opts.debug_file when debugging is enabled.

        Debugging is enabled when opts.debug is greater than zero;
        otherwise this does nothing.
        """
        if not opts.debug > 0:
                return
        print >>opts.debug_file, m
56
def debug_flush():
        """Flush opts.debug_file, but only when debugging is enabled."""
        if not opts.debug > 0:
                return
        opts.debug_file.flush()
60
def sleep(seconds):
        """Pause for `seconds` seconds.

        Pending debug output is flushed first, so that it is visible
        while we are waiting.
        """
        debug_flush()
        time.sleep(seconds)
64
def format_time_interval(ti):
        """Format a duration of ti seconds as a short string.

        The unit is chosen by magnitude:
          less than 2 minutes   'M:SS'
          less than 2 hours     whole minutes, eg '15m' (padded to 2 digits)
          less than 1 day       whole hours, eg '3h'
          otherwise             whole days, eg '2d'
        """
        if ti >= 86400:
                return '%dd' % (ti / 86400)
        if ti >= 7200:
                return '%dh' % (ti / 3600)
        if ti >= 120:
                return '%2dm' % (ti / 60)
        return '%d:%02d' % (ti / 60, ti % 60)
70
71 #---------- caching and rate-limiting data fetcher ----------
72
class Fetcher:
        """Caching, rate-limiting fetcher for web pages.

        Every page fetched is stored in the cache directory in a file
        named '#<url-quoted URL>#'.  The modification times of those
        files are used both to decide whether a cached copy is still
        fresh enough to reuse, and as a record of how recently requests
        were made, so that new requests can be delayed (see need_wait).

        Reads opts.expire_age and opts.min_max_age from the module-level
        options object.
        """

        def __init__(self, ocean, cachedir):
                """Set up the fetcher and expire old cache entries.

                ocean     ocean name used to build yoweb URLs; may be None,
                          in which case default_ocean() can fill it in
                cachedir  cache directory; created if it does not exist
                """
                debug('Fetcher init %s' % cachedir)
                self.ocean = ocean
                self.cachedir = cachedir
                try: os.mkdir(cachedir)
                except (OSError,IOError) as oe:
                        if oe.errno != errno.EEXIST: raise
                self._cache_scan(time.time())

        def default_ocean(self, ocean='ice'):
                """Set self.ocean to `ocean`, unless an ocean is already set."""
                if self.ocean is None:
                        self.ocean = ocean

        def _cache_scan(self, now):
                """Expire old cache entries and report the ages of the rest.

                Only files whose name starts with '#' are considered.
                Those older than opts.expire_age are deleted.  Files which
                vanish while we are looking are silently ignored.

                Returns the list of ages (now - mtime, in seconds) of the
                surviving entries, unsorted.
                """
                ages = []
                debug('Fetcher   scan_cache')
                for leaf in os.listdir(self.cachedir):
                        if not leaf.startswith('#'): continue
                        path = self.cachedir + '/' + leaf
                        try: s = os.stat(path)
                        except (OSError,IOError) as oe:
                                if oe.errno != errno.ENOENT: raise
                                continue
                        age = now - s.st_mtime
                        if age > opts.expire_age:
                                debug('Fetcher    expire %d %s' % (age, path))
                                try: os.remove(path)
                                except (OSError,IOError) as oe:
                                        if oe.errno != errno.ENOENT: raise
                                continue
                        ages.append(age)
                return ages

        def need_wait(self, now, imaginary=()):
                """Return how long to wait before the next fetch is allowed.

                now        current time, as from time.time()
                imaginary  extra ages (seconds) to take into account as
                           though fetches had been made that long ago;
                           it is not modified

                The ages of the cache entries are sorted, youngest first,
                and each is compared with a minimum age which starts at 1
                second and grows (by +3, then *1.25) for each successive
                entry.  An entry younger than its minimum, and younger
                than 300 seconds, demands a wait of the difference.  The
                result is the largest such wait, or 0.
                """
                ages = self._cache_scan(now)
                ages.extend(imaginary)
                ages.sort()
                debug('Fetcher   ages ' + repr(ages))
                min_age = 1
                need_wait = 0
                for age in ages:
                        if age < min_age and age < 300:
                                debug('Fetcher   morewait min=%d age=%d' %
                                        (min_age, age))
                                need_wait = max(need_wait, min_age - age)
                        min_age += 3
                        min_age *= 1.25
                return need_wait

        def _rate_limit_cache_clean(self, now):
                """Sleep for as long as need_wait(now) says is necessary."""
                need_wait = self.need_wait(now)
                if need_wait > 0:
                        debug('Fetcher   wait %d' % need_wait)
                        sleep(need_wait)

        def fetch(self, url, max_age):
                """Return the contents of url, from the cache if possible.

                max_age is the greatest acceptable age (seconds) of a
                cached copy; it is clamped to lie between
                opts.min_max_age and opts.expire_age.  If there is no
                sufficiently fresh cached copy the page is fetched (after
                any rate-limiting delay) and the cache updated.

                Errors from opening the URL, and filesystem errors other
                than a missing cache file, propagate to the caller.
                """
                debug('Fetcher fetch %s' % url)
                cache_corename = urllib.quote_plus(url)
                cache_item = "%s/#%s#" % (self.cachedir, cache_corename)
                try: f = open(cache_item, 'r')
                except (OSError,IOError) as oe:
                        if oe.errno != errno.ENOENT: raise
                        f = None
                now = time.time()
                max_age = max(opts.min_max_age, min(max_age, opts.expire_age))
                if f is not None:
                        # Whether the copy is usable or stale, the cache
                        # file must not be left open.
                        try:
                                s = os.fstat(f.fileno())
                                age = now - s.st_mtime
                                if age <= max_age:
                                        data = f.read()
                                        debug('Fetcher  cached %d > %d' %
                                                (max_age, age))
                                        return data
                                debug('Fetcher  stale %d < %d'% (max_age, age))
                        finally:
                                f.close()

                debug('Fetcher  fetch')
                self._rate_limit_cache_clean(now)

                stream = urllib2.urlopen(url)
                try: data = stream.read()
                finally: stream.close()

                # Write to a per-process temporary name and rename into
                # place, so the cache entry is replaced in one step.
                cache_tmp = "%s/#%s~%d#" % (
                        self.cachedir, cache_corename, os.getpid())
                f = open(cache_tmp, 'w')
                try: f.write(data)
                finally: f.close()
                os.rename(cache_tmp, cache_item)
                debug('Fetcher  stored')
                return data

        def yoweb(self, kind, tail, max_age):
                """Fetch a yoweb page for the current ocean.

                The URL is
                  http://<ocean>.puzzlepirates.com/yoweb/<kind><tail>
                where the ocean defaults as per default_ocean().  max_age
                is as for fetch().
                """
                self.default_ocean()
                url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
                        self.ocean, kind, tail)
                return self.fetch(url, max_age)
171
172 #---------- logging assistance for troubled screenscrapers ----------
173
class SoupLog:
        """Accumulates messages describing problems found while scraping.

        The messages are kept, in order, in the list self.msgs.
        """

        def __init__(self):
                self.msgs = [ ]

        def msg(self, m):
                """Record message m."""
                self.msgs.append(m)

        def soupm(self, obj, m):
                """Record message m, annotated with the repr of obj."""
                annotated = m + '; in ' + repr(obj)
                self.msg(annotated)

        def needs_msgs(self, child_souplog):
                """Take over all of child_souplog's messages.

                They are appended to ours, and child_souplog is left
                with an empty message list.
                """
                self.msgs.extend(child_souplog.msgs)
                child_souplog.msgs = [ ]
184
def soup_text(obj):
        """Return the text found within obj, with surrounding space removed.

        All the text nodes returned by obj.findAll(text=True) are
        concatenated, and the result is stripped.
        """
        pieces = obj.findAll(text=True)
        return ''.join(pieces).strip()
188
class SomethingSoupInfo(SoupLog):
        # Common base for the page scrapers.  Fetches a yoweb page and
        # parses it; the parse tree is left in self._soup for the
        # subclass to pick apart.  Scrape problems are logged via SoupLog.

        def __init__(self, kind, tail, max_age):
                # kind, tail and max_age are passed on to fetcher.yoweb.
                # NOTE(review): `fetcher' is a module-level object set up
                # outside this part of the file - presumably a Fetcher.
                SoupLog.__init__(self)
                page = fetcher.yoweb(kind, tail, max_age)
                entity_mode = BeautifulSoup.HTML_ENTITIES
                self._soup = BeautifulSoup(page, convertEntities=entity_mode)
196
197 #---------- scraper for pirate pages ----------
198
class PirateInfo(SomethingSoupInfo):
        # Scraper for a pirate's yoweb page.
        #
        # Public data members:
        #  pi.standings = { 'Treasure Haul': 0, ... }
        #       maps puzzle name to standing, as an index into
        #       standingvals (0 = 'Able'); puzzles whose standing could
        #       not be determined are absent
        #  pi.name = name
        #  pi.crew = (id, name)   or None if not found
        #  pi.flag = (id, name)   or None if not found
        #  pi.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, pirate, max_age=300):
                """Fetch and scrape the page for the pirate named `pirate`.

                max_age is the acceptable age of a cached copy of the
                page, in seconds.
                """
                SomethingSoupInfo.__init__(self,
                        'pirate.wm?target=', pirate, max_age)
                self.name = pirate
                self._find_standings()
                self.crew = self._find_crewflag('crew',
                        '^/yoweb/crew/info\\.wm')
                self.flag = self._find_crewflag('flag',
                        '^/yoweb/flag/info\\.wm')

        def _find_standings(self):
                """Fill in self.standings from the puzzle standing images.

                Each standing image's alt text names the puzzle; the
                standing itself is parsed out of the text of the table
                cell following the one containing the image.

                Problems are collected in a private log, which is merged
                into self.msgs only if at least one puzzle ended up
                without a standing.
                """
                imgs = self._soup.findAll('img',
                        src=regexp.compile('/yoweb/images/stat.*'))
                # Two alternatives: '<something>/<Standing>', or
                # '<something>/<something> (ocean-wide <Standing>)'.
                standing_re = regexp.compile(
u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
                        )

                # Every standing string found, per puzzle.
                standings = dict((skill, [ ]) for skill in puzzles)

                skl = SoupLog()

                for img in imgs:
                        try: puzzle = img['alt']
                        except KeyError: continue

                        if not puzzle in puzzles:
                                skl.soupm(img, 'unknown puzzle: "%s"' % puzzle)
                                continue
                        key = img.findParent('td')
                        if key is None:
                                skl.soupm(img, 'puzzle at root! "%s"' % puzzle)
                                continue
                        valelem = key.findNextSibling('td')
                        if valelem is None:
                                skl.soupm(key, 'puzzle missing sibling "%s"'
                                        % puzzle)
                                continue
                        valstr = soup_text(valelem)
                        match = standing_re.match(valstr)
                        if match is None:
                                skl.soupm(key, ('puzzle "%s" unparseable'+
                                        ' standing "%s"') % (puzzle, valstr))
                                continue
                        # lastindex picks whichever alternative matched.
                        standing = match.group(match.lastindex)
                        standings[puzzle].append(standing)

                self.standings = { }

                for puzzle in puzzles:
                        sl = standings[puzzle]
                        if len(sl) > 1:
                                skl.msg('puzzle "%s" multiple standings %s' %
                                                (puzzle, repr(sl)))
                                continue
                        if not sl:
                                skl.msg('puzzle "%s" no standing found' % puzzle)
                                continue
                        standing = sl[0]
                        try:
                                self.standings[puzzle] = (
                                        standingvals.index(standing))
                        except ValueError:
                                skl.msg('puzzle "%s" unknown standing "%s"' %
                                        (puzzle, standing))

                for puzzle in puzzles:
                        if not puzzle in self.standings:
                                # One transfer is enough: it empties skl.
                                self.needs_msgs(skl)
                                break

        def _find_crewflag(self, cf, yoweb_re):
                """Find the pirate's crew or flag on the page.

                cf        'crew' or 'flag': used in messages and as the
                          prefix of the '<cf>id=' parameter in the link
                yoweb_re  regexp (string) matching the href of the link

                Returns (id, name), where id is the string from the link
                and name is the link text.  Returns None, having logged a
                message, unless exactly one such link is found and its id
                can be parsed.
                """
                things = self._soup.findAll('a', href=regexp.compile(yoweb_re))
                if len(things) != 1:
                        self.msg('zero or several %s id references found' % cf)
                        return None
                thing = things[0]
                id_re = '\\b%sid\\=(\\w+)$' % cf
                id_haystack = thing['href']
                match = regexp.compile(id_re).search(id_haystack)
                if match is None:
                        self.soupm(thing, ('incomprehensible %s id ref'+
                                ' (%s in %s)') % (cf, id_re, id_haystack))
                        return None
                name = soup_text(thing)
                return (match.group(1), name)

        def __str__(self):
                return repr((self.crew, self.flag, self.standings, self.msgs))
297
298 #---------- scraper for crew pages ----------
299
class CrewInfo(SomethingSoupInfo):
        # Scraper for a crew's yoweb page.
        #
        # Public data members:
        #  ci.crew = [ ('Captain',        ['Pirate', ...]),
        #              ('Senior Officer', [...]),
        #               ... ]
        #  ci.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, crewid, max_age=300):
                # crewid is appended to the crew info URL; max_age is the
                # acceptable age of a cached copy of the page, in seconds.
                SomethingSoupInfo.__init__(self,
                        'crew/info.wm?crewid=', crewid, max_age)
                self._find_crew()

        def _find_crew(self):
                # Fills in self.crew.  The member table is located by
                # starting from the (unique) captain image and climbing
                # through enclosing tables until one contains a pirate
                # link.  Its rows are then read in order: a row with a
                # crew rank image starts a new rank, and pirate links in
                # other rows are members of the most recent rank.
                self.crew = []
                captain_imgs = self._soup.findAll('img',
                        src='/yoweb/images/crew-captain.png')
                if len(captain_imgs) != 1:
                        self.msg('crew members: no. of captain images != 1')
                        return

                tbl = captain_imgs[0]
                while not tbl.find('a', href=pirate_ref_re):
                        tbl = tbl.findParent('table')
                        if not tbl:
                                self.msg('crew members: cannot find table')
                                return

                rank_img_re = regexp.compile('/yoweb/images/crew')
                members = None  # member list of the rank being read
                for row in tbl.contents:
                        # findAll(recurse=False)
                        if isinstance(row,basestring):
                                # bare text between the rows
                                continue

                        if row.find('img', attrs={'src': rank_img_re}):
                                rank = soup_text(row)
                                members = []
                                self.crew.append((rank, members))
                                continue

                        for cell in row.findAll('a', href=pirate_ref_re):
                                if members is not None:
                                        members.append(soup_text(cell))
                                else:
                                        self.soupm(cell, 'crew members: crew'
                                                ' before rank')

        def __str__(self):
                return repr((self.crew, self.msgs))
347
348 #---------- pretty-printer for tables of pirate puzzle standings ----------
349
class StandingsTable:
        """Builds a plain-text table of pirates' puzzle standings.

        There is one row per pirate and one column per entry in the
        puzzle list.  An entry which is a list of puzzle names is a
        single column showing the best standing among those puzzles.
        The text built so far is in self.s (see also results()).
        """

        def __init__(self, use_puzzles=None, col_width=6, gap_every=5):
                """Start an empty table.

                use_puzzles  columns wanted; if None, duty_puzzles is used
                             when opts.ship_duty is set, otherwise puzzles
                col_width    width of a column including its separating
                             space
                gap_every    a blank line is output after every this many
                             rows; None for no blank lines
                """
                if use_puzzles is None:
                        if opts.ship_duty:
                                use_puzzles=duty_puzzles
                        else:
                                use_puzzles=puzzles
                self._puzzles = use_puzzles
                self.s = ''
                self._cw = col_width-1
                self._gap_every = gap_every
                self._linecount = 0

        def _pline(self, pirate, puzstrs, extra):
                """Append one row to the table.

                pirate   text for the name column
                puzstrs  iterable of cell texts, one per column; each is
                         padded or truncated to the column width
                extra    text to append after the last column, if true
                """
                if (self._linecount > 0
                    and self._gap_every is not None
                    and not (self._linecount % self._gap_every)):
                        self.s += '\n'
                self.s += ' %-*s' % (max(max_pirate_namelen, 14), pirate)
                for v in puzstrs:
                        self.s += ' %-*.*s' % (self._cw,self._cw, v)
                if extra:
                        self.s += ' ' + extra
                self.s += '\n'
                self._linecount += 1

        def _puzstr(self, pi, puzzle):
                """Return the cell text for pirate pi in column `puzzle`.

                puzzle is a puzzle name or a list of names; for a list,
                the highest standing is shown.  The result is '?' if any
                of the standings is unknown, and empty for standing 0.
                Otherwise it is the standing number (if the column is
                wide enough), followed by one '*' per two levels and a
                '+' for an odd level.
                """
                if not isinstance(puzzle,list): puzzle = [puzzle]
                try: standing = max([pi.standings[p] for p in puzzle])
                except KeyError: return '?'
                if not standing: return ''
                s = ''
                if self._cw > 4:
                        s += repr(standing)
                if self._cw > 5:
                        s += ' '
                # The repeat counts must be integers, hence // and not /.
                s += '*' * (standing // 2)
                s += '+' * (standing % 2)
                return s

        def headings(self, lhs='', rhs=None):
                """Append the heading row, and restart the row count.

                lhs goes in the name column and rhs after the last column.
                Puzzle names are abbreviated to make the column headings.
                """
                def puzn_redact(name):
                        # A group of puzzles becomes 'a/b', with each
                        # name cut to half the column width.  A name
                        # containing a space becomes (up to) 4 characters
                        # of what precedes the first space, followed by
                        # everything after that space.
                        if isinstance(name,list):
                                return '/'.join(
                                        ["%.*s" % (self._cw//2, puzn_redact(n))
                                         for n in name])
                        spc = name.find(' ')
                        if spc < 0: return name
                        return name[0:min(4,spc)] + name[spc+1:]
                # A non-positive count means no blank line is put before
                # the heading row.
                self._linecount = -2
                self._pline(lhs, [puzn_redact(p) for p in self._puzzles], rhs)
                self._linecount = 0

        def literalline(self, line):
                """Append `line` verbatim, and restart the row count."""
                self.s += line + '\n'
                self._linecount = 0

        def pirate_dummy(self, name, standingstring, extra=None):
                """Append a row whose cells do not come from a PirateInfo.

                NOTE(review): standingstring is repeated len(columns)
                times and the result is iterated for the cells, so a
                one-element list gives that element in every column,
                whereas a multi-character string would be spread one
                character per cell - confirm what callers pass.
                """
                self._pline(name, standingstring * len(self._puzzles), extra)

        def pirate(self, pi, extra=None):
                """Append the row for pirate pi (a PirateInfo or similar)."""
                puzstrs = [self._puzstr(pi,puz) for puz in self._puzzles]
                self._pline(pi.name, puzstrs, extra)

        def results(self):
                """Return the text of the table built so far."""
                return self.s
415
416 #---------- chat log parser ----------
417
class PirateAboard:
        # This is essentially a transparent, dumb, data class.
        #  pa.v                 vessel record the pirate is aboard
        #  pa.name
        #  pa.last_time         timestamp of the most recent event
        #  pa.last_event        description of the most recent event
        #  pa.gunner            initially False
        #  pa.last_chat_time    initially None
        #  pa.last_chat_chan    initially None
        #  pa.pi                PirateInfo, or None if not yet fetched
        #  pa.pi_fetched        when pa.pi was fetched; only set once
        #                       pa.pi has been

        def __init__(pa, pn, v, time, event):
                pa.name = pn
                pa.v = v
                pa.last_time = time
                pa.last_event = event
                pa.last_chat_time = pa.last_chat_chan = None
                pa.gunner = False
                pa.pi = None

        def pirate_info(pa):
                # Returns the PirateInfo for this pirate, fetching or
                # refreshing it if appropriate.  We never wait for the
                # fetcher: if it says a wait is needed then what we
                # already have is returned, which may be None.
                now = time.time()
                if not pa.pi:
                        imaginary = [1]
                else:
                        age = now - pa.pi_fetched
                        # The refresh threshold is randomised on each
                        # call, between 120 and 240 seconds.
                        guide = random.randint(120,240)
                        if age <= guide:
                                return pa.pi
                        debug('PirateAboard refresh %d > %d  %s' % (
                                age, guide, pa.name))
                        imaginary = [2,6]
                wait = fetcher.need_wait(now, imaginary)
                if wait:
                        debug('PirateAboard fetcher not ready %d' % wait)
                        return pa.pi
                pa.pi = PirateInfo(pa.name, 600)
                pa.pi_fetched = now
                return pa.pi
458
459 class ChatLogTracker:
460         # This is quite complex so we make it opaque.  Use the
461         # official invokers, accessors etc.
462
463         def __init__(self, myself_pi, logfn):
464                 self._pl = {}   # self._pl['Pirate'] =
465                 self._vl = {}   #   self._vl['Vessel']['Pirate'] = PirateAboard
466                                 # self._vl['Vessel']['#lastinfo']
467                                 # self._vl['Vessel']['#name']
468                                 # self._v = self._vl[self._vessel]
469                 self._date = None
470                 self._myself = myself_pi
471                 self._f = file(logfn)
472                 self._lbuf = ''
473                 self._progress = [0, os.fstat(self._f.fileno()).st_size]
474                 self._disembark_myself()
475                 self._need_redisplay = False
476                 self._lastvessel = None
477
478         def _disembark_myself(self):
479                 self._v = None
480                 self._vessel = None
481                 self.force_redisplay()
482
483         def force_redisplay(self):
484                 self._need_redisplay = True
485
486         def _vessel_updated(self, v, timestamp):
487                 v['#lastinfo'] = timestamp
488                 self.force_redisplay()
489
490         def _onboard_event(self,v,timestamp,pirate,event):
491                 pa = self._pl.get(pirate, None)
492                 if pa is not None and pa.v is v:
493                         pa.last_time = timestamp
494                         pa.last_event = event
495                 else:
496                         if pa is not None: del pa.v[pirate]
497                         pa = PirateAboard(pirate, v, timestamp, event)
498                         self._pl[pirate] = pa
499                         v[pirate] = pa
500                 self._vessel_updated(v, timestamp)
501                 return pa
502
503         def _trash_vessel(self, v):
504                 for pn in v:
505                         if pn.startswith('#'): continue
506                         del self._pl[pn]
507                 vn = v['#name']
508                 del self._vl[vn]
509                 if v is self._v: self._disembark_myself()
510                 self.force_redisplay()
511
512         def _vessel_stale(self, v, timestamp):
513                 return timestamp - v['#lastinfo'] > opts.ship_reboard_clearout
514
515         def _vessel_check_expire(self, v, timestamp):
516                 if not self._vessel_stale(v, timestamp):
517                         return v
518                 self._debug_line_disposition(timestamp,'',
519                         'stale-reset ' + v['#name'])
520                 self._trash_vessel(v)
521                 return None
522
523         def expire_garbage(self, timestamp):
524                 for v in self._vl.values():
525                         self._vessel_check_expire(v, timestamp)
526
527         def _vessel_lookup(self, vn, timestamp, dml=[], create=False):
528                 v = self._vl.get(vn, None)
529                 if v is not None:
530                         v = self._vessel_check_expire(v, timestamp)
531                 if v is not None:
532                         dml.append('found')
533                         return v
534                 if not create:
535                         dml.append('no')
536                 dml.append('new')
537                 self._vl[vn] = v = { '#name': vn }
538                 self._vessel_updated(v, timestamp)
539                 return v
540
541         def _find_matching_vessel(self, pattern, timestamp, cmdr,
542                                         dml=[], create=False):
543                 # use when a commander pirate `cmdr' specified a vessel
544                 #  by name `pattern' (either may be None)
545                 # if create is true, will create the vessel
546                 #  record if an exact name is specified
547
548                 if (pattern is not None and
549                     not '*' in pattern
550                     and len(pattern.split(' ')) == 2):
551                         vn = pattern.title()
552                         dml.append('exact')
553                         return self._vessel_lookup(
554                                 vn, timestamp, dml=dml, create=create)
555
556                 if pattern is None:
557                         pattern_check = lambda vn: True
558                 else:
559                         re = '(?:.* )?%s$' % pattern.lower().replace('*','.+')
560                         pattern_check = regexp.compile(re, regexp.I).match
561
562                 tries = []
563
564                 cmdr_pa = self._pl.get(cmdr, None)
565                 if cmdr_pa: tries.append((cmdr_pa.v, 'cmdr'))
566
567                 tries.append((self._v, 'here'))
568                 tried_vns = []
569
570                 for (v, dm) in tries:
571                         if v is None: dml.append(dm+'?'); continue
572                         
573                         vn = v['#name']
574                         if not pattern_check(vn):
575                                 tried_vns.append(vn)
576                                 dml.append(dm+'#')
577                                 continue
578
579                         dml.append(dm+'!')
580                         return v
581
582                 if pattern is not None and '*' in pattern:
583                         search = [
584                                 (vn,v)
585                                 for (vn,v) in self._vl.iteritems()
586                                 if not self._vessel_stale(v, timestamp)
587                                 if pattern_check(vn)
588                                 ]
589                         #debug('CLT-RE /%s/ wanted (%s) searched (%s)' % (
590                         #       re,
591                         #       '/'.join(tried_vns),
592                         #       '/'.join([vn for (vn,v) in search])))
593
594                         if len(search)==1:
595                                 dml.append('one')
596                                 return search[0][1]
597                         elif search:
598                                 dml.append('many')
599                         else:
600                                 dml.append('none')
601
602         def _debug_line_disposition(self,timestamp,l,m):
603                 debug('CLT %13s %-40s %s' % (timestamp,m,l))
604
605         def chatline(self,l):
606                 rm = lambda re: regexp.match(re,l)
607                 d = lambda m: self._debug_line_disposition(timestamp,l,m)
608                 timestamp = None
609
610                 m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$')
611                 if m:
612                         self._date = [int(x) for x in m.groups()]
613                         self._previous_timestamp = None
614                         return d('date '+`self._date`)
615
616                 if self._date is None:
617                         return d('date unset')
618
619                 m = rm('\\[(\d\d):(\d\d):(\d\d)\\] ')
620                 if not m:
621                         return d('no timestamp')
622
623                 while True:
624                         time_tuple = (self._date +
625                                       [int(x) for x in m.groups()] +
626                                       [-1,-1,-1])
627                         timestamp = time.mktime(time_tuple)
628                         if timestamp >= self._previous_timestamp: break
629                         self._date[2] += 1
630                         self._debug_line_disposition(timestamp,'',
631                                 'new date '+`self._date`)
632
633                 self._previous_timestamp = timestamp
634
635                 l = l[l.find(' ')+1:]
636
637                 def ob_x(pirate,event):
638                         return self._onboard_event(
639                                         self._v, timestamp, pirate, event)
640                 def ob1(did): ob_x(m.group(1), did); return d(did)
641                 def oba(did): return ob1('%s %s' % (did, m.group(2)))
642
643                 def disembark(v, timestamp, pirate, event):
644                         self._onboard_event(
645                                         v, timestamp, pirate, 'leaving '+event)
646                         del v[pirate]
647                         del self._pl[pirate]
648
649                 def disembark_me(why):
650                         self._disembark_myself()
651                         return d('disembark-me '+why)
652
653                 m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$')
654                 if m:
655                         dm = ['boarding']
656                         pn = self._myself.name
657                         vn = m.group(1)
658                         v = self._vessel_lookup(vn, timestamp, dm, create=True)
659                         self._lastvessel = self._vessel = vn
660                         self._v = v
661                         ob_x(pn, 'we boarded')
662                         self.expire_garbage(timestamp)
663                         return d(' '.join(dm))
664
665                 if self._v is None:
666                         return d('no vessel')
667
668                 m = rm('(\\w+) has come aboard\\.$')
669                 if m: return ob1('boarded');
670
671                 m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$')
672                 if m:
673                         (who,what) = m.groups()
674                         pa = ob_x(who,'ord '+what)
675                         if what == 'Gunning':
676                                 pa.gunner = True
677                         return d('duty order')
678
679                 m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$')
680                 if m: oba('stopped'); return d("end")
681
682                 def chat_core(speaker, chan):
683                         try: pa = self._pl[speaker]
684                         except KeyError: return 'mystery'
685                         if pa.v is not self._v: return 'elsewhere'
686                         pa.last_chat_time = timestamp
687                         pa.last_chat_chan = chan
688                         self.force_redisplay()
689                         return 'here'
690
691                 def chat(chan):
692                         speaker = m.group(1)
693                         dm = chat_core(speaker, chan)
694                         return d('chat %s %s' % (chan, dm))
695
696                 def chat_metacmd(chan):
697                         (cmdr, metacmd) = m.groups()
698                         metacmd = regexp.sub('\\s+', ' ', metacmd).strip()
699                         m2 = regexp.match(
700                                 '/([ad]) (?:([A-Za-z* ]+)\\s*:)?([A-Za-z ]+)$',
701                                 metacmd)
702                         if not m2: return chat(chan)
703
704                         (cmd, pattern, targets) = m2.groups()
705                         dml = ['cmd', chan, cmd]
706
707                         if cmd == 'a': each = self._onboard_event
708                         else: each = disembark
709
710                         if cmdr == self._myself.name:
711                                 dml.append('self')
712                                 how = 'cmd: %s' % cmd
713                         else:
714                                 dml.append('other')
715                                 how = 'cmd: %s %s' % (cmd,cmdr)
716
717                         v = self._find_matching_vessel(
718                                 pattern, timestamp, cmdr, dml, create=True)
719
720                         if v is not None:
721                                 targets = targets.strip().split(' ')
722                                 dml.append(`len(targets)`)
723                                 for target in targets:
724                                         each(v, timestamp, target.title(), how)
725                                 self._vessel_updated(v, timestamp)
726
727                         dm = ' '.join(dml)
728                         chat_core(cmdr, 'cmd '+chan)
729                         return d(dm)
730
731                 m = rm('(\\w+) (?:issued an order|ordered everyone) "')
732                 if m: return ob1('general order');
733
734                 m = rm('(\\w+) says, "')
735                 if m: return chat('public')
736
737                 m = rm('(\\w+) tells ye, "')
738                 if m: return chat('private')
739
740                 m = rm('Ye told (\\w+), "(.*)"$')
741                 if m: return chat_metacmd('private')
742
743                 m = rm('(\\w+) flag officer chats, "')
744                 if m: return chat('flag officer')
745
746                 m = rm('(\\w+) officer chats, "(.*)"$')
747                 if m: return chat_metacmd('officer')
748
749                 m = rm('Ye accepted the offer to job with ')
750                 if m: return disembark_me('jobbing')
751
752                 m = rm('Ye hop on the ferry and are whisked away ')
753                 if m: return disembark_me('ferry')
754
755                 m = rm('Whisking away to yer home on the magical winds')
756                 if m: return disembark_me('home')
757
758                 m = rm('Game over\\.  Winners: ([A-Za-z, ]+)\\.$')
759                 if m:
760                         pl = m.group(1).split(', ')
761                         if not self._myself.name in pl:
762                                 return d('lost melee')
763                         for pn in pl:
764                                 if ' ' in pn: continue
765                                 ob_x(pn,'won melee')
766                         return d('won melee')
767
768                 m = rm('(\\w+) is eliminated\\!')
769                 if m: return ob1('eliminated in fray');
770
771                 m = rm('(\\w+) has driven \w+ from the ship\\!')
772                 if m: return ob1('boarder repelled');
773
774                 m = rm('\w+ has bested (\\w+), and turns'+
775                         ' to the rest of the ship\\.')
776                 if m: return ob1('boarder unrepelled');
777
778                 m = rm('(\\w+) has left the vessel\.')
779                 if m:
780                         pirate = m.group(1)
781                         disembark(self._v, timestamp, pirate, 'disembarked')
782                         return d('disembarked')
783
784                 return d('not-matched')
785
786         def _str_vessel(self, vn, v):
787                 s = ' vessel %s\n' % vn
788                 s += ' '*20 + "%-*s   %13s\n" % (
789                                 max_pirate_namelen, '#lastinfo',
790                                 v['#lastinfo'])
791                 assert v['#name'] == vn
792                 for pn in sorted(v.keys()):
793                         if pn.startswith('#'): continue
794                         pa = v[pn]
795                         assert pa.v == v
796                         assert self._pl[pn] == pa
797                         s += ' '*20 + "%s %-*s %13s %-30s %13s %s\n" % (
798                                 (' ','G')[pa.gunner],
799                                 max_pirate_namelen, pn,
800                                 pa.last_time, pa.last_event,
801                                 pa.last_chat_time, pa.last_chat_chan)
802                 return s
803
804         def __str__(self):
805                 s = '''<ChatLogTracker
806  myself %s
807  vessel %s
808 '''                     % (self._myself.name, self._vessel)
809                 assert ((self._v is None and self._vessel is None) or
810                         (self._v is self._vl[self._vessel]))
811                 if self._vessel is not None:
812                         s += self._str_vessel(self._vessel, self._v)
813                 for vn in sorted(self._vl.keys()):
814                         if vn == self._vessel: continue
815                         s += self._str_vessel(vn, self._vl[vn])
816                 for p in self._pl:
817                         pa = self._pl[p]
818                         assert pa.v[p] is pa
819                         assert pa.v in self._vl.values()
820                 s += '>\n'
821                 return s
822
823         def catchup(self, progress=None):
824                 while True:
825                         more = self._f.readline()
826                         if not more: break
827
828                         self._progress[0] += len(more)
829                         if progress: progress.progress(*self._progress)
830
831                         self._lbuf += more
832                         if self._lbuf.endswith('\n'):
833                                 self.chatline(self._lbuf.rstrip())
834                                 self._lbuf = ''
835                                 if opts.debug >= 2:
836                                         debug(self.__str__())
837                 if progress: progress.caughtup()
838
839         def changed(self):
840                 rv = self._need_redisplay
841                 self._need_redisplay = False
842                 return rv
843         def myname(self):
844                 # returns our pirate name
845                 return self._myself.name
846         def vesselname(self):
847                 # returns the vessel name we're aboard or None
848                 return self._vessel
849         def lastvesselname(self):
850                 # returns the last vessel name we were aboard or None
851                 return self._lastvessel
852         def aboard(self, vesselname=True):
853                 # returns a list of PirateAboard the vessel
854                 #  sorted by pirate name
855                 #  you can pass this None and you'll get []
856                 #  or True for the current vessel (which is the default)
857                 #  the returned value is a fresh list of persistent
858                 #  PirateAboard objects
859                 if vesselname is True: v = self._v
860                 else: v = self._vl.get(vesselname.title())
861                 if v is None: return []
862                 return [ v[pn]
863                          for pn in sorted(v.keys())
864                          if not pn.startswith('#') ]
865
866 #---------- implementations of actual operation modes ----------
867
def do_pirate(pirates, bu):
        """Print a dict-like listing with one PirateInfo entry per name.

        pirates: iterable of pirate names
        bu:      usage-error callback (not used by this mode)
        """
        print('{')
        for pirate in pirates:
                info = PirateInfo(pirate)
                print('%s: %s,' % (repr(pirate), info))
        print('}')
874
def prep_crew_of(args, bu, max_age=300):
        """Look up the crew of the single pirate named in `args`.

        Calls bu() with a usage message unless exactly one argument was
        given.  Returns a CrewInfo built from the first element of the
        pirate's `crew` attribute, or None when that attribute is None.
        `max_age` is passed to both the PirateInfo and CrewInfo lookups.
        """
        if len(args) != 1:
                bu('crew-of takes one pirate name')
        pirate = PirateInfo(args[0], max_age)
        if pirate.crew is None:
                return None
        crew_ref = pirate.crew[0]
        return CrewInfo(crew_ref, max_age)
880
def do_crew_of(args, bu):
        """Print the crew information for the pirate named in `args`."""
        print(prep_crew_of(args, bu))
884
def do_standings_crew_of(args, bu):
        """Print a standings table for every member of a pirate's crew.

        args: list holding exactly one pirate name
        bu:   usage-error callback; called when the arguments are wrong
              or no crew could be found for the pirate
        """
        ci = prep_crew_of(args, bu, 60)
        if ci is None:
                # prep_crew_of returns None when the pirate has no crew
                # entry; report that instead of failing on ci.crew
                bu('no crew found for that pirate')
                return
        tab = StandingsTable()
        tab.headings()
        for (rank, members) in ci.crew:
                if not members: continue
                tab.literalline('')
                tab.literalline('%s:' % rank)
                for p in members:
                        # randomised maximum cache age for each member lookup
                        pi = PirateInfo(p, random.randint(900,1800))
                        tab.pirate(pi)
        print(tab.results())
897
class ProgressPrintPercentage:
        """Progress reporter that writes a percentage to a file object.

        The progress text ends in a carriage return rather than a
        newline, so successive updates overwrite one another.
        """
        def __init__(self, f=sys.stdout):
                self._f = f

        def progress_string(self,done,total):
                # whole-number percentage of `done` out of `total`
                percent = (done*100) // total
                return "scan chat logs %3d%%\r" % percent

        def progress(self,*a):
                # `a` is passed straight through to progress_string
                out = self._f
                out.write(self.progress_string(*a))
                out.flush()

        def show_init(self, pirate, ocean):
                print >>self._f, 'Starting up, %s on the %s ocean' % (
                        pirate, ocean)

        def caughtup(self):
                # overwrite the progress text with blanks
                out = self._f
                out.write('                   \r')
                out.flush()
912
913 #----- modes which use the chat log parser are quite complex -----
914
def prep_chat_log(args, bu,
                progress=ProgressPrintPercentage(),
                max_myself_age=3600):
        """Open a chat log, work out whose it is, and scan it to the end.

        args:           list holding exactly one chat log filename, whose
                        basename must start PIRATE_OCEAN_
        bu:             usage-error callback
        progress:       object with show_init/progress/caughtup methods,
                        used to report the initial scan
        max_myself_age: maximum cache age passed to our own PirateInfo

        Returns (myself, track): our PirateInfo and the ChatLogTracker.
        """
        if len(args) != 1: bu('this action takes only chat log filename')
        logfn = args[0]
        logfn_re = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_'
        match = regexp.match(logfn_re, logfn)
        if not match: bu('chat log filename is not in expected format')
        (pirate, ocean) = match.groups()
        fetcher.default_ocean(ocean)

        progress.show_init(pirate, fetcher.ocean)
        myself = PirateInfo(pirate,max_myself_age)
        track = ChatLogTracker(myself, logfn)

        # debugging is turned down by two levels for the initial scan;
        # restore it even if the scan raises
        opts.debug -= 2
        try:
                track.catchup(progress)
        finally:
                opts.debug += 2

        track.force_redisplay()

        return (myself, track)
937
def do_track_chat_log(args, bu):
        """Follow a chat log forever, printing the tracker when it changes."""
        (_myself, tracker) = prep_chat_log(args, bu)
        while True:
                tracker.catchup()
                if tracker.changed():
                        print(tracker)
                sleep(1)
945
946 #----- ship management aid -----
947
948 class Display_dumb(ProgressPrintPercentage):
949         def __init__(self):
950                 ProgressPrintPercentage.__init__(self)
951         def show(self, s):
952                 print '\n\n', s;
953         def realstart(self):
954                 pass
955
class Display_overwrite(ProgressPrintPercentage):
        """Display that redraws in place using terminfo control strings.

        Three levels of capability are handled:
          * 'ho' (or 'cup'), 'el' and 'ed' available: home the cursor,
            rewrite each line clearing to end of line, then clear to
            end of display;
          * only 'clear' available: clear the screen before each update;
          * not even 'clear': print updates one after another, as
            Display_dumb does.
        """
        def __init__(self):
                ProgressPrintPercentage.__init__(self)

                # terminfo is set up against /dev/null rather than stdout
                null = open('/dev/null','w')
                curses.setupterm(fd=null.fileno())

                self._t = {'el':'', 'ed':''}
                self._clear = curses.tigetstr('clear')
                if not self._clear:
                        # show() and realstart() check self._clear and fall
                        # back to plain scrolling output
                        self._debug('missing clear!')
                        return

                if not self._init_sophisticated():
                        for k in self._t.keys(): self._t[k] = ''
                        self._t['ho'] = self._clear

        def _debug(self,m): debug('display overwrite: '+m)

        def _init_sophisticated(self):
                # Fill self._t with the 'el', 'ed' and 'ho' control strings.
                # Returns 1 if all are available, 0 otherwise.
                for k in self._t.keys():
                        s = curses.tigetstr(k)
                        self._t[k] = s
                self._t['ho'] = curses.tigetstr('ho')
                if not self._t['ho']:
                        # no dedicated home string; build one from cursor
                        # addressing if the terminal has that
                        cup = curses.tigetstr('cup')
                        if cup:
                                self._t['ho'] = curses.tparm(cup,0,0)
                missing = [k for k in self._t.keys() if not self._t[k]]
                if missing:
                        self._debug('missing '+(' '.join(missing)))
                        return 0
                return 1

        def show(self, s):
                if not self._clear:
                        # no usable terminal control at all: same output
                        # as Display_dumb.show
                        sys.stdout.write('\n\n%s\n' % s)
                        return

                w = sys.stdout.write
                def wti(k): w(self._t[k])

                wti('ho')
                nl = ''
                for l in s.rstrip().split('\n'):
                        w(nl)
                        w(l)
                        wti('el')
                        nl = '\r\n'
                wti('ed')
                w(' ')
                sys.stdout.flush()

        def realstart(self):
                if not self._clear:
                        # nothing to clear with
                        return
                sys.stdout.write(self._clear)
                sys.stdout.flush()
1008                         
1009
def do_ship_aid(args, bu):
        """Run the interactive ship management aid on a chat log.

        args: list holding exactly one chat log filename
        bu:   usage-error callback

        The display class is chosen by opts.display, and keystrokes are
        read from fd 0 only when it is a terminal.
        """
        if opts.ship_duty is None: opts.ship_duty = True

        displayer = globals()['Display_'+opts.display]()

        (myself, track) = prep_chat_log(args, bu, progress=displayer)

        displayer.realstart()

        if os.isatty(0): kr_create = KeystrokeReader
        else: kr_create = DummyKeystrokeReader

        # Create the reader before entering the try block: if the
        # constructor fails there is nothing to stop, and the finally
        # clause must not hide the real error behind a NameError.
        kreader = kr_create(0, 10)
        try:
                ship_aid_core(myself, track, displayer, kreader)
        finally:
                kreader.stop()
                print('\n')
1028
class KeyBasedSorter:
        """Base for sorters that order PirateAboard lists by a sort key.

        Subclasses supply compar_key(pi), which maps a pirate info object
        to its sort key.
        """
        def compar_key_pa(self, pa):
                # pa.pirate_info() may return None; such pirates cannot be
                # given to compar_key, so they sort after all the others,
                # by name.
                pi = pa.pirate_info()
                if pi is None:
                        return (1, pa.name)
                return (0, self.compar_key(pi))
        def lsort_pa(self, l):
                # sorts the list `l` of PirateAboard in place
                l.sort(key = self.compar_key_pa)
1034
class NameSorter(KeyBasedSorter):
        """Sorter whose key is simply the pirate's name."""

        def compar_key(self, pi):
                return pi.name

        def desc(self):
                # text shown after "sorted by" in the table heading
                return 'name'
1038
class SkillSorter(NameSorter):
        """Sorter that ranks pirates by standing in the wanted puzzles.

        `relevant` is a '/'-separated list of puzzle names.  The key
        puts the highest best-wanted standing first, then compares the
        standings in the other core duty puzzles, then the name.
        """
        def __init__(self, relevant):
                self._want = frozenset(relevant.split('/'))
                # every core duty puzzle that is not one of the wanted ones
                others = set()
                for entry in core_duty_puzzles:
                        if isinstance(entry,basestring):
                                others.add(entry)
                        else:
                                others.update(entry)
                self._avoid = others - self._want
                self._desc = '%s' % relevant

        def desc(self): return self._desc

        def compar_key(self, pi):
                standings = pi.standings
                best_want = max([ standings.get(puz,-1)
                                  for puz in self._want ])
                # negated standings in ascending order, i.e. the pirate's
                # strongest other puzzle comes first
                best_avoid = sorted([ -standings.get(puz,standing_limit)
                                      for puz in self._avoid ])
                debug('compar_key %s bw=%s ba=%s' % (pi.name,
                        repr(best_want), repr(best_avoid)))
                return (-best_want, [ -x for x in best_avoid ], pi.name)
1065
def ship_aid_core(myself, track, displayer, kreader):
        """Run the ship-aid display loop until the user presses 'q'.

        Each pass catches up on the chat log through `track`, shows a
        StandingsTable of the pirates aboard via displayer.show(), and
        then reads one keystroke from `kreader`, which may change the
        sort order.  `myself` is not used here.
        """

        # keystrokes that select a SkillSorter, and the puzzles they sort by
        skill_keys = {
                'g': 'Gunning',
                'c': 'Carpentry',
                's': 'Sailing/Rigging',
                'b': 'Bilging',
                'n': 'Navigating',
                'd': 'Battle Navigation',
                't': 'Treasure Haul',
        }

        def find_vessel():
                # returns (vessel name or None, text describing where we are)
                current = track.vesselname()
                if current:
                        return (current, " on board the %s" % current)
                previous = track.lastvesselname()
                if previous:
                        return (previous, " ashore from the %s" % previous)
                return (None, " not on a vessel")

        def timeevent(t,e):
                # fixed-width "how long ago / what" cell; blank when there
                # is no time.  `now` comes from the loop below.
                if t is None:
                        return ' ' * 22
                age = format_time_interval(now - t)
                return " %-4s %-16s" % (age,e)

        displayer.show(track.myname() + find_vessel()[1] + '...')

        rotate_nya = '/-\\'

        sort = NameSorter()

        while True:
                track.catchup()
                now = time.time()

                (vn, where) = find_vessel()
                text = track.myname() + where
                text += " at %s" % time.strftime("%Y-%m-%d %H:%M:%S")
                text += kreader.info()
                text += '\n'

                aboard = track.aboard(vn)
                sort.lsort_pa(aboard)

                tbl = StandingsTable()
                tbl.headings(' %d aboard' % len(aboard),
                                '  sorted by '+sort.desc())

                for pa in aboard:
                        pi = pa.pirate_info()

                        if pa.gunner:
                                xs = 'G '
                        else:
                                xs = '  '
                        xs += timeevent(pa.last_time, pa.last_event)
                        xs += timeevent(pa.last_chat_time, pa.last_chat_chan)

                        if pi is None:
                                # no info for this pirate: placeholder row
                                tbl.pirate_dummy(pa.name, rotate_nya[0], xs)
                        else:
                                tbl.pirate(pi, xs)

                text += tbl.results()
                displayer.show(text)

                k = kreader.getch()
                if k is None:
                        # no keystroke: advance the placeholder spinner
                        rotate_nya = rotate_nya[1:3] + rotate_nya[0]
                        continue

                if k == 'q':
                        break
                if k == 'a':
                        sort = NameSorter()
                elif k in skill_keys:
                        sort = SkillSorter(skill_keys[k])
                # any other key is an unknown command and is ignored
1134
1135 #---------- individual keystroke input ----------
1136
class DummyKeystrokeReader:
        """Keystroke reader for non-interactive use: never returns a key."""

        def __init__(self,fd,timeout_dummy):
                # both arguments are accepted and ignored
                pass

        def stop(self):
                pass

        def getch(self):
                # wait a second, then report that no key was pressed
                sleep(1)
                return None

        def info(self):
                return ' [noninteractive]'
1142
class KeystrokeReader(DummyKeystrokeReader):
        """Reads single keystrokes from a terminal file descriptor.

        The constructor clears ECHO, ECHONL, ICANON and IEXTEN on `fd`
        and sets VMIN to 0 and VTIME to `timeout_decisec`; stop() puts
        the saved settings back.
        """
        def __init__(self, fd, timeout_decisec=0):
                self._fd = fd
                self._saved = termios.tcgetattr(fd)
                # fetch a second, independent copy to modify
                attrs = termios.tcgetattr(fd)
                lflags_off = (termios.ECHO | termios.ECHONL |
                              termios.ICANON | termios.IEXTEN)
                attrs[3] &= ~lflags_off
                cc = attrs[6]
                cc[termios.VMIN] = 0
                cc[termios.VTIME] = timeout_decisec
                termios.tcsetattr(fd, termios.TCSANOW, attrs)

        def stop(self):
                termios.tcsetattr(self._fd, termios.TCSANOW, self._saved)

        def getch(self):
                # returns one byte read from the fd, or None if nothing
                # was read
                debug_flush()
                byte = os.read(self._fd, 1)
                if len(byte):
                        return byte
                return None

        def info(self):
                return ''
1162
1163 #---------- main program ----------
1164
def main():
        """Parse the command line and run the requested mode.

        Sets the module-level `opts` (parsed options plus some fixed
        parameters) and `fetcher`, then calls the do_* function named
        by the first positional argument with the remaining arguments
        and the parser's error function.
        """
        global opts, fetcher

        pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
actions:
 yoweb-scrape [--ocean OCEAN ...] pirate PIRATE
 yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
 yoweb-scrape [options] ship-aid CHAT-LOG  (must be .../PIRATE_OCEAN_chat-log*)

display modes (for --display) apply to ship-aid:
 --display=dumb       just print new information, scrolling the screen
 --display=overwrite  use cursor motion, selective clear, etc. to redraw at top
''')
        ao = pa.add_option
        ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None,
                help='select ocean OCEAN')
        ao('--cache-dir', dest='cache_dir', metavar='DIR',
                default='~/.yoweb-scrape-cache',
                help='cache yoweb pages in DIR')
        ao('-D','--debug', action='count', dest='debug', default=0,
                help='enable debugging output')
        ao('--debug-fd', type='int', dest='debug_fd',
                help='write any debugging output to specified fd')
        ao('-q','--quiet', action='store_true', dest='quiet',
                help='suppress warning output')
        ao('--display', action='store', dest='display',
                type='choice', choices=['dumb','overwrite'],
                help='how to display ship aid')

        ao('--ship-duty', action='store_true', dest='ship_duty',
                help='show ship duty station puzzles')
        ao('--all-puzzles', action='store_false', dest='ship_duty',
                help='show all puzzles, not just ship duty stations')

        ao('--min-cache-reuse', type='int', dest='min_max_age',
                metavar='SECONDS', default=60,
                help='always reuse cache yoweb data if no older than this')

        (opts,args) = pa.parse_args()
        random.seed()

        if len(args) < 1:
                pa.error('need a mode argument')

        if opts.debug_fd is not None:
                opts.debug_file = os.fdopen(opts.debug_fd, 'w')
        else:
                opts.debug_file = sys.stdout

        # mode "foo-bar" is implemented by do_foo_bar; a literal '_' in
        # the mode is turned into '#' first so it cannot match anything
        mode = args[0]
        mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_')
        try: mode_fn = globals()[mode_fn_name]
        except KeyError: pa.error('unknown mode "%s"' % mode)

        # fixed parameters
        opts.expire_age = max(3600, opts.min_max_age)

        opts.ship_reboard_clearout = 3600

        if opts.cache_dir.startswith('~/'):
                # expanduser copes with HOME being unset, where
                # concatenating os.getenv('HOME') would raise TypeError
                opts.cache_dir = os.path.expanduser(opts.cache_dir)

        if opts.display is None:
                # default to the scrolling display when debug output
                # shares stdout or stdout is not a terminal
                if ((opts.debug > 0 and opts.debug_fd is None)
                    or not os.isatty(sys.stdout.fileno())):
                        opts.display = 'dumb'
                else:
                        opts.display = 'overwrite'

        fetcher = Fetcher(opts.ocean, opts.cache_dir)

        mode_fn(args[1:], pa.error)
1241 main()