chiark / gitweb /
Reduce wait times to something more resembling a real pirate
[ypp-sc-tools.web-test.git] / yoweb-scrape
1 #!/usr/bin/python
2 # This is part of ypp-sc-tools, a set of third-party tools for assisting
3 # players of Yohoho Puzzle Pirates.
4 #
5 # Copyright (C) 2009 Ian Jackson <ijackson@chiark.greenend.org.uk>
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 #
20 # Yohoho and Puzzle Pirates are probably trademarks of Three Rings and
21 # are used without permission.  This program is not endorsed or
22 # sponsored by Three Rings.
23
24 copyright_info = '''
25 yoweb-scrape is part of ypp-sc-tools  Copyright (C) 2009 Ian Jackson
26 This program comes with ABSOLUTELY NO WARRANTY; this is free software,
27 and you are welcome to redistribute it under certain conditions.
28 For details, read the top of the yoweb-scrape file.
29 '''
30
31 #---------- setup ----------
32
33 import signal
34 signal.signal(signal.SIGINT, signal.SIG_DFL)
35
36 import os
37 import time
38 import urllib
39 import urllib2
40 import errno
41 import sys
42 import re as regexp
43 import random
44 import curses
45 import termios
46 import random
47 from optparse import OptionParser
48 from StringIO import StringIO
49
50 from BeautifulSoup import BeautifulSoup
51
52 opts = None
53
54 #---------- YPP parameters and arrays ----------
55
# Names of every puzzle whose standing is looked for on a pirate page.
puzzles = ('Swordfighting/Bilging/Sailing/Rigging/Navigating'+
        '/Battle Navigation/Gunning/Carpentry/Rumble/Treasure Haul'+
        '/Drinking/Spades/Hearts/Treasure Drop/Poker/Distilling'+
        '/Alchemistry/Shipwrightery/Blacksmithing/Foraging').split('/')

# Table columns for the core duty puzzles.  An entry which is itself a
# list names several puzzles sharing one column; StandingsTable shows
# the best standing found among them.
core_duty_puzzles = [
                'Gunning',
                ['Sailing','Rigging'],
                'Bilging',
                'Carpentry',
                ]

# Columns used by StandingsTable when opts.ship_duty is set.
duty_puzzles = ([ 'Navigating', 'Battle Navigation' ] +
                core_duty_puzzles +
                [ 'Treasure Haul' ])

# Standing names in ascending order.  A scraped standing is stored as
# its index in this list.
standingvals = ('Able/Proficient/Distinguished/Respected/Master'+
                '/Renowned/Grand-Master/Legendary/Ultimate').split('/')
standing_limit = len(standingvals)

# Matches the href of a link to a pirate's yoweb page.
pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm')

# Used by StandingsTable when sizing the pirate name column.
max_pirate_namelen = 12
79
80
81 #---------- general utilities ----------
82
def debug(m):
        # Write the message m, followed by a newline, to opts.debug_file;
        # does nothing unless opts.debug is greater than zero.
        if not (opts.debug > 0):
                return
        print >>opts.debug_file, m
86
def debug_flush():
        # Flush any buffered debug output; does nothing unless
        # opts.debug is greater than zero.
        if not (opts.debug > 0):
                return
        opts.debug_file.flush()
90
def sleep(seconds):
        # Pause for the given number of seconds.  Debug output is
        # flushed first so that it is visible while we are waiting.
        debug_flush()
        time.sleep(seconds)
94
def format_time_interval(ti):
        # Render an interval of ti seconds compactly, coarsest unit first:
        #   at least a day      ->  '<n>d'
        #   at least two hours  ->  '<n>h'
        #   at least two mins   ->  '<n>m'  (right-justified in 2 columns)
        #   anything shorter    ->  'm:ss'
        if ti >= 86400:
                return '%dd' % (ti / 86400)
        if ti >= 7200:
                return '%dh' % (ti / 3600)
        if ti >= 120:
                return '%2dm' % (ti / 60)
        minutes = ti / 60
        seconds = ti % 60
        return '%d:%02d' % (minutes, seconds)
100
101 #---------- caching and rate-limiting data fetcher ----------
102
103 class Fetcher:
104         def __init__(self, ocean, cachedir):
105                 debug('Fetcher init %s' % cachedir)
106                 self.ocean = ocean
107                 self.cachedir = cachedir
108                 try: os.mkdir(cachedir)
109                 except (OSError,IOError), oe:
110                         if oe.errno != errno.EEXIST: raise
111                 self._cache_scan(time.time())
112
113         def default_ocean(self, ocean='ice'):
114                 if self.ocean is None:
115                         self.ocean = ocean
116
117         def _cache_scan(self, now):
118                 # returns list of ages, unsorted
119                 ages = []
120                 debug('Fetcher   scan_cache')
121                 for leaf in os.listdir(self.cachedir):
122                         if not leaf.startswith('#'): continue
123                         path = self.cachedir + '/' + leaf
124                         try: s = os.stat(path)
125                         except (OSError,IOError), oe:
126                                 if oe.errno != errno.ENOENT: raise
127                                 continue
128                         age = now - s.st_mtime
129                         if age > opts.expire_age:
130                                 debug('Fetcher    expire %d %s' % (age, path))
131                                 try: os.remove(path)
132                                 except (OSError,IOError), oe:
133                                         if oe.errno != errno.ENOENT: raise
134                                 continue
135                         ages.append(age)
136                 return ages
137
138         def need_wait(self, now, imaginary=[]):
139                 ages = self._cache_scan(now)
140                 ages += imaginary
141                 ages.sort()
142                 debug('Fetcher   ages ' + `ages`)
143                 min_age = 1
144                 need_wait = 0
145                 for age in ages:
146                         if age < min_age and age <= 5:
147                                 debug('Fetcher   morewait min=%d age=%d' %
148                                         (min_age, age))
149                                 need_wait = max(need_wait, min_age - age)
150                         min_age += 3
151                         min_age *= 1.25
152                 return need_wait
153
154         def _rate_limit_cache_clean(self, now):
155                 need_wait = self.need_wait(now)
156                 if need_wait > 0:
157                         debug('Fetcher   wait %d' % need_wait)
158                         sleep(need_wait)
159
160         def fetch(self, url, max_age):
161                 debug('Fetcher fetch %s' % url)
162                 cache_corename = urllib.quote_plus(url)
163                 cache_item = "%s/#%s#" % (self.cachedir, cache_corename)
164                 try: f = file(cache_item, 'r')
165                 except (OSError,IOError), oe:
166                         if oe.errno != errno.ENOENT: raise
167                         f = None
168                 now = time.time()
169                 max_age = max(opts.min_max_age, min(max_age, opts.expire_age))
170                 if f is not None:
171                         s = os.fstat(f.fileno())
172                         age = now - s.st_mtime
173                         if age > max_age:
174                                 debug('Fetcher  stale %d < %d'% (max_age, age))
175                                 f = None
176                 if f is not None:
177                         data = f.read()
178                         f.close()
179                         debug('Fetcher  cached %d > %d' % (max_age, age))
180                         return data
181
182                 debug('Fetcher  fetch')
183                 self._rate_limit_cache_clean(now)
184
185                 stream = urllib2.urlopen(url)
186                 data = stream.read()
187                 cache_tmp = "%s/#%s~%d#" % (
188                         self.cachedir, cache_corename, os.getpid())
189                 f = file(cache_tmp, 'w')
190                 f.write(data)
191                 f.close()
192                 os.rename(cache_tmp, cache_item)
193                 debug('Fetcher  stored')
194                 return data
195
196         def yoweb(self, kind, tail, max_age):
197                 self.default_ocean()
198                 url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
199                         self.ocean, kind, tail)
200                 return self.fetch(url, max_age)
201
202 #---------- logging assistance for troubled screenscrapers ----------
203
class SoupLog:
        # Collects, in self.msgs, messages describing problems met while
        # picking a page apart.

        def __init__(self):
                self.msgs = []

        def msg(self, m):
                # Record the message m.
                self.msgs.append(m)

        def soupm(self, obj, m):
                # Record the message m, annotated with the repr of the
                # object (normally a piece of soup) it is about.
                annotated = m + '; in ' + repr(obj)
                self.msg(annotated)

        def needs_msgs(self, child_souplog):
                # Take over all the messages of child_souplog, leaving
                # it with a fresh empty list.
                self.msgs.extend(child_souplog.msgs)
                child_souplog.msgs = []
214
def soup_text(obj):
        # Returns all the text found within obj, joined together, with
        # leading and trailing whitespace removed.
        pieces = obj.findAll(text=True)
        return ''.join(pieces).strip()
218
class SomethingSoupInfo(SoupLog):
        # Common base for the page scrapers: fetches one yoweb page via
        # the global fetcher and parses it, leaving the resulting soup
        # in self._soup.

        def __init__(self, kind, tail, max_age):
                SoupLog.__init__(self)
                page = fetcher.yoweb(kind, tail, max_age)
                entities = BeautifulSoup.HTML_ENTITIES
                self._soup = BeautifulSoup(page, convertEntities=entities)
226
227 #---------- scraper for pirate pages ----------
228
class PirateInfo(SomethingSoupInfo):
        # Information scraped from one pirate's yoweb page.
        #
        # Public data members:
        #  pi.standings = { 'Treasure Haul': 0, ... }
        #       maps puzzle name to an index into standingvals; a puzzle
        #       whose standing could not be determined has no entry
        #  pi.name = name
        #  pi.crew = (id, name)         or None if not found
        #  pi.flag = (id, name)         or None if not found
        #  pi.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, pirate, max_age=300):
                SomethingSoupInfo.__init__(self,
                        'pirate.wm?target=', pirate, max_age)
                self.name = pirate
                self._find_standings()
                self.crew = self._find_crewflag('crew',
                        '^/yoweb/crew/info\\.wm')
                self.flag = self._find_crewflag('flag',
                        '^/yoweb/flag/info\\.wm')

        def _find_standings(self):
                # Fills in self.standings from the puzzle images in the
                # page.  Each image's alt text names the puzzle; the
                # standing is parsed from the text of the table cell
                # following the one containing the image.
                imgs = self._soup.findAll('img',
                        src=regexp.compile('/yoweb/images/stat.*'))
                # Two alternatives; in either case the last group which
                # matched is the standing we want.
                standing_re = regexp.compile(
u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
                        )

                # found[puzzle] = list of standing names seen for it
                found = { }
                for skill in puzzles:
                        found[skill] = [ ]

                # Problems are collected here and only passed on to
                # self.msgs if some puzzle ends up without a standing.
                skl = SoupLog()

                for img in imgs:
                        try: puzzle = img['alt']
                        except KeyError: continue

                        if not puzzle in puzzles:
                                skl.soupm(img, 'unknown puzzle: "%s"' % puzzle)
                                continue
                        key = img.findParent('td')
                        if key is None:
                                skl.soupm(img, 'puzzle at root! "%s"' % puzzle)
                                continue
                        valelem = key.findNextSibling('td')
                        if valelem is None:
                                skl.soupm(key, 'puzzle missing sibling "%s"'
                                        % puzzle)
                                continue
                        valstr = soup_text(valelem)
                        match = standing_re.match(valstr)
                        if match is None:
                                skl.soupm(key, ('puzzle "%s" unparseable'+
                                        ' standing "%s"') % (puzzle, valstr))
                                continue
                        found[puzzle].append(match.group(match.lastindex))

                self.standings = { }

                for puzzle in puzzles:
                        sl = found[puzzle]
                        if len(sl) > 1:
                                skl.msg('puzzle "%s" multiple standings %s' %
                                                (puzzle, repr(sl)))
                                continue
                        if not sl:
                                skl.msg('puzzle "%s" no standing found' % puzzle)
                                continue
                        standing = sl[0]
                        try:
                                self.standings[puzzle] = (
                                        standingvals.index(standing))
                        except ValueError:
                                skl.msg('puzzle "%s" unknown standing "%s"' %
                                        (puzzle, standing))

                for puzzle in puzzles:
                        if not puzzle in self.standings:
                                # one transfer moves all the messages
                                self.needs_msgs(skl)
                                break

        def _find_crewflag(self, cf, yoweb_re):
                # cf is 'crew' or 'flag'; yoweb_re matches the href of a
                # link to the corresponding info page.  Returns
                # (id, name), or None (having logged a message) unless
                # exactly one such link with a parseable id was found.
                things = self._soup.findAll('a', href=regexp.compile(yoweb_re))
                if len(things) != 1:
                        self.msg('zero or several %s id references found' % cf)
                        return None
                thing = things[0]
                id_re = '\\b%sid\\=(\\w+)$' % cf
                id_haystack = thing['href']
                match = regexp.compile(id_re).search(id_haystack)
                if match is None:
                        self.soupm(thing, ('incomprehensible %s id ref'+
                                ' (%s in %s)') % (cf, id_re, id_haystack))
                        return None
                name = soup_text(thing)
                return (match.group(1), name)

        def __str__(self):
                return repr((self.crew, self.flag, self.standings, self.msgs))
327
328 #---------- scraper for crew pages ----------
329
class CrewInfo(SomethingSoupInfo):
        # Roster scraped from a crew's yoweb page.
        #
        # Public data members:
        #  ci.crew = [ ('Captain',        ['Pirate', ...]),
        #              ('Senior Officer', [...]),
        #               ... ]
        #  ci.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, crewid, max_age=300):
                SomethingSoupInfo.__init__(self,
                        'crew/info.wm?crewid=', crewid, max_age)
                self._find_crew()

        def _find_crew(self):
                # Fills in self.crew.  The roster table is located by
                # starting at the (single) captain image and climbing
                # through enclosing tables until one is reached which
                # contains a link to a pirate page.
                self.crew = []
                captain_imgs = self._soup.findAll('img',
                        src='/yoweb/images/crew-captain.png')
                if len(captain_imgs) != 1:
                        self.msg('crew members: no. of captain images != 1')
                        return

                table = captain_imgs[0]
                while not table.find('a', href=pirate_ref_re):
                        table = table.findParent('table')
                        if not table:
                                self.msg('crew members: cannot find table')
                                return

                rank_img_re = regexp.compile('/yoweb/images/crew')
                # members of the rank whose heading row we saw last
                members = None
                for row in table.contents:
                        # only the direct children which are not plain
                        # text are of interest
                        if isinstance(row,basestring):
                                continue

                        if row.find('img', attrs={'src': rank_img_re}):
                                # a row with a rank image starts a new
                                # rank, named by the text of the row
                                rank = soup_text(row)
                                members = []
                                self.crew.append((rank, members))
                                continue

                        for link in row.findAll('a', href=pirate_ref_re):
                                if members is None:
                                        self.soupm(link, 'crew members: crew'
                                                ' before rank')
                                        continue
                                members.append(soup_text(link))

        def __str__(self):
                return repr((self.crew, self.msgs))
377
378 #---------- pretty-printer for tables of pirate puzzle standings ----------
379
class StandingsTable:
        # Writes a table of pirates' puzzle standings, one pirate per
        # line, to the file-like object f.
        #
        #  use_puzzles  columns to show: a list whose entries are puzzle
        #               names or lists of puzzle names (sharing a
        #               column).  Defaults to duty_puzzles if
        #               opts.ship_duty is set and to puzzles otherwise.
        #  col_width    width of each puzzle column, including the
        #               separating space
        #  gap_every    a blank line is written after every gap_every
        #               pirate lines; None disables this

        def __init__(self, f, use_puzzles=None, col_width=6, gap_every=5):
                if use_puzzles is None:
                        if opts.ship_duty:
                                use_puzzles=duty_puzzles
                        else:
                                use_puzzles=puzzles
                self._puzzles = use_puzzles
                self.f = f
                self._cw = col_width-1
                self._gap_every = gap_every
                self._linecount = 0
                self._o = f.write

        def _nl(self): self._o('\n')

        def _pline(self, pirate, puzstrs, extra):
                # Writes one table line: the name column, then each of
                # puzstrs padded and truncated to the column width, then
                # extra (if any).
                if (self._linecount > 0
                    and self._gap_every is not None
                    and not (self._linecount % self._gap_every)):
                        self._nl()
                self._o(' %-*s' % (max(max_pirate_namelen, 14), pirate))
                for v in puzstrs:
                        self._o(' %-*.*s' % (self._cw,self._cw, v))
                if extra:
                        self._o(' ' + extra)
                self._nl()
                self._linecount += 1

        def _puzstr(self, pi, puzzle):
                # Returns the cell text for one column.  puzzle is a
                # puzzle name or a list of names, in which case the best
                # standing among them is shown.  The result is '?' if a
                # standing is not known and '' for the lowest standing.
                if not isinstance(puzzle,list): puzzle = [puzzle]
                try: standing = max([pi.standings[p] for p in puzzle])
                except KeyError: return '?'
                if not standing: return ''
                s = ''
                if self._cw > 4:
                        # NOTE(review): only the numeric index is shown;
                        # an initial-letter form may have been intended
                        # at some point - confirm before changing.
                        s += repr(standing)
                if self._cw > 5:
                        s += ' '
                # one '*' per two levels of standing, '+' for an odd one
                s += '*' * (standing // 2)
                s += '+' * (standing % 2)
                return s

        def headings(self, lhs='', rhs=None):
                # Writes the heading line.  lhs goes in the name column
                # and rhs, if any, after the puzzle columns.
                def puzn_redact(name):
                        # Shortens a column name: the members of a
                        # shared column are each cut to half the column
                        # width and joined with '/'; in a name with a
                        # space, the first word is cut to at most four
                        # characters and run together with the rest.
                        if isinstance(name,list):
                                return '/'.join(
                                        ["%.*s" % (self._cw//2, puzn_redact(n))
                                         for n in name])
                        spc = name.find(' ')
                        if spc < 0: return name
                        return name[0:min(4,spc)] + name[spc+1:]
                # a negative line count keeps _pline from writing a gap
                self._linecount = -2
                titles = [puzn_redact(puz) for puz in self._puzzles]
                self._pline(lhs, titles, rhs)
                self._linecount = 0

        def literalline(self, line):
                # Writes line as it is and restarts the gap counting.
                self._o(line)
                self._nl()
                self._linecount = 0

        def pirate_dummy(self, name, standingstring, extra=None):
                # Writes a line for a pirate with no scraped standings,
                # repeating standingstring once per column.
                # NOTE(review): _pline iterates over the repeated value,
                # so this presumably expects a one-element list or a
                # one-character string - confirm against the callers.
                self._pline(name, standingstring * len(self._puzzles), extra)

        def pirate(self, pi, extra=None):
                # Writes the line for the pirate described by pi.
                puzstrs = [self._puzstr(pi,puz) for puz in self._puzzles]
                self._pline(pi.name, puzstrs, extra)
446
447
448 #---------- chat log parser ----------
449
class PirateAboard:
        # This is essentially a transparent, dumb, data class.
        #  pa.v                 vessel record the pirate is aboard
        #  pa.name              pirate's name
        #  pa.last_time         time of the most recent event
        #  pa.last_event        description of the most recent event
        #  pa.gunner            starts out False
        #  pa.last_chat_time    starts out None
        #  pa.last_chat_chan    starts out None
        #  pa.pi                PirateInfo, or None if not fetched yet
        #  pa.pi_fetched        when pa.pi was fetched (only set once
        #                        pa.pi has been)

        def __init__(self, pn, v, time, event):
                self.name = pn
                self.v = v
                self.last_time = time
                self.last_event = event
                self.gunner = False
                self.last_chat_time = None
                self.last_chat_chan = None
                self.pi = None

        def pirate_info(self):
                # Returns the PirateInfo for this pirate, fetching or
                # refreshing it if the fetcher's rate limit allows that
                # right now.  Otherwise whatever we already have is
                # returned, which is None if nothing was ever fetched.
                now = time.time()
                if not self.pi:
                        imaginary = [1]
                else:
                        age = now - self.pi_fetched
                        # the refresh interval is randomised
                        guide = random.randint(120,240)
                        if age <= guide:
                                return self.pi
                        debug('PirateAboard refresh %d > %d  %s' % (
                                age, guide, self.name))
                        imaginary = [2,4]

                wait = fetcher.need_wait(now, imaginary)
                if wait:
                        debug('PirateAboard fetcher not ready %d' % wait)
                        return self.pi

                self.pi = PirateInfo(self.name, 600)
                self.pi_fetched = now
                return self.pi
490
491 class ChatLogTracker:
492         # This is quite complex so we make it opaque.  Use the
493         # official invokers, accessors etc.
494
495         def __init__(self, myself_pi, logfn):
496                 self._pl = {}   # self._pl['Pirate'] =
497                 self._vl = {}   #   self._vl['Vessel']['Pirate'] = PirateAboard
498                                 # self._vl['Vessel']['#lastinfo']
499                                 # self._vl['Vessel']['#name']
500                                 # self._v = self._vl[self._vessel]
501                 self._date = None
502                 self._myself = myself_pi
503                 self._f = file(logfn)
504                 self._lbuf = ''
505                 self._progress = [0, os.fstat(self._f.fileno()).st_size]
506                 self._disembark_myself()
507                 self._need_redisplay = False
508                 self._lastvessel = None
509
510         def _disembark_myself(self):
511                 self._v = None
512                 self._vessel = None
513                 self.force_redisplay()
514
515         def force_redisplay(self):
516                 self._need_redisplay = True
517
518         def _vessel_updated(self, v, timestamp):
519                 v['#lastinfo'] = timestamp
520                 self.force_redisplay()
521
522         def _onboard_event(self,v,timestamp,pirate,event):
523                 pa = self._pl.get(pirate, None)
524                 if pa is not None and pa.v is v:
525                         pa.last_time = timestamp
526                         pa.last_event = event
527                 else:
528                         if pa is not None: del pa.v[pirate]
529                         pa = PirateAboard(pirate, v, timestamp, event)
530                         self._pl[pirate] = pa
531                         v[pirate] = pa
532                 self._vessel_updated(v, timestamp)
533                 return pa
534
535         def _trash_vessel(self, v):
536                 for pn in v:
537                         if pn.startswith('#'): continue
538                         del self._pl[pn]
539                 vn = v['#name']
540                 del self._vl[vn]
541                 if v is self._v: self._disembark_myself()
542                 self.force_redisplay()
543
544         def _vessel_stale(self, v, timestamp):
545                 return timestamp - v['#lastinfo'] > opts.ship_reboard_clearout
546
547         def _vessel_check_expire(self, v, timestamp):
548                 if not self._vessel_stale(v, timestamp):
549                         return v
550                 self._debug_line_disposition(timestamp,'',
551                         'stale-reset ' + v['#name'])
552                 self._trash_vessel(v)
553                 return None
554
555         def expire_garbage(self, timestamp):
556                 for v in self._vl.values():
557                         self._vessel_check_expire(v, timestamp)
558
559         def _vessel_lookup(self, vn, timestamp, dml=[], create=False):
560                 v = self._vl.get(vn, None)
561                 if v is not None:
562                         v = self._vessel_check_expire(v, timestamp)
563                 if v is not None:
564                         dml.append('found')
565                         return v
566                 if not create:
567                         dml.append('no')
568                 dml.append('new')
569                 self._vl[vn] = v = { '#name': vn }
570                 self._vessel_updated(v, timestamp)
571                 return v
572
573         def _find_matching_vessel(self, pattern, timestamp, cmdr,
574                                         dml=[], create=False):
575                 # use when a commander pirate `cmdr' specified a vessel
576                 #  by name `pattern' (either may be None)
577                 # if create is true, will create the vessel
578                 #  record if an exact name is specified
579
580                 if (pattern is not None and
581                     not '*' in pattern
582                     and len(pattern.split(' ')) == 2):
583                         vn = pattern.title()
584                         dml.append('exact')
585                         return self._vessel_lookup(
586                                 vn, timestamp, dml=dml, create=create)
587
588                 if pattern is None:
589                         pattern_check = lambda vn: True
590                 else:
591                         re = '(?:.* )?%s$' % pattern.lower().replace('*','.+')
592                         pattern_check = regexp.compile(re, regexp.I).match
593
594                 tries = []
595
596                 cmdr_pa = self._pl.get(cmdr, None)
597                 if cmdr_pa: tries.append((cmdr_pa.v, 'cmdr'))
598
599                 tries.append((self._v, 'here'))
600                 tried_vns = []
601
602                 for (v, dm) in tries:
603                         if v is None: dml.append(dm+'?'); continue
604                         
605                         vn = v['#name']
606                         if not pattern_check(vn):
607                                 tried_vns.append(vn)
608                                 dml.append(dm+'#')
609                                 continue
610
611                         dml.append(dm+'!')
612                         return v
613
614                 if pattern is not None and '*' in pattern:
615                         search = [
616                                 (vn,v)
617                                 for (vn,v) in self._vl.iteritems()
618                                 if not self._vessel_stale(v, timestamp)
619                                 if pattern_check(vn)
620                                 ]
621                         #debug('CLT-RE /%s/ wanted (%s) searched (%s)' % (
622                         #       re,
623                         #       '/'.join(tried_vns),
624                         #       '/'.join([vn for (vn,v) in search])))
625
626                         if len(search)==1:
627                                 dml.append('one')
628                                 return search[0][1]
629                         elif search:
630                                 dml.append('many')
631                         else:
632                                 dml.append('none')
633
634         def _debug_line_disposition(self,timestamp,l,m):
635                 debug('CLT %13s %-40s %s' % (timestamp,m,l))
636
637         def chatline(self,l):
638                 rm = lambda re: regexp.match(re,l)
639                 d = lambda m: self._debug_line_disposition(timestamp,l,m)
640                 timestamp = None
641
642                 m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$')
643                 if m:
644                         self._date = [int(x) for x in m.groups()]
645                         self._previous_timestamp = None
646                         return d('date '+`self._date`)
647
648                 if self._date is None:
649                         return d('date unset')
650
651                 m = rm('\\[(\d\d):(\d\d):(\d\d)\\] ')
652                 if not m:
653                         return d('no timestamp')
654
655                 while True:
656                         time_tuple = (self._date +
657                                       [int(x) for x in m.groups()] +
658                                       [-1,-1,-1])
659                         timestamp = time.mktime(time_tuple)
660                         if timestamp >= self._previous_timestamp: break
661                         self._date[2] += 1
662                         self._debug_line_disposition(timestamp,'',
663                                 'new date '+`self._date`)
664
665                 self._previous_timestamp = timestamp
666
667                 l = l[l.find(' ')+1:]
668
669                 def ob_x(pirate,event):
670                         return self._onboard_event(
671                                         self._v, timestamp, pirate, event)
672                 def ob1(did): ob_x(m.group(1), did); return d(did)
673                 def oba(did): return ob1('%s %s' % (did, m.group(2)))
674
675                 def disembark(v, timestamp, pirate, event):
676                         self._onboard_event(
677                                         v, timestamp, pirate, 'leaving '+event)
678                         del v[pirate]
679                         del self._pl[pirate]
680
681                 def disembark_me(why):
682                         self._disembark_myself()
683                         return d('disembark-me '+why)
684
685                 m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$')
686                 if m:
687                         dm = ['boarding']
688                         pn = self._myself.name
689                         vn = m.group(1)
690                         v = self._vessel_lookup(vn, timestamp, dm, create=True)
691                         self._lastvessel = self._vessel = vn
692                         self._v = v
693                         ob_x(pn, 'we boarded')
694                         self.expire_garbage(timestamp)
695                         return d(' '.join(dm))
696
697                 if self._v is None:
698                         return d('no vessel')
699
700                 m = rm('(\\w+) has come aboard\\.$')
701                 if m: return ob1('boarded');
702
703                 m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$')
704                 if m:
705                         (who,what) = m.groups()
706                         pa = ob_x(who,'ord '+what)
707                         if what == 'Gunning':
708                                 pa.gunner = True
709                         return d('duty order')
710
711                 m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$')
712                 if m: oba('stopped'); return d("end")
713
714                 def chat_core(speaker, chan):
715                         try: pa = self._pl[speaker]
716                         except KeyError: return 'mystery'
717                         if pa.v is not self._v: return 'elsewhere'
718                         pa.last_chat_time = timestamp
719                         pa.last_chat_chan = chan
720                         self.force_redisplay()
721                         return 'here'
722
723                 def chat(chan):
724                         speaker = m.group(1)
725                         dm = chat_core(speaker, chan)
726                         return d('chat %s %s' % (chan, dm))
727
728                 def chat_metacmd(chan):
729                         (cmdr, metacmd) = m.groups()
730                         metacmd = regexp.sub('\\s+', ' ', metacmd).strip()
731                         m2 = regexp.match(
732                                 '/([ad]) (?:([A-Za-z* ]+)\\s*:)?([A-Za-z ]+)$',
733                                 metacmd)
734                         if not m2: return chat(chan)
735
736                         (cmd, pattern, targets) = m2.groups()
737                         dml = ['cmd', chan, cmd]
738
739                         if cmd == 'a': each = self._onboard_event
740                         else: each = disembark
741
742                         if cmdr == self._myself.name:
743                                 dml.append('self')
744                                 how = 'cmd: %s' % cmd
745                         else:
746                                 dml.append('other')
747                                 how = 'cmd: %s %s' % (cmd,cmdr)
748
749                         v = self._find_matching_vessel(
750                                 pattern, timestamp, cmdr, dml, create=True)
751
752                         if v is not None:
753                                 targets = targets.strip().split(' ')
754                                 dml.append(`len(targets)`)
755                                 for target in targets:
756                                         each(v, timestamp, target.title(), how)
757                                 self._vessel_updated(v, timestamp)
758
759                         dm = ' '.join(dml)
760                         chat_core(cmdr, 'cmd '+chan)
761                         return d(dm)
762
763                 m = rm('(\\w+) (?:issued an order|ordered everyone) "')
764                 if m: return ob1('general order');
765
766                 m = rm('(\\w+) says, "')
767                 if m: return chat('public')
768
769                 m = rm('(\\w+) tells ye, "')
770                 if m: return chat('private')
771
772                 m = rm('Ye told (\\w+), "(.*)"$')
773                 if m: return chat_metacmd('private')
774
775                 m = rm('(\\w+) flag officer chats, "')
776                 if m: return chat('flag officer')
777
778                 m = rm('(\\w+) officer chats, "(.*)"$')
779                 if m: return chat_metacmd('officer')
780
781                 m = rm('Ye accepted the offer to job with ')
782                 if m: return disembark_me('jobbing')
783
784                 m = rm('Ye hop on the ferry and are whisked away ')
785                 if m: return disembark_me('ferry')
786
787                 m = rm('Whisking away to yer home on the magical winds')
788                 if m: return disembark_me('home')
789
790                 m = rm('Game over\\.  Winners: ([A-Za-z, ]+)\\.$')
791                 if m:
792                         pl = m.group(1).split(', ')
793                         if not self._myself.name in pl:
794                                 return d('lost melee')
795                         for pn in pl:
796                                 if ' ' in pn: continue
797                                 ob_x(pn,'won melee')
798                         return d('won melee')
799
800                 m = rm('(\\w+) is eliminated\\!')
801                 if m: return ob1('eliminated in fray');
802
803                 m = rm('(\\w+) has driven \w+ from the ship\\!')
804                 if m: return ob1('boarder repelled');
805
806                 m = rm('\w+ has bested (\\w+), and turns'+
807                         ' to the rest of the ship\\.')
808                 if m: return ob1('boarder unrepelled');
809
810                 m = rm('(\\w+) has left the vessel\.')
811                 if m:
812                         pirate = m.group(1)
813                         disembark(self._v, timestamp, pirate, 'disembarked')
814                         return d('disembarked')
815
816                 return d('not-matched')
817
818         def _str_vessel(self, vn, v):
819                 s = ' vessel %s\n' % vn
820                 s += ' '*20 + "%-*s   %13s\n" % (
821                                 max_pirate_namelen, '#lastinfo',
822                                 v['#lastinfo'])
823                 assert v['#name'] == vn
824                 for pn in sorted(v.keys()):
825                         if pn.startswith('#'): continue
826                         pa = v[pn]
827                         assert pa.v == v
828                         assert self._pl[pn] == pa
829                         s += ' '*20 + "%s %-*s %13s %-30s %13s %s\n" % (
830                                 (' ','G')[pa.gunner],
831                                 max_pirate_namelen, pn,
832                                 pa.last_time, pa.last_event,
833                                 pa.last_chat_time, pa.last_chat_chan)
834                 return s
835
836         def __str__(self):
837                 s = '''<ChatLogTracker
838  myself %s
839  vessel %s
840 '''                     % (self._myself.name, self._vessel)
841                 assert ((self._v is None and self._vessel is None) or
842                         (self._v is self._vl[self._vessel]))
843                 if self._vessel is not None:
844                         s += self._str_vessel(self._vessel, self._v)
845                 for vn in sorted(self._vl.keys()):
846                         if vn == self._vessel: continue
847                         s += self._str_vessel(vn, self._vl[vn])
848                 for p in self._pl:
849                         pa = self._pl[p]
850                         assert pa.v[p] is pa
851                         assert pa.v in self._vl.values()
852                 s += '>\n'
853                 return s
854
855         def catchup(self, progress=None):
856                 while True:
857                         more = self._f.readline()
858                         if not more: break
859
860                         self._progress[0] += len(more)
861                         if progress: progress.progress(*self._progress)
862
863                         self._lbuf += more
864                         if self._lbuf.endswith('\n'):
865                                 self.chatline(self._lbuf.rstrip())
866                                 self._lbuf = ''
867                                 if opts.debug >= 2:
868                                         debug(self.__str__())
869                 if progress: progress.caughtup()
870
871         def changed(self):
872                 rv = self._need_redisplay
873                 self._need_redisplay = False
874                 return rv
875         def myname(self):
876                 # returns our pirate name
877                 return self._myself.name
878         def vesselname(self):
879                 # returns the vessel name we're aboard or None
880                 return self._vessel
881         def lastvesselname(self):
882                 # returns the last vessel name we were aboard or None
883                 return self._lastvessel
884         def aboard(self, vesselname=True):
885                 # returns a list of PirateAboard the vessel
886                 #  sorted by pirate name
887                 #  you can pass this None and you'll get []
888                 #  or True for the current vessel (which is the default)
889                 #  the returned value is a fresh list of persistent
890                 #  PirateAboard objects
891                 if vesselname is True: v = self._v
892                 else: v = self._vl.get(vesselname.title())
893                 if v is None: return []
894                 return [ v[pn]
895                          for pn in sorted(v.keys())
896                          if not pn.startswith('#') ]
897
898 #---------- implementations of actual operation modes ----------
899
900 def do_pirate(pirates, bu):
901         print '{'
902         for pirate in pirates:
903                 info = PirateInfo(pirate)
904                 print '%s: %s,' % (`pirate`, info)
905         print '}'
906
def prep_crew_of(args, bu, max_age=300):
        # Look up the single pirate named in args and return the CrewInfo
        # for that pirate's crew, or None if the pirate's crew is None.
        # bu is called with a message if args is not exactly one name.
        if len(args) != 1:
                bu('crew-of takes one pirate name')
        pirate = PirateInfo(args[0], max_age)
        if pirate.crew is not None:
                return CrewInfo(pirate.crew[0], max_age)
        return None
912
913 def do_crew_of(args, bu):
914         ci = prep_crew_of(args, bu)
915         print ci
916
def do_standings_crew_of(args, bu):
        # 'standings-crew-of' mode: print a standings table covering
        # every member of the named pirate's crew, grouped by rank.
        ci = prep_crew_of(args, bu, 60)
        if ci is None:
                # prep_crew_of returns None when the pirate's crew is None;
                # there is nothing to tabulate, so say so instead of
                # failing on ci.crew below
                print >>sys.stderr, 'pirate %s is not in a crew' % args[0]
                return
        tab = StandingsTable(sys.stdout)
        tab.headings()
        for (rank, members) in ci.crew:
                if not members: continue
                tab.literalline('')
                tab.literalline('%s:' % rank)
                for p in members:
                        # each member gets a randomly chosen max age
                        # between 900 and 1800
                        pi = PirateInfo(p, random.randint(900,1800))
                        tab.pirate(pi)
928
class ProgressPrintPercentage:
        # Progress reporter which writes a one-line percentage indicator
        # to a file object (stdout by default), using '\r' so that each
        # update overwrites the previous one.

        def __init__(self, f=sys.stdout):
                self._f = f

        def progress_string(self,done,total):
                # text shown for `done' out of `total'
                percent = (done*100) / total
                return "scan chat logs %3d%%\r" % percent

        def progress(self,*a):
                # a is (done, total); write the indicator and flush at once
                out = self._f
                out.write(self.progress_string(*a))
                out.flush()

        def show_init(self, pirate, ocean):
                # startup banner
                print >>self._f, 'Starting up, %s on the %s ocean' % (
                        pirate, ocean)

        def caughtup(self):
                # blank out the progress indicator
                out = self._f
                out.write('                   \r')
                out.flush()
943
944 #----- modes which use the chat log parser are quite complex -----
945
def prep_chat_log(args, bu,
                progress=ProgressPrintPercentage(),
                max_myself_age=3600):
        # Common setup for the modes which follow a chat log.
        #  args must be exactly one chat log filename, whose basename
        #  starts PIRATE_OCEAN_; otherwise bu is called with a message.
        #  progress receives the startup banner and catch-up progress.
        # Returns (myself, track): our own PirateInfo and a ChatLogTracker
        # which has already read the existing contents of the log.
        if len(args) != 1: bu('this action takes only chat log filename')
        logfn = args[0]
        logfn_re = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_'
        match = regexp.match(logfn_re, logfn)
        if not match: bu('chat log filename is not in expected format')
        (pirate, ocean) = match.groups()
        fetcher.default_ocean(ocean)

        progress.show_init(pirate, fetcher.ocean)
        myself = PirateInfo(pirate,max_myself_age)
        track = ChatLogTracker(myself, logfn)

        # Lower the debug level by 2 during the initial catch-up, and
        # restore it even if the catch-up fails.
        opts.debug -= 2
        try:
                track.catchup(progress)
        finally:
                opts.debug += 2

        track.force_redisplay()

        return (myself, track)
968
969 def do_track_chat_log(args, bu):
970         (myself, track) = prep_chat_log(args, bu)
971         while True:
972                 track.catchup()
973                 if track.changed():
974                         print track
975                 sleep(1)
976
977 #----- ship management aid -----
978
979 class Display_dumb(ProgressPrintPercentage):
980         def __init__(self):
981                 ProgressPrintPercentage.__init__(self)
982         def show(self, s):
983                 print '\n\n', s;
984         def realstart(self):
985                 pass
986
class Display_overwrite(ProgressPrintPercentage):
        # Display which redraws in place, using terminfo capabilities:
        # home the cursor ('ho', or 'cup' to 0,0), clear to end of line
        # ('el') after each line and clear to end of display ('ed')
        # afterwards.  If those are not all available it homes by
        # clearing the screen; with no 'clear' either, it falls back to
        # plain scrolling output.

        def __init__(self):
                ProgressPrintPercentage.__init__(self)

                # setupterm is pointed at /dev/null, not at the terminal;
                # we only want to look capability strings up
                null = open('/dev/null','w')
                curses.setupterm(fd=null.fileno())

                self._clear = curses.tigetstr('clear')
                if not self._clear:
                        self._debug('missing clear!')
                        # realstart writes self._clear, so it must be a
                        # string; show must be something callable on this
                        # instance (Display_dumb.show is an unbound method
                        # of an unrelated class and is not)
                        self._clear = ''
                        self.show = self._show_dumb
                        return

                self._t = {'el':'', 'ed':''}
                if not self._init_sophisticated():
                        for k in self._t.keys(): self._t[k] = ''
                        self._t['ho'] = self._clear

        def _debug(self,m): debug('display overwrite: '+m)

        def _show_dumb(self, s):
                # fallback display: same output as Display_dumb.show
                sys.stdout.write('\n\n%s\n' % (s,))

        def _init_sophisticated(self):
                # Fill self._t with the capability strings we need.
                # Returns 1 if they are all available, 0 otherwise.
                for k in self._t.keys():
                        s = curses.tigetstr(k)
                        self._t[k] = s
                self._t['ho'] = curses.tigetstr('ho')
                if not self._t['ho']:
                        cup = curses.tigetstr('cup')
                        # tparm cannot take a missing capability; leave
                        # 'ho' unset so it is reported as missing below
                        if cup:
                                self._t['ho'] = curses.tparm(cup,0,0)
                missing = [k for k in self._t.keys() if not self._t[k]]
                if missing:
                        self._debug('missing '+(' '.join(missing)))
                        return 0
                return 1

        def show(self, s):
                w = sys.stdout.write
                def wti(k): w(self._t[k])

                wti('ho')
                nl = ''
                for l in s.rstrip().split('\n'):
                        w(nl)
                        w(l)
                        # wipe the remains of any longer previous line
                        wti('el')
                        nl = '\r\n'
                wti('ed')
                w(' ')
                sys.stdout.flush()

        def realstart(self):
                sys.stdout.write(self._clear)
                sys.stdout.flush()
1039                         
1040
1041 def do_ship_aid(args, bu):
1042         if opts.ship_duty is None: opts.ship_duty = True
1043
1044         displayer = globals()['Display_'+opts.display]()
1045
1046         (myself, track) = prep_chat_log(args, bu, progress=displayer)
1047
1048         displayer.realstart()
1049
1050         if os.isatty(0): kr_create = KeystrokeReader
1051         else: kr_create = DummyKeystrokeReader
1052
1053         try:
1054                 kreader = kr_create(0, 10)
1055                 ship_aid_core(myself, track, displayer, kreader)
1056         finally:
1057                 kreader.stop()
1058                 print '\n'
1059
class KeyBasedSorter:
        # Base for sorters which order PirateAboard objects by a key
        # computed from their pirate info; subclasses supply
        # compar_key(pi).

        def compar_key_pa(self, pa):
                # key for one PirateAboard; None if it has no pirate info
                info = pa.pirate_info()
                if info is not None:
                        return self.compar_key(info)
                return None

        def lsort_pa(self, l):
                # sort the list l in place
                l.sort(key = lambda pa: self.compar_key_pa(pa))
1067
class NameSorter(KeyBasedSorter):
        # sorts by pirate name

        def compar_key(self, pi):
                return pi.name

        def desc(self):
                return 'name'
1071
class SkillSorter(NameSorter):
        # Sorts by standing in the wanted puzzle(s), given as a
        # '/'-separated string.  Ties are broken using the standings in
        # the remaining core duty puzzles, then by name.

        def __init__(self, relevant):
                self._want = frozenset(relevant.split('/'))
                # entries of core_duty_puzzles are either a single puzzle
                # name or a collection of names
                avoid = set()
                for p in core_duty_puzzles:
                        if isinstance(p,basestring):
                                avoid.add(p)
                        else:
                                avoid.update(p)
                self._avoid = avoid - self._want
                self._desc = '%s' % relevant

        def desc(self): return self._desc

        def compar_key(self, pi):
                standings = pi.standings
                best_want = max([ standings.get(puz,-1)
                                  for puz in self._want ])
                best_avoid = sorted([ -standings.get(puz,standing_limit)
                                      for puz in self._avoid ])
                debug('compar_key %s bw=%s ba=%s' % (pi.name, repr(best_want),
                        repr(best_avoid)))
                # best_want is negated so higher standings sort first;
                # the avoid list is returned with its original signs
                return (-best_want, [ -x for x in best_avoid ], pi.name)
1098
def ship_aid_core(myself, track, displayer, kreader):
        # Main loop of ship-aid mode: catch up on the chat log, redraw
        # the table of pirates aboard, then wait for a keystroke (or the
        # reader's timeout).  'q' quits; the other recognised keys pick
        # the sort order.

        # keys which select sorting by skill, and the puzzle(s) they mean
        skill_keys = {
                'g': 'Gunning',
                'c': 'Carpentry',
                's': 'Sailing/Rigging',
                'b': 'Bilging',
                'n': 'Navigating',
                'd': 'Battle Navigation',
                't': 'Treasure Haul',
        }

        def find_vessel():
                # (vessel name or None, description for the heading)
                current = track.vesselname()
                if current:
                        return (current, " on board the %s" % current)
                previous = track.lastvesselname()
                if previous:
                        return (previous, " ashore from the %s" % previous)
                return (None, " not on a vessel")

        def timeevent(t,e):
                # fixed-width "how long ago, what" column; blank if no time.
                # `now' is set at the top of each pass of the main loop.
                if t is not None:
                        return " %-4s %-16s" % (format_time_interval(now - t),e)
                return ' ' * 22

        displayer.show(track.myname() + find_vessel()[1] + '...')

        rotate_nya = '/-\\'

        sort = NameSorter()

        while True:
                track.catchup()
                now = time.time()

                (vn, where) = find_vessel()
                s = track.myname() + where
                s += " at %s" % time.strftime("%Y-%m-%d %H:%M:%S")
                s += kreader.info()
                s += '\n'

                aboard = track.aboard(vn)
                sort.lsort_pa(aboard)

                tbl_s = StringIO()
                tbl = StandingsTable(tbl_s)

                howmany = ''
                if track.vesselname():
                        howmany = ' %d aboard' % len(aboard)

                tbl.headings(howmany, '  sorted by '+sort.desc())

                for pa in aboard:
                        pi = pa.pirate_info()

                        xs = 'G ' if pa.gunner else '  '
                        xs += timeevent(pa.last_time, pa.last_event)
                        xs += timeevent(pa.last_chat_time, pa.last_chat_chan)

                        if pi is not None:
                                tbl.pirate(pi, xs)
                        else:
                                # no info for this pirate yet: show a
                                # placeholder row with the spinner character
                                tbl.pirate_dummy(pa.name, rotate_nya[0], xs)

                s += tbl_s.getvalue()
                displayer.show(s)
                tbl_s.close()

                k = kreader.getch()
                if k is None:
                        # no key pressed: advance the spinner
                        rotate_nya = rotate_nya[1:] + rotate_nya[:1]
                        continue

                if k == 'q':
                        break
                if k == 'a':
                        sort = NameSorter()
                elif k in skill_keys:
                        sort = SkillSorter(skill_keys[k])
                # any other key is an unknown command and is ignored
1172
1173 #---------- individual keystroke input ----------
1174
class DummyKeystrokeReader:
        # Stand-in keystroke reader for non-interactive use: it never
        # returns a key, and getch just waits a second.

        def __init__(self,fd,timeout_dummy):
                # both arguments are ignored
                pass

        def stop(self):
                pass

        def getch(self):
                sleep(1)
                return None

        def info(self):
                return ' [noninteractive]'
1180
class KeystrokeReader(DummyKeystrokeReader):
        # Reads single keystrokes from a terminal fd, with echo and
        # canonical mode turned off.  stop() restores the saved settings.

        def __init__(self, fd, timeout_decisec=0):
                self._fd = fd
                self._saved = termios.tcgetattr(fd)
                attrs = termios.tcgetattr(fd)
                # index 3 of the attribute list is the local-modes word
                lflags_off = (termios.ECHO | termios.ECHONL |
                              termios.ICANON | termios.IEXTEN)
                attrs[3] = attrs[3] & ~lflags_off
                # index 6 is the control-character array: with VMIN 0 a
                # read returns after at most VTIME tenths of a second
                cc = attrs[6]
                cc[termios.VMIN] = 0
                cc[termios.VTIME] = timeout_decisec
                termios.tcsetattr(fd, termios.TCSANOW, attrs)

        def stop(self):
                termios.tcsetattr(self._fd, termios.TCSANOW, self._saved)

        def getch(self):
                # returns one byte, or None if nothing was read
                debug_flush()
                byte = os.read(self._fd, 1)
                if len(byte):
                        return byte
                return None

        def info(self):
                return ''
1200
1201 #---------- main program ----------
1202
def main():
        # Parse the command line, set up the global opts and fetcher,
        # and run the do_* function for the requested mode.
        global opts, fetcher

        pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
actions:
 yoweb-scrape [--ocean OCEAN ...] pirate PIRATE
 yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
 yoweb-scrape [options] ship-aid CHAT-LOG  (must be .../PIRATE_OCEAN_chat-log*)

display modes (for --display) apply to ship-aid:
 --display=dumb       just print new information, scrolling the screen
 --display=overwrite  use cursor motion, selective clear, etc. to redraw at top''')
        ao = pa.add_option
        ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None,
                help='select ocean OCEAN')
        ao('--cache-dir', dest='cache_dir', metavar='DIR',
                default='~/.yoweb-scrape-cache',
                help='cache yoweb pages in DIR')
        ao('-D','--debug', action='count', dest='debug', default=0,
                help='enable debugging output')
        ao('--debug-fd', type='int', dest='debug_fd',
                help='write any debugging output to specified fd')
        ao('-q','--quiet', action='store_true', dest='quiet',
                help='suppress warning output')
        ao('--display', action='store', dest='display',
                type='choice', choices=['dumb','overwrite'],
                help='how to display ship aid')

        ao('--ship-duty', action='store_true', dest='ship_duty',
                help='show ship duty station puzzles')
        ao('--all-puzzles', action='store_false', dest='ship_duty',
                help='show all puzzles, not just ship duty stations')

        ao('--min-cache-reuse', type='int', dest='min_max_age',
                metavar='SECONDS', default=60,
                help='always reuse cache yoweb data if no older than this')

        (opts,args) = pa.parse_args()
        random.seed()

        if len(args) < 1:
                print >>sys.stderr, copyright_info
                pa.error('need a mode argument')

        if opts.debug_fd is not None:
                opts.debug_file = os.fdopen(opts.debug_fd, 'w')
        else:
                opts.debug_file = sys.stdout

        # mode "foo-bar" is implemented by do_foo_bar; a literal '_' in
        # the mode is turned into '#' so that it cannot match anything
        mode = args[0]
        mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_')
        try: mode_fn = globals()[mode_fn_name]
        except KeyError: pa.error('unknown mode "%s"' % mode)

        # fixed parameters
        opts.expire_age = max(3600, opts.min_max_age)

        opts.ship_reboard_clearout = 3600

        if opts.cache_dir.startswith('~/'):
                # expanduser copes with HOME being unset, where
                # concatenating onto os.getenv('HOME') would fail
                opts.cache_dir = os.path.expanduser(opts.cache_dir)

        if opts.display is None:
                # overwriting the screen would get mixed up with debug
                # output on stdout, and needs stdout to be a terminal
                if ((opts.debug > 0 and opts.debug_fd is None)
                    or not os.isatty(sys.stdout.fileno())):
                        opts.display = 'dumb'
                else:
                        opts.display = 'overwrite'

        fetcher = Fetcher(opts.ocean, opts.cache_dir)

        mode_fn(args[1:], pa.error)
1278
# run only when executed as a script, not when loaded as a module
if __name__ == '__main__':
        main()