chiark / gitweb /
10cc734e2df672057d13952e63d00c721230b2e1
[ypp-sc-tools.web-live.git] / yoweb-scrape
1 #!/usr/bin/python
2 # This is part of ypp-sc-tools, a set of third-party tools for assisting
3 # players of Yohoho Puzzle Pirates.
4 #
5 # Copyright (C) 2009 Ian Jackson <ijackson@chiark.greenend.org.uk>
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 #
20 # Yohoho and Puzzle Pirates are probably trademarks of Three Rings and
21 # are used without permission.  This program is not endorsed or
22 # sponsored by Three Rings.
23
24 copyright_info = '''
25 yoweb-scrape is part of ypp-sc-tools  Copyright (C) 2009 Ian Jackson
26 This program comes with ABSOLUTELY NO WARRANTY; this is free software,
27 and you are welcome to redistribute it under certain conditions.
28 For details, read the top of the yoweb-scrape file.
29 '''
30
31 #---------- setup ----------
32
33 import signal
34 signal.signal(signal.SIGINT, signal.SIG_DFL)
35
36 import os
37 import time
38 import urllib
39 import urllib2
40 import errno
41 import sys
42 import re as regexp
43 import random
44 import curses
45 import termios
46 import random
47 from optparse import OptionParser
48 from StringIO import StringIO
49
50 from BeautifulSoup import BeautifulSoup
51
52 opts = None
53
54 #---------- YPP parameters and arrays ----------
55
# Names of all the puzzles we know about.  PirateInfo only accepts
# standings for puzzles listed here.
puzzles = [
        'Swordfighting', 'Bilging', 'Sailing', 'Rigging', 'Navigating',
        'Battle Navigation', 'Gunning', 'Carpentry', 'Rumble',
        'Treasure Haul', 'Drinking', 'Spades', 'Hearts', 'Treasure Drop',
        'Poker', 'Distilling', 'Alchemistry', 'Shipwrightery',
        'Blacksmithing', 'Foraging',
        ]

# An entry which is itself a list is shown by StandingsTable as a
# single column holding the best standing of the puzzles it names.
core_duty_puzzles = [
        'Gunning',
        ['Sailing', 'Rigging'],
        'Bilging',
        'Carpentry',
        ]

# Columns shown by StandingsTable when opts.ship_duty is set.
duty_puzzles = ['Navigating', 'Battle Navigation']
duty_puzzles.extend(core_duty_puzzles)
duty_puzzles.append('Treasure Haul')

# Standing names; PirateInfo stores a standing as an index into this list.
standingvals = [
        'Able', 'Proficient', 'Distinguished', 'Respected', 'Master',
        'Renowned', 'Grand-Master', 'Legendary', 'Ultimate',
        ]
standing_limit = len(standingvals)

# Matches the href of a link to a pirate's yoweb page.
pirate_ref_re = regexp.compile(r'^/yoweb/pirate\.wm')

max_pirate_namelen = 12
79
80
81 #---------- general utilities ----------
82
def debug(m):
        # Prints debug message m to opts.debug_file, but only if
        # debugging has been enabled (opts.debug > 0).
        if not opts.debug > 0:
                return
        print >>opts.debug_file, m
86
def debug_flush():
        # Flushes opts.debug_file; does nothing unless debugging is
        # enabled (opts.debug > 0).
        if not opts.debug > 0:
                return
        opts.debug_file.flush()
90
def sleep(seconds):
        # Sleeps for `seconds', having first flushed any debug output
        # so that it is not held back in a buffer while we wait.
        debug_flush()
        time.sleep(seconds)
94
def format_time_interval(ti):
        # Formats a time interval ti (in seconds) compactly, using a
        # coarser unit the longer the interval:
        #   under 2 minutes     m:ss
        #   under 2 hours       whole minutes, eg `17m'
        #   under 1 day         whole hours, eg `5h'
        #   otherwise           whole days, eg `3d'
        if ti < 120:
                return '%d:%02d' % (ti / 60, ti % 60)
        for (limit, fmt, unit) in ((7200, '%2dm', 60),
                                   (86400, '%dh', 3600)):
                if ti < limit:
                        return fmt % (ti / unit)
        return '%dd' % (ti / 86400)
100
101 #---------- caching and rate-limiting data fetcher ----------
102
class Fetcher:
        # Fetches URLs, keeping a cache of the results in `cachedir'.
        #
        # Each cache entry is a file named #<urllib.quote_plus(url)>#.
        # The mtimes of these files are used both to decide whether a
        # cached copy is fresh enough and, taken together, as the
        # record of recent fetches which drives the rate limiter.
        #
        # Public data members:
        #  f.ocean = ocean name (hostname prefix used by yoweb()) or None
        #  f.cachedir = directory containing the cache files

        def __init__(self, ocean, cachedir):
                # Creates cachedir if it does not exist yet, and
                # expires any old entries found in it.
                debug('Fetcher init %s' % cachedir)
                self.ocean = ocean
                self.cachedir = cachedir
                try: os.mkdir(cachedir)
                except (OSError,IOError) as oe:
                        if oe.errno != errno.EEXIST: raise
                self._cache_scan(time.time())

        def default_ocean(self, ocean='ice'):
                # Sets the ocean to `ocean' unless one was already set.
                if self.ocean is None:
                        self.ocean = ocean

        def _cache_scan(self, now):
                # Removes cache entries older than opts.expire_age.
                # Returns list of ages (seconds before `now') of the
                # remaining entries, unsorted.
                ages = []
                debug('Fetcher   scan_cache')
                for leaf in os.listdir(self.cachedir):
                        if not leaf.startswith('#'): continue
                        path = self.cachedir + '/' + leaf
                        try: s = os.stat(path)
                        except (OSError,IOError) as oe:
                                # entry vanished meanwhile; that's fine
                                if oe.errno != errno.ENOENT: raise
                                continue
                        age = now - s.st_mtime
                        if age > opts.expire_age:
                                debug('Fetcher    expire %d %s' % (age, path))
                                try: os.remove(path)
                                except (OSError,IOError) as oe:
                                        if oe.errno != errno.ENOENT: raise
                                continue
                        ages.append(age)
                return ages

        def need_wait(self, now, imaginary=None):
                # Returns 0 if a fetch may be done right away; otherwise
                # the time to wait first, in seconds.  The nonzero value
                # includes random jitter of +/- 0.5s and so may even
                # come out slightly negative.
                #
                # imaginary is an optional list of additional ages
                # which are treated as if they belonged to cache entries.
                ages = self._cache_scan(now)
                if imaginary:
                        ages += imaginary
                ages.sort()
                debug('Fetcher   ages ' + repr(ages))
                # The most recent entry must be at least 1s old, and
                # the required age grows for each older entry.  Entries
                # more than 5s old never cause a wait.
                min_age = 1
                wait = 0
                for age in ages:
                        if age < min_age and age <= 5:
                                debug('Fetcher   morewait min=%d age=%d' %
                                        (min_age, age))
                                wait = max(wait, min_age - age)
                        min_age += 3
                        min_age *= 1.25
                if wait > 0:
                        wait += random.random() - 0.5
                return wait

        def _rate_limit_cache_clean(self, now):
                # Sleeps for as long as need_wait says we must (which
                # also expires old cache entries as a side effect).
                wait = self.need_wait(now)
                if wait > 0:
                        debug('Fetcher   wait %d' % wait)
                        sleep(wait)

        def _read_cached(self, cache_item, now, max_age):
                # Returns the contents of cache file cache_item if it
                # exists and is no more than max_age old, else None.
                # The file is always closed again before returning.
                try: f = open(cache_item, 'r')
                except (OSError,IOError) as oe:
                        if oe.errno != errno.ENOENT: raise
                        return None
                try:
                        age = now - os.fstat(f.fileno()).st_mtime
                        if age > max_age:
                                debug('Fetcher  stale %d < %d'% (max_age, age))
                                return None
                        data = f.read()
                finally:
                        f.close()
                debug('Fetcher  cached %d > %d' % (max_age, age))
                return data

        def fetch(self, url, max_age):
                # Returns the data at url.  A cached copy is used if
                # there is one no older than max_age seconds, where
                # max_age is first clamped to the range
                # [opts.min_max_age, opts.expire_age].  Otherwise the
                # url is fetched (after any delay demanded by the rate
                # limiter) and the result is stored in the cache.
                debug('Fetcher fetch %s' % url)
                cache_corename = urllib.quote_plus(url)
                cache_item = "%s/#%s#" % (self.cachedir, cache_corename)
                now = time.time()
                max_age = max(opts.min_max_age, min(max_age, opts.expire_age))

                data = self._read_cached(cache_item, now, max_age)
                if data is not None:
                        return data

                debug('Fetcher  fetch')
                self._rate_limit_cache_clean(now)

                stream = urllib2.urlopen(url)
                try: data = stream.read()
                finally: stream.close()

                # Write to a temporary name and rename into place, so
                # that a partially written entry never has the final
                # name.
                cache_tmp = "%s/#%s~%d#" % (
                        self.cachedir, cache_corename, os.getpid())
                f = open(cache_tmp, 'w')
                try: f.write(data)
                finally: f.close()
                os.rename(cache_tmp, cache_item)
                debug('Fetcher  stored')
                return data

        def yoweb(self, kind, tail, max_age):
                # Fetches http://<ocean>.puzzlepirates.com/yoweb/<kind><tail>
                # via fetch(), defaulting the ocean first if necessary.
                self.default_ocean()
                url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
                        self.ocean, kind, tail)
                return self.fetch(url, max_age)
203
204 #---------- logging assistance for troubled screenscrapers ----------
205
class SoupLog:
        # Collects messages about problems encountered while scraping.
        #  sl.msgs = [ 'message', ... ]

        def __init__(self):
                self.msgs = []

        def msg(self, m):
                # records message m
                self.msgs.append(m)

        def soupm(self, obj, m):
                # records message m, annotated with the repr of the
                # object obj which it is about
                self.msg(m + '; in ' + repr(obj))

        def needs_msgs(self, child_souplog):
                # moves all of child_souplog's messages into this log,
                # leaving child_souplog empty
                self.msgs.extend(child_souplog.msgs)
                child_souplog.msgs = []
216
def soup_text(obj):
        # Returns all the text found within soup object obj, joined
        # together, with leading and trailing whitespace removed.
        fragments = obj.findAll(text=True)
        return ''.join(fragments).strip()
220
class SomethingSoupInfo(SoupLog):
        # Common base for the scrapers: obtains the yoweb page
        # <kind><tail> through the global fetcher (passing on max_age)
        # and leaves the parsed page in self._soup.

        def __init__(self, kind, tail, max_age):
                SoupLog.__init__(self)
                page = fetcher.yoweb(kind, tail, max_age)
                entities = BeautifulSoup.HTML_ENTITIES
                self._soup = BeautifulSoup(page, convertEntities=entities)
228
229 #---------- scraper for pirate pages ----------
230
class PirateInfo(SomethingSoupInfo):
        # Public data members:
        #  pi.standings = { 'Treasure Haul': 0, ... }
        #       maps puzzle name to standing, as an index into
        #       standingvals; puzzles whose standing could not be
        #       determined are absent
        #  pi.name = name
        #  pi.crew = (id, name)         or None if not determined
        #  pi.flag = (id, name)         or None if not determined
        #  pi.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, pirate, max_age=300):
                SomethingSoupInfo.__init__(self,
                        'pirate.wm?target=', pirate, max_age)
                self.name = pirate
                self._find_standings()
                self.crew = self._find_crewflag('crew',
                        '^/yoweb/crew/info\\.wm')
                self.flag = self._find_crewflag('flag',
                        '^/yoweb/flag/info\\.wm')

        def _find_standings(self):
                # Sets self.standings from the standing images in the
                # page.  Each such image has the puzzle name as its alt
                # text and sits in a <td> whose next sibling <td> holds
                # the standing text.
                imgs = self._soup.findAll('img',
                        src=regexp.compile('/yoweb/images/stat.*'))
                # group 1 or group 2 is the standing name, depending on
                # which of the two textual forms was found
                standing_re = regexp.compile(
u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
                        )
                standings = { }

                for skill in puzzles:
                        standings[skill] = [ ]

                # Problems are collected separately, and only reported
                # (see the end of this function) if some puzzle ends up
                # without a standing.
                skl = SoupLog()

                for img in imgs:
                        try: puzzle = img['alt']
                        except KeyError: continue

                        if not puzzle in puzzles:
                                skl.soupm(img, 'unknown puzzle: "%s"' % puzzle)
                                continue
                        key = img.findParent('td')
                        if key is None:
                                skl.soupm(img, 'puzzle at root! "%s"' % puzzle)
                                continue
                        valelem = key.findNextSibling('td')
                        if valelem is None:
                                skl.soupm(key, 'puzzle missing sibling "%s"'
                                        % puzzle)
                                continue
                        valstr = soup_text(valelem)
                        match = standing_re.match(valstr)
                        if match is None:
                                skl.soupm(key, ('puzzle "%s" unparseable'+
                                        ' standing "%s"') % (puzzle, valstr))
                                continue
                        standing = match.group(match.lastindex)
                        standings[puzzle].append(standing)

                self.standings = { }

                for puzzle in puzzles:
                        sl = standings[puzzle]
                        if len(sl) > 1:
                                skl.msg('puzzle "%s" multiple standings %s' %
                                                (puzzle, repr(sl)))
                                continue
                        if not sl:
                                skl.msg('puzzle "%s" no standing found' % puzzle)
                                continue
                        standing = sl[0]
                        try:
                                self.standings[puzzle] = (
                                        standingvals.index(standing))
                        except ValueError:
                                skl.msg('puzzle "%s" unknown standing "%s"' %
                                        (puzzle, standing))

                if any(puzzle not in self.standings for puzzle in puzzles):
                        self.needs_msgs(skl)

        def _find_crewflag(self, cf, yoweb_re):
                # Looks for the (single) link whose href matches
                # yoweb_re and ends in <cf>id=<id>.  Returns
                # (id, link text), or None (having logged a message)
                # if that cannot be found.
                things = self._soup.findAll('a', href=regexp.compile(yoweb_re))
                if len(things) != 1:
                        self.msg('zero or several %s id references found' % cf)
                        return None
                thing = things[0]
                id_re = '\\b%sid\\=(\\w+)$' % cf
                id_haystack = thing['href']
                match = regexp.compile(id_re).search(id_haystack)
                if match is None:
                        self.soupm(thing, ('incomprehensible %s id ref'+
                                ' (%s in %s)') % (cf, id_re, id_haystack))
                        return None
                name = soup_text(thing)
                return (match.group(1), name)

        def __str__(self):
                return repr((self.crew, self.flag, self.standings, self.msgs))
329
330 #---------- scraper for crew pages ----------
331
class CrewInfo(SomethingSoupInfo):
        # Public data members:
        #  ci.crew = [ ('Captain',        ['Pirate', ...]),
        #              ('Senior Officer', [...]),
        #               ... ]
        #  ci.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, crewid, max_age=300):
                SomethingSoupInfo.__init__(self,
                        'crew/info.wm?crewid=', crewid, max_age)
                self._find_crew()

        def _find_crew(self):
                # Sets self.crew from the crew roster in the page.  If
                # the roster cannot be located, a message is logged
                # and self.crew is left empty.
                self.crew = []
                captain_imgs = self._soup.findAll('img',
                        src='/yoweb/images/crew-captain.png')
                if len(captain_imgs) != 1:
                        self.msg('crew members: no. of captain images != 1')
                        return

                # Starting from the captain image, go outwards through
                # the enclosing tables until we reach something which
                # contains links to pirates.
                table = captain_imgs[0]
                while not table.find('a', href=pirate_ref_re):
                        table = table.findParent('table')
                        if not table:
                                self.msg('crew members: cannot find table')
                                return

                rank_img_re = regexp.compile('/yoweb/images/crew')
                members = None          # member list of the rank in progress
                for row in table.contents:
                        if isinstance(row,basestring):
                                # a string rather than an element
                                continue

                        # A row containing a crew rank image starts a new
                        # rank; the row's text is the name of the rank.
                        if row.find('img', attrs={'src': rank_img_re}):
                                rank = soup_text(row)
                                members = []
                                self.crew.append((rank, members))
                                continue

                        for link in row.findAll('a', href=pirate_ref_re):
                                if members is None:
                                        self.soupm(link, 'crew members: crew'
                                                ' before rank')
                                else:
                                        members.append(soup_text(link))

        def __str__(self):
                return repr((self.crew, self.msgs))
379
380 #---------- pretty-printer for tables of pirate puzzle standings ----------
381
class StandingsTable:
        # Prints a table with one line per pirate and one column per
        # entry in use_puzzles, writing to the file-like object f.
        #
        #  use_puzzles  list of puzzle names; an entry may itself be
        #               a list of names, which share a column showing
        #               the best standing amongst them.  Defaults to
        #               duty_puzzles if opts.ship_duty, else puzzles.
        #  col_width    width of each puzzle column, including the
        #               space which separates it from the previous one
        #  gap_every    a blank line is printed after every gap_every
        #               lines; None means never

        def __init__(self, f, use_puzzles=None, col_width=6, gap_every=5):
                if use_puzzles is None:
                        if opts.ship_duty:
                                use_puzzles=duty_puzzles
                        else:
                                use_puzzles=puzzles
                self._puzzles = use_puzzles
                self.f = f
                self._cw = col_width-1
                self._gap_every = gap_every
                self._linecount = 0
                self._o = f.write

        def _nl(self): self._o('\n')

        def _pline(self, pirate, puzstrs, extra):
                # Prints one table line: the name `pirate', then each of
                # puzstrs padded or truncated to the column width, then
                # `extra' if that is nonempty.
                if (self._linecount > 0
                    and self._gap_every is not None
                    and not (self._linecount % self._gap_every)):
                        self._nl()
                self._o(' %-*s' % (max(max_pirate_namelen, 14), pirate))
                for v in puzstrs:
                        self._o(' %-*.*s' % (self._cw,self._cw, v))
                if extra:
                        self._o(' ' + extra)
                self._nl()
                self._linecount += 1

        def _puzstr(self, pi, puzzle):
                # Returns the table cell for `puzzle' (a name or a list
                # of names) for the pirate with PirateInfo pi:
                #   '?'  if a needed standing is missing from pi
                #   ''   if the (best) standing is 0
                #   otherwise one `*' per two levels of standing plus
                #   a `+' for an odd level, preceded by the standing
                #   number (and then a space) if the column is wide
                #   enough
                if not isinstance(puzzle,list): puzzle = [puzzle]
                try: standing = max([pi.standings[p] for p in puzzle])
                except KeyError: return '?'
                if not standing: return ''
                s = ''
                if self._cw > 4:
                        s += repr(standing)
                if self._cw > 5:
                        s += ' '
                s += '*' * (standing // 2)
                s += '+' * (standing % 2)
                return s

        def headings(self, lhs='', rhs=None):
                # Prints the heading line: lhs in the name column, the
                # abbreviated puzzle names, and then rhs (if any).
                def puzn_redact(name):
                        # Abbreviates a puzzle name so that more of it
                        # survives truncation to the column width.
                        if isinstance(name,list):
                                return '/'.join(
                                        ["%.*s" % (self._cw//2, puzn_redact(n))
                                         for n in name])
                        spc = name.find(' ')
                        if spc < 0: return name
                        # two words: at most 4 letters of the first,
                        # immediately followed by the second
                        return name[0:min(4,spc)] + name[spc+1:]
                # a nonpositive line count means no gap before the headings
                self._linecount = -2
                self._pline(lhs, [puzn_redact(p) for p in self._puzzles], rhs)
                self._linecount = 0

        def literalline(self, line):
                # Prints `line' as it is, and restarts the gap counting.
                self._o(line)
                self._nl()
                self._linecount = 0

        def pirate_dummy(self, name, standingstring, extra=None):
                # Prints a line for `name' without any real standings.
                # standingstring is repeated once for each column and
                # the result is then iterated over to give the cells,
                # so it should be a one-element list (or a
                # one-character string).
                self._pline(name, standingstring * len(self._puzzles), extra)

        def pirate(self, pi, extra=None):
                # Prints the line for the pirate with PirateInfo pi.
                puzstrs = [self._puzstr(pi,puz) for puz in self._puzzles]
                self._pline(pi.name, puzstrs, extra)
448
449
450 #---------- chat log parser ----------
451
class PirateAboard:
        # This is essentially a transparent, dumb, data class.
        #  pa.v                 vessel (as passed to the constructor)
        #  pa.name              pirate name
        #  pa.last_time         time of the most recent event
        #  pa.last_event        description of the most recent event
        #  pa.gunner            initially False
        #  pa.last_chat_time    initially None
        #  pa.last_chat_chan    initially None
        #  pa.pi                PirateInfo, or None if not fetched yet;
        #                       use pa.pirate_info() to get it

        def __init__(self, pn, v, time, event):
                self.name = pn
                self.v = v
                self.last_time = time
                self.last_event = event
                self.last_chat_time = None
                self.last_chat_chan = None
                self.gunner = False
                self.pi = None

        def pirate_info(self):
                # Returns the PirateInfo for this pirate, fetching or
                # refreshing it if appropriate.  If the fetcher says it
                # is too soon for another fetch we do not wait: we just
                # return what we have, which may be out of date or None.
                now = time.time()
                if not self.pi:
                        imaginary = [1]
                else:
                        age = now - self.pi_fetched
                        # the age limit is randomised on each call
                        guide = random.randint(120,240)
                        if age <= guide:
                                return self.pi
                        debug('PirateAboard refresh %d > %d  %s' % (
                                age, guide, self.name))
                        imaginary = [2,4]
                wait = fetcher.need_wait(now, imaginary)
                if wait:
                        debug('PirateAboard fetcher not ready %d' % wait)
                        return self.pi
                self.pi = PirateInfo(self.name, 600)
                self.pi_fetched = now
                return self.pi
492
493 class ChatLogTracker:
494         # This is quite complex so we make it opaque.  Use the
495         # official invokers, accessors etc.
496
497         def __init__(self, myself_pi, logfn):
498                 self._pl = {}   # self._pl['Pirate'] =
499                 self._vl = {}   #   self._vl['Vessel']['Pirate'] = PirateAboard
500                                 # self._vl['Vessel']['#lastinfo']
501                                 # self._vl['Vessel']['#name']
502                                 # self._v = self._vl[self._vessel]
503                 self._date = None
504                 self._myself = myself_pi
505                 self._lbuf = ''
506                 self._f = file(logfn)
507                 flen = os.fstat(self._f.fileno()).st_size
508                 max_backlog = 500000
509                 if flen > max_backlog:
510                         startpos = flen - max_backlog
511                         self._f.seek(startpos)
512                         self._f.readline()
513                 self._progress = [0, flen - self._f.tell()]
514                 self._disembark_myself()
515                 self._need_redisplay = False
516                 self._lastvessel = None
517
518         def _disembark_myself(self):
519                 self._v = None
520                 self._vessel = None
521                 self.force_redisplay()
522
523         def force_redisplay(self):
524                 self._need_redisplay = True
525
526         def _vessel_updated(self, v, timestamp):
527                 v['#lastinfo'] = timestamp
528                 self.force_redisplay()
529
530         def _onboard_event(self,v,timestamp,pirate,event):
531                 pa = self._pl.get(pirate, None)
532                 if pa is not None and pa.v is v:
533                         pa.last_time = timestamp
534                         pa.last_event = event
535                 else:
536                         if pa is not None: del pa.v[pirate]
537                         pa = PirateAboard(pirate, v, timestamp, event)
538                         self._pl[pirate] = pa
539                         v[pirate] = pa
540                 self._vessel_updated(v, timestamp)
541                 return pa
542
543         def _trash_vessel(self, v):
544                 for pn in v:
545                         if pn.startswith('#'): continue
546                         del self._pl[pn]
547                 vn = v['#name']
548                 del self._vl[vn]
549                 if v is self._v: self._disembark_myself()
550                 self.force_redisplay()
551
552         def _vessel_stale(self, v, timestamp):
553                 return timestamp - v['#lastinfo'] > opts.ship_reboard_clearout
554
555         def _vessel_check_expire(self, v, timestamp):
556                 if not self._vessel_stale(v, timestamp):
557                         return v
558                 self._debug_line_disposition(timestamp,'',
559                         'stale-reset ' + v['#name'])
560                 self._trash_vessel(v)
561                 return None
562
563         def expire_garbage(self, timestamp):
564                 for v in self._vl.values():
565                         self._vessel_check_expire(v, timestamp)
566
567         def _vessel_lookup(self, vn, timestamp, dml=[], create=False):
568                 v = self._vl.get(vn, None)
569                 if v is not None:
570                         v = self._vessel_check_expire(v, timestamp)
571                 if v is not None:
572                         dml.append('found')
573                         return v
574                 if not create:
575                         dml.append('no')
576                 dml.append('new')
577                 self._vl[vn] = v = { '#name': vn }
578                 self._vessel_updated(v, timestamp)
579                 return v
580
581         def _find_matching_vessel(self, pattern, timestamp, cmdr,
582                                         dml=[], create=False):
583                 # use when a commander pirate `cmdr' specified a vessel
584                 #  by name `pattern' (either may be None)
585                 # if create is true, will create the vessel
586                 #  record if an exact name is specified
587
588                 if (pattern is not None and
589                     not '*' in pattern
590                     and len(pattern.split(' ')) == 2):
591                         vn = pattern.title()
592                         dml.append('exact')
593                         return self._vessel_lookup(
594                                 vn, timestamp, dml=dml, create=create)
595
596                 if pattern is None:
597                         pattern_check = lambda vn: True
598                 else:
599                         re = '(?:.* )?%s$' % pattern.lower().replace('*','.+')
600                         pattern_check = regexp.compile(re, regexp.I).match
601
602                 tries = []
603
604                 cmdr_pa = self._pl.get(cmdr, None)
605                 if cmdr_pa: tries.append((cmdr_pa.v, 'cmdr'))
606
607                 tries.append((self._v, 'here'))
608                 tried_vns = []
609
610                 for (v, dm) in tries:
611                         if v is None: dml.append(dm+'?'); continue
612                         
613                         vn = v['#name']
614                         if not pattern_check(vn):
615                                 tried_vns.append(vn)
616                                 dml.append(dm+'#')
617                                 continue
618
619                         dml.append(dm+'!')
620                         return v
621
622                 if pattern is not None and '*' in pattern:
623                         search = [
624                                 (vn,v)
625                                 for (vn,v) in self._vl.iteritems()
626                                 if not self._vessel_stale(v, timestamp)
627                                 if pattern_check(vn)
628                                 ]
629                         #debug('CLT-RE /%s/ wanted (%s) searched (%s)' % (
630                         #       re,
631                         #       '/'.join(tried_vns),
632                         #       '/'.join([vn for (vn,v) in search])))
633
634                         if len(search)==1:
635                                 dml.append('one')
636                                 return search[0][1]
637                         elif search:
638                                 dml.append('many')
639                         else:
640                                 dml.append('none')
641
642         def _debug_line_disposition(self,timestamp,l,m):
643                 debug('CLT %13s %-40s %s' % (timestamp,m,l))
644
645         def chatline(self,l):
646                 rm = lambda re: regexp.match(re,l)
647                 d = lambda m: self._debug_line_disposition(timestamp,l,m)
648                 timestamp = None
649
650                 m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$')
651                 if m:
652                         self._date = [int(x) for x in m.groups()]
653                         self._previous_timestamp = None
654                         return d('date '+`self._date`)
655
656                 if self._date is None:
657                         return d('date unset')
658
659                 m = rm('\\[(\d\d):(\d\d):(\d\d)\\] ')
660                 if not m:
661                         return d('no timestamp')
662
663                 while True:
664                         time_tuple = (self._date +
665                                       [int(x) for x in m.groups()] +
666                                       [-1,-1,-1])
667                         timestamp = time.mktime(time_tuple)
668                         if timestamp >= self._previous_timestamp: break
669                         self._date[2] += 1
670                         self._debug_line_disposition(timestamp,'',
671                                 'new date '+`self._date`)
672
673                 self._previous_timestamp = timestamp
674
675                 l = l[l.find(' ')+1:]
676
677                 def ob_x(pirate,event):
678                         return self._onboard_event(
679                                         self._v, timestamp, pirate, event)
680                 def ob1(did): ob_x(m.group(1), did); return d(did)
681                 def oba(did): return ob1('%s %s' % (did, m.group(2)))
682
683                 def disembark(v, timestamp, pirate, event):
684                         self._onboard_event(
685                                         v, timestamp, pirate, 'leaving '+event)
686                         del v[pirate]
687                         del self._pl[pirate]
688
689                 def disembark_me(why):
690                         self._disembark_myself()
691                         return d('disembark-me '+why)
692
693                 m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$')
694                 if m:
695                         dm = ['boarding']
696                         pn = self._myself.name
697                         vn = m.group(1)
698                         v = self._vessel_lookup(vn, timestamp, dm, create=True)
699                         self._lastvessel = self._vessel = vn
700                         self._v = v
701                         ob_x(pn, 'we boarded')
702                         self.expire_garbage(timestamp)
703                         return d(' '.join(dm))
704
705                 if self._v is None:
706                         return d('no vessel')
707
708                 m = rm('(\\w+) has come aboard\\.$')
709                 if m: return ob1('boarded');
710
711                 m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$')
712                 if m:
713                         (who,what) = m.groups()
714                         pa = ob_x(who,'ord '+what)
715                         if what == 'Gunning':
716                                 pa.gunner = True
717                         return d('duty order')
718
719                 m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$')
720                 if m: oba('stopped'); return d("end")
721
722                 def chat_core(speaker, chan):
723                         try: pa = self._pl[speaker]
724                         except KeyError: return 'mystery'
725                         if pa.v is not self._v: return 'elsewhere'
726                         pa.last_chat_time = timestamp
727                         pa.last_chat_chan = chan
728                         self.force_redisplay()
729                         return 'here'
730
731                 def chat(chan):
732                         speaker = m.group(1)
733                         dm = chat_core(speaker, chan)
734                         return d('chat %s %s' % (chan, dm))
735
736                 def chat_metacmd(chan):
737                         (cmdr, metacmd) = m.groups()
738                         metacmd = regexp.sub('\\s+', ' ', metacmd).strip()
739                         m2 = regexp.match(
740                                 '/([ad]) (?:([A-Za-z* ]+)\\s*:)?([A-Za-z ]+)$',
741                                 metacmd)
742                         if not m2: return chat(chan)
743
744                         (cmd, pattern, targets) = m2.groups()
745                         dml = ['cmd', chan, cmd]
746
747                         if cmd == 'a': each = self._onboard_event
748                         else: each = disembark
749
750                         if cmdr == self._myself.name:
751                                 dml.append('self')
752                                 how = 'cmd: %s' % cmd
753                         else:
754                                 dml.append('other')
755                                 how = 'cmd: %s %s' % (cmd,cmdr)
756
757                         v = self._find_matching_vessel(
758                                 pattern, timestamp, cmdr, dml, create=True)
759
760                         if v is not None:
761                                 targets = targets.strip().split(' ')
762                                 dml.append(`len(targets)`)
763                                 for target in targets:
764                                         each(v, timestamp, target.title(), how)
765                                 self._vessel_updated(v, timestamp)
766
767                         dm = ' '.join(dml)
768                         chat_core(cmdr, 'cmd '+chan)
769                         return d(dm)
770
771                 m = rm('(\\w+) (?:issued an order|ordered everyone) "')
772                 if m: return ob1('general order');
773
774                 m = rm('(\\w+) says, "')
775                 if m: return chat('public')
776
777                 m = rm('(\\w+) tells ye, "')
778                 if m: return chat('private')
779
780                 m = rm('Ye told (\\w+), "(.*)"$')
781                 if m: return chat_metacmd('private')
782
783                 m = rm('(\\w+) flag officer chats, "')
784                 if m: return chat('flag officer')
785
786                 m = rm('(\\w+) officer chats, "(.*)"$')
787                 if m: return chat_metacmd('officer')
788
789                 m = rm('Ye accepted the offer to job with ')
790                 if m: return disembark_me('jobbing')
791
792                 m = rm('Ye hop on the ferry and are whisked away ')
793                 if m: return disembark_me('ferry')
794
795                 m = rm('Whisking away to yer home on the magical winds')
796                 if m: return disembark_me('home')
797
798                 m = rm('Game over\\.  Winners: ([A-Za-z, ]+)\\.$')
799                 if m:
800                         pl = m.group(1).split(', ')
801                         if not self._myself.name in pl:
802                                 return d('lost melee')
803                         for pn in pl:
804                                 if ' ' in pn: continue
805                                 ob_x(pn,'won melee')
806                         return d('won melee')
807
808                 m = rm('(\\w+) is eliminated\\!')
809                 if m: return ob1('eliminated in fray');
810
811                 m = rm('(\\w+) has driven \w+ from the ship\\!')
812                 if m: return ob1('boarder repelled');
813
814                 m = rm('\w+ has bested (\\w+), and turns'+
815                         ' to the rest of the ship\\.')
816                 if m: return ob1('boarder unrepelled');
817
818                 m = rm('(\\w+) has left the vessel\.')
819                 if m:
820                         pirate = m.group(1)
821                         disembark(self._v, timestamp, pirate, 'disembarked')
822                         return d('disembarked')
823
824                 return d('not-matched')
825
826         def _str_vessel(self, vn, v):
827                 s = ' vessel %s\n' % vn
828                 s += ' '*20 + "%-*s   %13s\n" % (
829                                 max_pirate_namelen, '#lastinfo',
830                                 v['#lastinfo'])
831                 assert v['#name'] == vn
832                 for pn in sorted(v.keys()):
833                         if pn.startswith('#'): continue
834                         pa = v[pn]
835                         assert pa.v == v
836                         assert self._pl[pn] == pa
837                         s += ' '*20 + "%s %-*s %13s %-30s %13s %s\n" % (
838                                 (' ','G')[pa.gunner],
839                                 max_pirate_namelen, pn,
840                                 pa.last_time, pa.last_event,
841                                 pa.last_chat_time, pa.last_chat_chan)
842                 return s
843
844         def __str__(self):
845                 s = '''<ChatLogTracker
846  myself %s
847  vessel %s
848 '''                     % (self._myself.name, self._vessel)
849                 assert ((self._v is None and self._vessel is None) or
850                         (self._v is self._vl[self._vessel]))
851                 if self._vessel is not None:
852                         s += self._str_vessel(self._vessel, self._v)
853                 for vn in sorted(self._vl.keys()):
854                         if vn == self._vessel: continue
855                         s += self._str_vessel(vn, self._vl[vn])
856                 for p in self._pl:
857                         pa = self._pl[p]
858                         assert pa.v[p] is pa
859                         assert pa.v in self._vl.values()
860                 s += '>\n'
861                 return s
862
863         def catchup(self, progress=None):
864                 while True:
865                         more = self._f.readline()
866                         if not more: break
867
868                         self._progress[0] += len(more)
869                         if progress: progress.progress(*self._progress)
870
871                         self._lbuf += more
872                         if self._lbuf.endswith('\n'):
873                                 self.chatline(self._lbuf.rstrip())
874                                 self._lbuf = ''
875                                 if opts.debug >= 2:
876                                         debug(self.__str__())
877                 if progress: progress.caughtup()
878
879         def changed(self):
880                 rv = self._need_redisplay
881                 self._need_redisplay = False
882                 return rv
883         def myname(self):
884                 # returns our pirate name
885                 return self._myself.name
886         def vesselname(self):
887                 # returns the vessel name we're aboard or None
888                 return self._vessel
889         def lastvesselname(self):
890                 # returns the last vessel name we were aboard or None
891                 return self._lastvessel
892         def aboard(self, vesselname=True):
893                 # returns a list of PirateAboard the vessel
894                 #  sorted by pirate name
895                 #  you can pass this None and you'll get []
896                 #  or True for the current vessel (which is the default)
897                 #  the returned value is a fresh list of persistent
898                 #  PirateAboard objects
899                 if vesselname is True: v = self._v
900                 else: v = self._vl.get(vesselname.title())
901                 if v is None: return []
902                 return [ v[pn]
903                          for pn in sorted(v.keys())
904                          if not pn.startswith('#') ]
905
906 #---------- implementations of actual operation modes ----------
907
def do_pirate(pirates, bu):
        # "pirate" mode: prints a dict-like listing with one
        # 'name': info entry per requested pirate.  bu is unused here.
        print('{')
        for pirate in pirates:
                entry = PirateInfo(pirate)
                print('%s: %s,' % (repr(pirate), entry))
        print('}')
914
def prep_crew_of(args, bu, max_age=300):
        # Looks up the single pirate named in args and returns the
        # CrewInfo for that pirate's crew, or None when the pirate's
        # info has no crew.  bu is called with a message if args is
        # not exactly one name; max_age is passed on to both lookups.
        if len(args) != 1: bu('crew-of takes one pirate name')
        pirate = PirateInfo(args[0], max_age)
        if pirate.crew is not None:
                return CrewInfo(pirate.crew[0], max_age)
        return None
920
def do_crew_of(args, bu):
        # "crew-of" mode: prints whatever prep_crew_of returns for the
        # named pirate (which is None if the pirate has no crew).
        print(prep_crew_of(args, bu))
924
def do_standings_crew_of(args, bu):
        # "standings-crew-of" mode: prints a standings table for every
        # member of the named pirate's crew, grouped by rank.
        ci = prep_crew_of(args, bu, 60)
        if ci is None:
                # prep_crew_of returns None for a pirate with no crew;
                # there is nothing to tabulate, so say so instead of
                # failing on ci.crew below
                sys.stderr.write('%s is not in a crew\n' % args[0])
                return
        tab = StandingsTable(sys.stdout)
        tab.headings()
        for (rank, members) in ci.crew:
                if not members: continue
                tab.literalline('')
                tab.literalline('%s:' % rank)
                for p in members:
                        # each member gets its own randomly chosen
                        # maximum cache age of 900 to 1800 seconds
                        pi = PirateInfo(p, random.randint(900,1800))
                        tab.pirate(pi)
936
class ProgressPrintPercentage:
        # Simple progress reporter for the chat log scan.  It writes a
        # one-line percentage to a file object (stdout by default); the
        # line ends in \r so each update overwrites the previous one.

        def __init__(self, f=sys.stdout):
                self._f = f

        def progress_string(self,done,total):
                # the text for "done out of total" scanned so far
                percent = (done*100) / total
                return "scan chat logs %3d%%\r" % (percent,)

        def progress(self,*a):
                # a is (done, total), as for progress_string
                out = self._f
                out.write(self.progress_string(*a))
                out.flush()

        def show_init(self, pirate, ocean):
                self._f.write('Starting up, %s on the %s ocean\n' % (
                        pirate, ocean))

        def caughtup(self):
                # blank out the progress line now that the scan is done
                out = self._f
                out.write('                   \r')
                out.flush()
951
952 #----- modes which use the chat log parser are quite complex -----
953
def prep_chat_log(args, bu,
                progress=ProgressPrintPercentage(),
                max_myself_age=3600):
        # Common startup for the modes which follow a chat log.
        #  args      must be exactly one item, the chat log filename,
        #            whose last path component starts PIRATE_OCEAN_
        #  bu        is called with a message on bad usage
        #  progress  reports startup and the progress of the initial scan
        #  max_myself_age  is passed to PirateInfo for our own pirate
        # Returns (myself, track): our PirateInfo and a ChatLogTracker
        # which has already caught up with the existing log contents.
        if len(args) != 1: bu('this action takes only chat log filename')
        logfn = args[0]
        logfn_re = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_'
        match = regexp.match(logfn_re, logfn)
        if not match: bu('chat log filename is not in expected format')
        (pirate, ocean) = match.groups()
        fetcher.default_ocean(ocean)

        progress.show_init(pirate, fetcher.ocean)
        myself = PirateInfo(pirate,max_myself_age)
        track = ChatLogTracker(myself, logfn)

        # The debug level is lowered by 2 for the duration of the
        # initial scan.  Restore it even if the scan fails, so that a
        # caller which handles the error does not inherit the lowered
        # level.
        opts.debug -= 2
        try:
                track.catchup(progress)
        finally:
                opts.debug += 2

        track.force_redisplay()

        return (myself, track)
976
def do_track_chat_log(args, bu):
        # "track-chat-log" mode: follows the chat log forever, printing
        # the tracker's state dump whenever it reports a change.
        (myself, track) = prep_chat_log(args, bu)
        while True:
                track.catchup()
                if track.changed():
                        print(track)
                # poll every 0.5 to 1.0 seconds.  Use time.sleep: the
                # module imports time, not a bare sleep.
                time.sleep(0.5 + 0.5 * random.random())
984
985 #----- ship management aid -----
986
class Display_dumb(ProgressPrintPercentage):
        # Displayer which just prints each new screenful after two
        # blank lines, letting the terminal scroll.

        def __init__(self):
                ProgressPrintPercentage.__init__(self)

        def show(self, s):
                # two newlines, then s, then the newline print adds
                print('\n\n%s' % (s,))

        def realstart(self):
                # nothing to prepare for plain scrolling output
                return None
994
class Display_overwrite(ProgressPrintPercentage):
        # Displayer which redraws in place from the top of the screen
        # using terminfo capabilities.  It uses 'ho' (or 'cup' to 0,0)
        # to home the cursor, 'el' to clear the rest of each line and
        # 'ed' to clear below the last line.  If any of those is
        # missing it clears the whole screen before each redraw; if
        # even 'clear' is missing it falls back to plain scrolling
        # output.

        def __init__(self):
                ProgressPrintPercentage.__init__(self)

                # terminfo is initialised against /dev/null rather than
                # our real stdout
                null = open('/dev/null','w')
                curses.setupterm(fd=null.fileno())

                self._clear = curses.tigetstr('clear')
                if not self._clear:
                        self._debug('missing clear!')
                        # realstart must have something writable
                        self._clear = ''
                        # fall back to scrolling output; this has to be
                        # a method bound to this object, not a method
                        # borrowed from the unrelated Display_dumb class
                        self.show = self._show_dumb
                        return

                self._t = {'el':'', 'ed':''}
                if not self._init_sophisticated():
                        # no selective clearing: make el and ed no-ops
                        # and "home" a full screen clear instead
                        for k in list(self._t.keys()): self._t[k] = ''
                        self._t['ho'] = self._clear

        def _debug(self,m): debug('display overwrite: '+m)

        def _init_sophisticated(self):
                # Fills self._t with the control strings for every
                # capability named in it, plus 'ho'.  Returns 1 if all
                # of them are available, 0 (after a debug message) if
                # any is missing.
                for k in list(self._t.keys()):
                        self._t[k] = curses.tigetstr(k)
                ho = curses.tigetstr('ho')
                if not ho:
                        # no home capability: synthesise one from cursor
                        # addressing, provided that exists at all
                        cup = curses.tigetstr('cup')
                        if cup: ho = curses.tparm(cup,0,0)
                self._t['ho'] = ho
                missing = sorted([k for k in self._t.keys()
                                  if not self._t[k]])
                if missing:
                        self._debug('missing '+(' '.join(missing)))
                        return 0
                return 1

        def _show_dumb(self, s):
                # fallback display, same output as Display_dumb.show
                print('\n\n%s' % (s,))

        def show(self, s):
                # Redraws s starting from the top left of the screen.
                w = sys.stdout.write
                def wti(k): w(self._t[k])

                wti('ho')
                nl = ''
                for l in s.rstrip().split('\n'):
                        w(nl)
                        w(l)
                        wti('el')
                        # explicit \r\n between lines, none after the last
                        nl = '\r\n'
                wti('ed')
                w(' ')
                sys.stdout.flush()

        def realstart(self):
                # Clears the screen once before the first real redraw
                # (writes nothing in the no-'clear' fallback).
                sys.stdout.write(self._clear)
                sys.stdout.flush()
1047                         
1048
def do_ship_aid(args, bu):
        # "ship-aid" mode: sets up the displayer chosen by --display,
        # scans the chat log, and runs the interactive display loop
        # until the user quits.
        if opts.ship_duty is None: opts.ship_duty = True

        # opts.display is 'dumb' or 'overwrite'; pick the matching class
        displayer = globals()['Display_'+opts.display]()

        (myself, track) = prep_chat_log(args, bu, progress=displayer)

        displayer.realstart()

        # only take keystrokes when stdin is a terminal
        if os.isatty(0): kr_create = KeystrokeReader
        else: kr_create = DummyKeystrokeReader

        # Create the reader before entering the try block: if its
        # constructor fails there is nothing to stop, and the finally
        # clause must not hide that failure behind a NameError.
        kreader = kr_create(0, 10)
        try:
                ship_aid_core(myself, track, displayer, kreader)
        finally:
                kreader.stop()
                print('\n')
1067
class KeyBasedSorter:
        # Base for sorters defined by a key function on pirate info.
        # Subclasses provide compar_key(pi).

        def compar_key_pa(self, pa):
                # sort key for a PirateAboard: None while its pirate
                # info is not available, else the subclass's key
                pi = pa.pirate_info()
                if pi is not None:
                        return self.compar_key(pi)
                return None

        def lsort_pa(self, l):
                # sorts the list l of PirateAboard in place
                keyfn = self.compar_key_pa
                l.sort(key=keyfn)
1075
class NameSorter(KeyBasedSorter):
        # Sorts pirates by name.

        def compar_key(self, pi):
                return pi.name

        def desc(self):
                # description of the sort order
                return 'name'
1079
class SkillSorter(NameSorter):
        # Sorts pirates by their standings in the wanted puzzles, best
        # first; ties are broken by their standings in the other core
        # duty puzzles, and finally by name.

        def __init__(self, relevant):
                # relevant is one or more puzzle names joined with '/'
                self._want = frozenset(relevant.split('/'))
                avoid = set()
                for p in core_duty_puzzles:
                        # entries are a puzzle name or a group of names
                        if isinstance(p,basestring): avoid.add(p)
                        else: avoid.update(p)
                self._avoid = avoid - self._want
                self._desc = '%s' % relevant

        def desc(self): return self._desc

        def compar_key(self, pi):
                # best standing among the wanted puzzles; -1 if none known
                best_want = max([
                        pi.standings.get(puz,-1)
                        for puz in self._want
                        ])
                # negated standings in the puzzles to avoid, ascending;
                # an unknown standing counts as standing_limit
                best_avoid = sorted([
                        -pi.standings.get(puz,standing_limit)
                        for puz in self._avoid
                        ])
                debug('compar_key %s bw=%s ba=%s' % (pi.name,
                        repr(best_want), repr(best_avoid)))
                return (-best_want, [-x for x in best_avoid], pi.name)
1106
def ship_aid_core(myself, track, displayer, kreader):
        # Display loop of the ship management aid.  Each time round it
        # catches up with the chat log, redraws a status line and a
        # standings table of the pirates aboard, then waits for a
        # keystroke from kreader.  'q' quits; the other known keys
        # choose the sort order.

        # keystrokes which select sorting by skill in the given puzzle(s)
        skill_keys = {
                'g': 'Gunning',
                'c': 'Carpentry',
                's': 'Sailing/Rigging',
                'b': 'Bilging',
                'n': 'Navigating',
                'd': 'Battle Navigation',
                't': 'Treasure Haul',
        }

        def find_vessel():
                # (vessel name or None, description for the status line)
                for (getname, fmt) in (
                        (track.vesselname, " on board the %s"),
                        (track.lastvesselname, " ashore from the %s"),
                ):
                        vn = getname()
                        if vn: return (vn, fmt % vn)
                return (None, " not on a vessel")

        def timeevent(t,e):
                # fixed-width "how long ago, what" column; blank if no time
                if t is None: return ' ' * 22
                ago = format_time_interval(now - t)
                return " %-4s %-16s" % (ago, e)

        displayer.show(track.myname() + find_vessel()[1] + '...')

        # placeholder shown for pirates whose info is not yet available;
        # rotated whenever a wait for a keystroke yields nothing
        rotate_nya = '/-\\'

        sort = NameSorter()

        while True:
                track.catchup()
                now = time.time()

                (vn, where) = find_vessel()
                header = ''.join([
                        track.myname(),
                        where,
                        " at %s" % time.strftime("%Y-%m-%d %H:%M:%S"),
                        kreader.info(),
                        '\n',
                ])

                aboard = track.aboard(vn)
                sort.lsort_pa(aboard)

                tbl_s = StringIO()
                tbl = StandingsTable(tbl_s)

                howmany = ''
                if track.vesselname(): howmany = ' %d aboard' % len(aboard)

                tbl.headings(howmany, '  sorted by '+sort.desc())

                for pa in aboard:
                        pi = pa.pirate_info()

                        xs = 'G ' if pa.gunner else '  '
                        xs += timeevent(pa.last_time, pa.last_event)
                        xs += timeevent(pa.last_chat_time, pa.last_chat_chan)

                        if pi is not None:
                                tbl.pirate(pi, xs)
                        else:
                                tbl.pirate_dummy(pa.name, rotate_nya[0], xs)

                displayer.show(header + tbl_s.getvalue())
                tbl_s.close()

                k = kreader.getch()
                if k is None:
                        rotate_nya = rotate_nya[1:3] + rotate_nya[0]
                        continue

                if k == 'q':
                        break
                if k in skill_keys:
                        sort = SkillSorter(skill_keys[k])
                elif k == 'a':
                        sort = NameSorter()
                # any other key is an unknown command and is ignored
1180
1181 #---------- individual keystroke input ----------
1182
class DummyKeystrokeReader:
        # Stand-in for KeystrokeReader, used when stdin is not a
        # terminal.  It never returns a keystroke; getch just waits one
        # second so that the caller's display loop still ticks.
        def __init__(self,fd,timeout_dummy): pass
        def stop(self): pass
        def getch(self):
                # use time.sleep: the module imports time, not a bare sleep
                time.sleep(1)
                return None
        def info(self): return ' [noninteractive]'
1188
class KeystrokeReader(DummyKeystrokeReader):
        # Reads single keystrokes from a terminal fd.  The constructor
        # switches the terminal out of canonical mode with echo off;
        # stop() puts the saved settings back.

        def __init__(self, fd, timeout_decisec=0):
                self._fd = fd
                self._saved = termios.tcgetattr(fd)
                attrs = termios.tcgetattr(fd)
                # element 3 of the attribute list is the local-modes word
                lflags_off = (termios.ECHO | termios.ECHONL |
                              termios.ICANON | termios.IEXTEN)
                attrs[3] = attrs[3] & ~lflags_off
                # element 6 is the control characters array: with VMIN 0
                # a read returns after at most VTIME tenths of a second
                cc = attrs[6]
                cc[termios.VMIN] = 0
                cc[termios.VTIME] = timeout_decisec
                termios.tcsetattr(fd, termios.TCSANOW, attrs)

        def stop(self):
                termios.tcsetattr(self._fd, termios.TCSANOW, self._saved)

        def getch(self):
                # returns one byte of input, or None if the read came
                # back empty
                debug_flush()
                byte = os.read(self._fd, 1)
                if byte:
                        return byte
                return None

        def info(self):
                # nothing extra to show on the status line
                return ''
1208
1209 #---------- main program ----------
1210
def main():
        # Program entry point: parses the command line into the global
        # opts, fills in derived settings, creates the global fetcher,
        # and dispatches to the do_<mode> function for the requested
        # action with the remaining arguments.
        global opts, fetcher

        pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
actions:
 yoweb-scrape [--ocean OCEAN ...] pirate PIRATE
 yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
 yoweb-scrape [options] ship-aid CHAT-LOG  (must be .../PIRATE_OCEAN_chat-log*)

display modes (for --display) apply to ship-aid:
 --display=dumb       just print new information, scrolling the screen
 --display=overwrite  use cursor motion, selective clear, etc. to redraw at top''')
        ao = pa.add_option
        ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None,
                help='select ocean OCEAN')
        ao('--cache-dir', dest='cache_dir', metavar='DIR',
                default='~/.yoweb-scrape-cache',
                help='cache yoweb pages in DIR')
        ao('-D','--debug', action='count', dest='debug', default=0,
                help='enable debugging output')
        ao('--debug-fd', type='int', dest='debug_fd',
                help='write any debugging output to specified fd')
        ao('-q','--quiet', action='store_true', dest='quiet',
                help='suppress warning output')
        ao('--display', action='store', dest='display',
                type='choice', choices=['dumb','overwrite'],
                help='how to display ship aid')

        ao('--ship-duty', action='store_true', dest='ship_duty',
                help='show ship duty station puzzles')
        ao('--all-puzzles', action='store_false', dest='ship_duty',
                help='show all puzzles, not just ship duty stations')

        ao('--min-cache-reuse', type='int', dest='min_max_age',
                metavar='SECONDS', default=60,
                help='always reuse cache yoweb data if no older than this')

        (opts,args) = pa.parse_args()
        random.seed()

        if len(args) < 1:
                sys.stderr.write(copyright_info + '\n')
                pa.error('need a mode argument')

        if opts.debug_fd is not None:
                opts.debug_file = os.fdopen(opts.debug_fd, 'w')
        else:
                opts.debug_file = sys.stdout

        # The action name maps to a function do_<action> with hyphens
        # turned into underscores.  Literal underscores are first
        # turned into '#', so that a name typed with underscores can
        # never match a function.
        mode = args[0]
        mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_')
        try: mode_fn = globals()[mode_fn_name]
        except KeyError: pa.error('unknown mode "%s"' % mode)

        # fixed parameters
        opts.expire_age = max(3600, opts.min_max_age)

        opts.ship_reboard_clearout = 3600

        if opts.cache_dir.startswith('~/'):
                # expanduser copes with HOME being unset, where
                # concatenating os.getenv('HOME') would fail on None
                opts.cache_dir = os.path.expanduser(opts.cache_dir)

        if opts.display is None:
                # default to plain scrolling output when debugging
                # output shares stdout, or when stdout is not a terminal
                if ((opts.debug > 0 and opts.debug_fd is None)
                    or not os.isatty(sys.stdout.fileno())):
                        opts.display = 'dumb'
                else:
                        opts.display = 'overwrite'

        fetcher = Fetcher(opts.ocean, opts.cache_dir)

        mode_fn(args[1:], pa.error)
1286
# Run the program only when executed as a script, so that loading this
# file as a module does not parse the command line or start any work.
if __name__ == '__main__':
        main()