chiark / gitweb /
e3a7c8c19c7b97f5e7c1e7dbb30f3418b101859a
[ypp-sc-tools.web-live.git] / yoweb-scrape
1 #!/usr/bin/python
2 # This is part of ypp-sc-tools, a set of third-party tools for assisting
3 # players of Yohoho Puzzle Pirates.
4 #
5 # Copyright (C) 2009 Ian Jackson <ijackson@chiark.greenend.org.uk>
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 #
20 # Yohoho and Puzzle Pirates are probably trademarks of Three Rings and
21 # are used without permission.  This program is not endorsed or
22 # sponsored by Three Rings.
23
# Short copyright / no-warranty notice kept as a string so the program
# can display it (the code that uses it is not in this part of the file).
copyright_info = '''
yoweb-scrape is part of ypp-sc-tools  Copyright (C) 2009 Ian Jackson
This program comes with ABSOLUTELY NO WARRANTY; this is free software,
and you are welcome to redistribute it under certain conditions.
For details, read the top of the yoweb-scrape file.
'''

#---------- setup ----------

# Put SIGINT back to its default action, so that ^C terminates the
# process instead of surfacing as a Python KeyboardInterrupt.
import signal
signal.signal(signal.SIGINT, signal.SIG_DFL)

import os
import time
import urllib
import urllib2
import errno
import sys
import re as regexp
import random
import curses
import termios
import random
from optparse import OptionParser
from StringIO import StringIO

from BeautifulSoup import BeautifulSoup

# Global options object, read as opts.debug, opts.expire_age, etc. by
# the code below.  It is None here and must be assigned before any of
# that code runs (presumably by the option parsing later in the file).
opts = None
53
54 #---------- YPP parameters and arrays ----------
55
# Names of all the puzzles whose standings we look for on a pirate
# page, in display order.
puzzles = [
        'Swordfighting',
        'Bilging',
        'Sailing',
        'Rigging',
        'Navigating',
        'Battle Navigation',
        'Gunning',
        'Carpentry',
        'Rumble',
        'Treasure Haul',
        'Drinking',
        'Spades',
        'Hearts',
        'Treasure Drop',
        'Poker',
        'Distilling',
        'Alchemistry',
        'Shipwrightery',
        'Blacksmithing',
        'Foraging',
        ]

# Core duty stations.  A nested list is a group of puzzles that
# share a single column (the best standing of the group is shown).
_sails_group = ['Sailing', 'Rigging']
core_duty_puzzles = ['Gunning', _sails_group, 'Bilging', 'Carpentry']

# All ship duty puzzles: navigation first, then the core stations,
# then Treasure Haul.
duty_puzzles = ['Navigating', 'Battle Navigation']
duty_puzzles.extend(core_duty_puzzles)
duty_puzzles.append('Treasure Haul')

# Standing names, lowest first; a standing is represented elsewhere
# by its index in this list.
standingvals = [
        'Able',
        'Proficient',
        'Distinguished',
        'Respected',
        'Master',
        'Renowned',
        'Grand-Master',
        'Legendary',
        'Ultimate',
        ]
standing_limit = len(standingvals)

# Matches hrefs which refer to a pirate's yoweb page.
pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm')

max_pirate_namelen = 12
79
80
81 #---------- general utilities ----------
82
def debug(m):
        # Print m as one line on opts.debug_file, but only when
        # debugging is switched on (opts.debug > 0).
        if not opts.debug > 0:
                return
        print >>opts.debug_file, m
86
def debug_flush():
        # Flush opts.debug_file; does nothing unless debugging is
        # switched on (opts.debug > 0).
        if not opts.debug > 0:
                return
        opts.debug_file.flush()
90
def sleep(seconds):
        # Sleep for the given number of seconds.  Pending debug output
        # is flushed first so that it is visible while we wait.
        debug_flush()
        time.sleep(seconds)
94
def format_time_interval(ti):
        # Render an interval of ti seconds compactly:
        #   under 2 minutes   'M:SS'
        #   under 2 hours     minutes, eg ' 5m' / '119m'
        #   under 1 day       whole hours, eg '3h'
        #   otherwise         whole days, eg '2d'
        if ti < 120:
                minutes = ti / 60
                seconds = ti % 60
                return '%d:%02d' % (minutes, seconds)
        if ti < 7200:
                return '%2dm' % (ti / 60)
        if ti < 86400:
                return '%dh' % (ti / 3600)
        return '%dd' % (ti / 86400)
100
101 #---------- caching and rate-limiting data fetcher ----------
102
class Fetcher:
        # Caching and rate-limiting fetcher for web pages.
        #
        # Every fetched page is kept in cachedir in a file named
        # '#<url-quoted URL>#'; the file's mtime records when the fetch
        # happened.  The ages of these files are what need_wait() uses
        # to decide how long to hold off before the next real fetch,
        # and files older than opts.expire_age are deleted whenever the
        # cache is scanned.

        def __init__(self, ocean, cachedir):
                # ocean may be None, in which case default_ocean() fills
                # it in before the first yoweb() fetch.  cachedir is
                # created if it does not exist yet, and is scanned once
                # so that expired entries are removed straight away.
                debug('Fetcher init %s' % cachedir)
                self.ocean = ocean
                self.cachedir = cachedir
                try: os.mkdir(cachedir)
                except (OSError,IOError) as oe:
                        if oe.errno != errno.EEXIST: raise
                self._cache_scan(time.time())

        def default_ocean(self, ocean='ice'):
                # Sets self.ocean to ocean, unless one is already set.
                if self.ocean is None:
                        self.ocean = ocean

        def _cache_scan(self, now):
                # Removes cache files older than opts.expire_age and
                # returns list of ages (now - mtime) of the remaining
                # ones, unsorted.  Only files whose names start with
                # '#' are considered.  Files which vanish while we are
                # looking (ENOENT) are silently skipped.
                ages = []
                debug('Fetcher   scan_cache')
                for leaf in os.listdir(self.cachedir):
                        if not leaf.startswith('#'): continue
                        path = self.cachedir + '/' + leaf
                        try: s = os.stat(path)
                        except (OSError,IOError) as oe:
                                if oe.errno != errno.ENOENT: raise
                                continue
                        age = now - s.st_mtime
                        if age > opts.expire_age:
                                debug('Fetcher    expire %d %s' % (age, path))
                                try: os.remove(path)
                                except (OSError,IOError) as oe:
                                        if oe.errno != errno.ENOENT: raise
                                continue
                        ages.append(age)
                return ages

        def need_wait(self, now, imaginary=()):
                # Returns how long (in seconds) we should wait before
                # making another real fetch; 0 means no wait is needed.
                #
                # imaginary is a sequence of extra ages which are
                # treated as if they were cache entries too.
                #
                # The ages are sorted, youngest first, and each is
                # compared with a threshold min_age which starts at 1
                # and becomes (min_age + 3) * 1.25 for each further
                # entry.  An entry younger than its threshold (and no
                # more than 5 seconds old) demands a wait of the
                # difference; the largest such demand wins.  A nonzero
                # wait has a random adjustment of up to +/- 0.5 added,
                # so the result can occasionally be negative.
                ages = self._cache_scan(now)
                ages.extend(imaginary)
                ages.sort()
                debug('Fetcher   ages ' + repr(ages))
                min_age = 1
                need_wait = 0
                for age in ages:
                        if age < min_age and age <= 5:
                                debug('Fetcher   morewait min=%d age=%d' %
                                        (min_age, age))
                                need_wait = max(need_wait, min_age - age)
                        min_age += 3
                        min_age *= 1.25
                if need_wait > 0:
                        need_wait += random.random() - 0.5
                return need_wait

        def _rate_limit_cache_clean(self, now):
                # Sleeps for as long as need_wait() demands (which also
                # expires old cache entries as a side effect).
                need_wait = self.need_wait(now)
                if need_wait > 0:
                        debug('Fetcher   wait %d' % need_wait)
                        sleep(need_wait)

        def fetch(self, url, max_age):
                # Returns the contents of url, from the cache if we have
                # a copy no older than max_age seconds and otherwise
                # from the network (after any rate-limiting delay), in
                # which case the cache is updated.
                #
                # max_age is clamped: it is never more than
                # opts.expire_age nor less than opts.min_max_age.
                #
                # Errors other than a missing cache file propagate to
                # the caller, as do any errors from urllib2.
                debug('Fetcher fetch %s' % url)
                cache_corename = urllib.quote_plus(url)
                cache_item = "%s/#%s#" % (self.cachedir, cache_corename)
                try: f = open(cache_item, 'r')
                except (OSError,IOError) as oe:
                        if oe.errno != errno.ENOENT: raise
                        f = None
                now = time.time()
                max_age = max(opts.min_max_age, min(max_age, opts.expire_age))
                if f is not None:
                        # whether we use the cached copy or not, the
                        # file must be closed again
                        try:
                                s = os.fstat(f.fileno())
                                age = now - s.st_mtime
                                if age > max_age:
                                        debug('Fetcher  stale %d < %d'%
                                                (max_age, age))
                                else:
                                        data = f.read()
                                        debug('Fetcher  cached %d > %d' %
                                                (max_age, age))
                                        return data
                        finally:
                                f.close()

                debug('Fetcher  fetch')
                self._rate_limit_cache_clean(now)

                stream = urllib2.urlopen(url)
                try: data = stream.read()
                finally: stream.close()

                # Write to a temporary name (made unique with our pid)
                # and rename into place, so that a reader never sees a
                # partially written cache entry.
                cache_tmp = "%s/#%s~%d#" % (
                        self.cachedir, cache_corename, os.getpid())
                f = open(cache_tmp, 'w')
                try: f.write(data)
                finally: f.close()
                os.rename(cache_tmp, cache_item)
                debug('Fetcher  stored')
                return data

        def yoweb(self, kind, tail, max_age):
                # Fetches
                #   http://<ocean>.puzzlepirates.com/yoweb/<kind><tail>
                # via fetch(), using the default ocean if none was
                # specified.
                self.default_ocean()
                url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
                        self.ocean, kind, tail)
                return self.fetch(url, max_age)
203
204 #---------- logging assistance for troubled screenscrapers ----------
205
class SoupLog:
        # Collects, in self.msgs, messages describing problems found
        # while scraping.

        def __init__(self):
                self.msgs = list()

        def msg(self, m):
                # Records the message m.
                self.msgs.append(m)

        def soupm(self, obj, m):
                # Records the message m, with the repr of the soup
                # object it relates to appended.
                where = repr(obj)
                self.msg(m + '; in ' + where)

        def needs_msgs(self, child_souplog):
                # Takes over all of child_souplog's messages, leaving
                # child_souplog with a fresh empty list.
                self.msgs.extend(child_souplog.msgs)
                child_souplog.msgs = list()
216
def soup_text(obj):
        # Returns all the text found anywhere inside the soup object
        # obj, joined together, with leading and trailing whitespace
        # removed.
        pieces = obj.findAll(text=True)
        return ''.join(pieces).strip()
220
class SomethingSoupInfo(SoupLog):
        # Base class for the page scrapers: fetches a yoweb page via the
        # global fetcher and parses it into self._soup, with HTML
        # entities converted.

        def __init__(self, kind, tail, max_age):
                SoupLog.__init__(self)
                page = fetcher.yoweb(kind, tail, max_age)
                entities = BeautifulSoup.HTML_ENTITIES
                self._soup = BeautifulSoup(page, convertEntities=entities)
228
229 #---------- scraper for pirate pages ----------
230
class PirateInfo(SomethingSoupInfo):
        # Scraper for one pirate's yoweb page.
        #
        # Public data members:
        #  pi.standings = { 'Treasure Haul': 0, ... }
        #       maps puzzle name to an index into standingvals
        #       (0 is 'Able'); a puzzle whose standing could not
        #       be determined has no entry
        #  pi.name = name
        #  pi.crew = (id, name)    or None if not found
        #  pi.flag = (id, name)    or None if not found
        #  pi.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, pirate, max_age=300):
                # Fetches (or gets from the cache, if no older than
                # max_age seconds) and scrapes the page for pirate.
                SomethingSoupInfo.__init__(self,
                        'pirate.wm?target=', pirate, max_age)
                self.name = pirate
                self._find_standings()
                self.crew = self._find_crewflag('crew',
                        '^/yoweb/crew/info\\.wm')
                self.flag = self._find_crewflag('flag',
                        '^/yoweb/flag/info\\.wm')

        def _find_standings(self):
                # Sets self.standings.  Each standing image's alt text
                # names the puzzle; the standing itself is parsed out of
                # the text of the table cell following the one which
                # contains the image.
                #
                # Problems are collected separately and only passed on
                # to self.msgs if the standing for at least one known
                # puzzle could not be established.
                imgs = self._soup.findAll('img',
                        src=regexp.compile('/yoweb/images/stat.*'))
                # Either '<something>/<Standing>' or
                # '<something>/<something> (ocean-wide <Standing>)';
                # whichever group matched is the standing we want.
                standing_re = regexp.compile(
u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
                        )

                # standings found on the page for each puzzle, as names
                standings = dict((skill, [ ]) for skill in puzzles)

                skl = SoupLog()

                for img in imgs:
                        try: puzzle = img['alt']
                        except KeyError: continue

                        if not puzzle in puzzles:
                                skl.soupm(img, 'unknown puzzle: "%s"' % puzzle)
                                continue
                        key = img.findParent('td')
                        if key is None:
                                skl.soupm(img, 'puzzle at root! "%s"' % puzzle)
                                continue
                        valelem = key.findNextSibling('td')
                        if valelem is None:
                                skl.soupm(key, 'puzzle missing sibling "%s"'
                                        % puzzle)
                                continue
                        valstr = soup_text(valelem)
                        match = standing_re.match(valstr)
                        if match is None:
                                skl.soupm(key, ('puzzle "%s" unparseable'+
                                        ' standing "%s"') % (puzzle, valstr))
                                continue
                        standing = match.group(match.lastindex)
                        standings[puzzle].append(standing)

                self.standings = { }

                for puzzle in puzzles:
                        sl = standings[puzzle]
                        if len(sl) > 1:
                                skl.msg('puzzle "%s" multiple standings %s' %
                                                (puzzle, repr(sl)))
                                continue
                        if not sl:
                                skl.msg('puzzle "%s" no standing found' % puzzle)
                                continue
                        standing = sl[0]
                        if standing in standingvals:
                                self.standings[puzzle] = (
                                        standingvals.index(standing))
                        else:
                                skl.msg('puzzle "%s" unknown standing "%s"' %
                                        (puzzle, standing))

                for puzzle in puzzles:
                        if not puzzle in self.standings:
                                # one transfer moves all the messages
                                self.needs_msgs(skl)
                                break

        def _find_crewflag(self, cf, yoweb_re):
                # Looks for the single link whose href matches yoweb_re
                # and ends with '<cf>id=<id>'.  Returns (id, link text),
                # or None (having recorded a message) if there is not
                # exactly one such link or its href has no id.
                things = self._soup.findAll('a', href=regexp.compile(yoweb_re))
                if len(things) != 1:
                        self.msg('zero or several %s id references found' % cf)
                        return None
                thing = things[0]
                id_re = '\\b%sid\\=(\\w+)$' % cf
                id_haystack = thing['href']
                match = regexp.compile(id_re).search(id_haystack)
                if match is None:
                        self.soupm(thing, ('incomprehensible %s id ref'+
                                ' (%s in %s)') % (cf, id_re, id_haystack))
                        return None
                name = soup_text(thing)
                return (match.group(1), name)

        def __str__(self):
                return repr((self.crew, self.flag, self.standings, self.msgs))
329
330 #---------- scraper for crew pages ----------
331
class CrewInfo(SomethingSoupInfo):
        # Scraper for a crew's yoweb page.
        #
        # Public data members:
        #  ci.crew = [ ('Captain',        ['Pirate', ...]),
        #              ('Senior Officer', [...]),
        #               ... ]
        #  ci.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, crewid, max_age=300):
                # Fetches (or gets from the cache, if no older than
                # max_age seconds) and scrapes the page for crewid.
                SomethingSoupInfo.__init__(self,
                        'crew/info.wm?crewid=', crewid, max_age)
                self._find_crew()

        def _find_crew(self):
                # Sets self.crew.  The member table is located by
                # starting at the captain image and going up through the
                # enclosing tables until we reach something containing
                # pirate links.  Its rows are then either rank headings
                # (containing a crew rank image) or rows of pirates
                # belonging to the most recent heading.
                self.crew = []
                captain_imgs = self._soup.findAll('img',
                        src='/yoweb/images/crew-captain.png')
                if len(captain_imgs) != 1:
                        self.msg('crew members: no. of captain images != 1')
                        return
                table = captain_imgs[0]
                while True:
                        if table.find('a', href=pirate_ref_re):
                                break
                        table = table.findParent('table')
                        if not table:
                                self.msg('crew members: cannot find table')
                                return
                members = None
                rank_img_re = regexp.compile('/yoweb/images/crew')
                for row in table.contents:
                        # strings between the rows are of no interest
                        if isinstance(row,basestring):
                                continue

                        if row.find('img', attrs={'src': rank_img_re}):
                                rank = soup_text(row)
                                members = []
                                self.crew.append((rank, members))
                                continue
                        for link in row.findAll('a', href=pirate_ref_re):
                                if members is None:
                                        self.soupm(link, 'crew members: crew'
                                                ' before rank')
                                        continue
                                members.append(soup_text(link))

        def __str__(self):
                return repr((self.crew, self.msgs))
379
380 #---------- pretty-printer for tables of pirate puzzle standings ----------
381
class StandingsTable:
        # Writes a table of pirates' puzzle standings to a file-like
        # object, one pirate per line and one column per puzzle (or
        # per group of puzzles, where the puzzle list has a nested
        # list).

        def __init__(self, f, use_puzzles=None, col_width=6, gap_every=5):
                # f            file-like object; only f.write is used
                # use_puzzles  columns to show; by default duty_puzzles
                #              if opts.ship_duty is set, otherwise
                #              all the puzzles
                # col_width    width of each column, including the
                #              separating space
                # gap_every    a blank line is emitted after every this
                #              many lines; None for no gaps
                if use_puzzles is None:
                        if opts.ship_duty:
                                use_puzzles=duty_puzzles
                        else:
                                use_puzzles=puzzles
                self._puzzles = use_puzzles
                self.f = f
                self._cw = col_width-1
                self._gap_every = gap_every
                self._linecount = 0
                self._o = f.write

        def _nl(self): self._o('\n')

        def _pline(self, pirate, puzstrs, extra):
                # Writes one table line: the name column, then each of
                # puzstrs padded and truncated to the column width,
                # then extra (if any).  Preceded by a blank line if
                # a gap is due.
                if (self._linecount > 0
                    and self._gap_every is not None
                    and not (self._linecount % self._gap_every)):
                        self._nl()
                self._o(' %-*s' % (max(max_pirate_namelen, 14), pirate))
                for v in puzstrs:
                        self._o(' %-*.*s' % (self._cw,self._cw, v))
                if extra:
                        self._o(' ' + extra)
                self._nl()
                self._linecount += 1

        def _puzstr(self, pi, puzzle):
                # Returns the column text for pi's standing in puzzle
                # (or the best of them, if puzzle is a list):
                #   '?'  if any standing involved is not known
                #   ''   for standing 0
                # and otherwise one '*' for every two levels plus a '+'
                # for an odd level, preceded by the level number if the
                # column is wide enough (and by a space too, if it is
                # wider still).
                if not isinstance(puzzle,list): puzzle = [puzzle]
                try: standing = max([pi.standings[p] for p in puzzle])
                except KeyError: return '?'
                if not standing: return ''
                s = ''
                if self._cw > 4:
                        s += repr(standing)
                if self._cw > 5:
                        s += ' '
                s += '*' * (standing // 2)
                s += '+' * (standing % 2)
                return s

        def headings(self, lhs='', rhs=None):
                # Writes the heading line: lhs in the name column, an
                # abbreviated puzzle name over each column, and then
                # rhs.  Restarts the counting for gaps.
                def puzn_redact(name):
                        # groups: each name cut to half a column
                        if isinstance(name,list):
                                return '/'.join(
                                        ["%.*s" % (self._cw//2, puzn_redact(n))
                                         for n in name])
                        # two words: (up to) first 4 letters of the
                        # first word, run together with the rest
                        spc = name.find(' ')
                        if spc < 0: return name
                        return name[0:min(4,spc)] + name[spc+1:]
                # negative count: the heading itself never gets a gap
                self._linecount = -2
                self._pline(lhs, [puzn_redact(p) for p in self._puzzles], rhs)
                self._linecount = 0
        def literalline(self, line):
                # Writes line as is, and restarts the counting for gaps.
                self._o(line)
                self._nl()
                self._linecount = 0
        def pirate_dummy(self, name, standingstring, extra=None):
                # Writes a line for name with placeholder columns.
                # standingstring is repeated once per puzzle and each
                # element of the result fills one column.
                # NOTE(review): for a plain string that means one
                # column per character, so this is only right for
                # one-character strings (or one-element lists).
                self._pline(name, standingstring * len(self._puzzles), extra)
        def pirate(self, pi, extra=None):
                # Writes the line for the pirate whose info is pi.
                puzstrs = [self._puzstr(pi,puz) for puz in self._puzzles]
                self._pline(pi.name, puzstrs, extra)
448
449
450 #---------- chat log parser ----------
451
class PirateAboard:
        # This is essentially a transparent, dumb, data class.
        #  pa.v                 vessel record the pirate is aboard
        #  pa.name
        #  pa.last_time         time and description of the most
        #  pa.last_event         recent event
        #  pa.gunner            initially False
        #  pa.last_chat_time    initially None
        #  pa.last_chat_chan    initially None
        #  pa.pi                PirateInfo, or None if not yet fetched
        #  pa.pi_fetched        when pa.pi was fetched; exists only
        #                        once pirate_info has fetched something

        def __init__(pa, pn, v, time, event):
                pa.name, pa.v = pn, v
                pa.last_time, pa.last_event = time, event
                pa.last_chat_time = pa.last_chat_chan = None
                pa.gunner = False
                pa.pi = None

        def pirate_info(pa):
                # Returns the PirateInfo for this pirate, fetching or
                # refreshing it if that is due and the fetcher would not
                # have to wait.  Otherwise returns whatever we have
                # already, which is None if nothing was fetched yet.
                #
                # Existing info is considered fresh for a randomly
                # chosen 120-240 seconds.
                now = time.time()
                imaginary = [1]
                if pa.pi:
                        age = now - pa.pi_fetched
                        guide = random.randint(120,240)
                        if age <= guide:
                                return pa.pi
                        debug('PirateAboard refresh %d > %d  %s' % (
                                age, guide, pa.name))
                        # a mere refresh is less urgent
                        imaginary = [2,4]
                wait = fetcher.need_wait(now, imaginary)
                if not wait:
                        pa.pi = PirateInfo(pa.name, 600)
                        pa.pi_fetched = now
                else:
                        debug('PirateAboard fetcher not ready %d' % wait)
                return pa.pi
492
493 class ChatLogTracker:
494         # This is quite complex so we make it opaque.  Use the
495         # official invokers, accessors etc.
496
497         def __init__(self, myself_pi, logfn):
498                 self._pl = {}   # self._pl['Pirate'] =
499                 self._vl = {}   #   self._vl['Vessel']['Pirate'] = PirateAboard
500                                 # self._vl['Vessel']['#lastinfo']
501                                 # self._vl['Vessel']['#name']
502                                 # self._v = self._vl[self._vessel]
503                 self._date = None
504                 self._myself = myself_pi
505                 self._f = file(logfn)
506                 self._lbuf = ''
507                 self._progress = [0, os.fstat(self._f.fileno()).st_size]
508                 self._disembark_myself()
509                 self._need_redisplay = False
510                 self._lastvessel = None
511
512         def _disembark_myself(self):
513                 self._v = None
514                 self._vessel = None
515                 self.force_redisplay()
516
517         def force_redisplay(self):
518                 self._need_redisplay = True
519
520         def _vessel_updated(self, v, timestamp):
521                 v['#lastinfo'] = timestamp
522                 self.force_redisplay()
523
524         def _onboard_event(self,v,timestamp,pirate,event):
525                 pa = self._pl.get(pirate, None)
526                 if pa is not None and pa.v is v:
527                         pa.last_time = timestamp
528                         pa.last_event = event
529                 else:
530                         if pa is not None: del pa.v[pirate]
531                         pa = PirateAboard(pirate, v, timestamp, event)
532                         self._pl[pirate] = pa
533                         v[pirate] = pa
534                 self._vessel_updated(v, timestamp)
535                 return pa
536
537         def _trash_vessel(self, v):
538                 for pn in v:
539                         if pn.startswith('#'): continue
540                         del self._pl[pn]
541                 vn = v['#name']
542                 del self._vl[vn]
543                 if v is self._v: self._disembark_myself()
544                 self.force_redisplay()
545
546         def _vessel_stale(self, v, timestamp):
547                 return timestamp - v['#lastinfo'] > opts.ship_reboard_clearout
548
549         def _vessel_check_expire(self, v, timestamp):
550                 if not self._vessel_stale(v, timestamp):
551                         return v
552                 self._debug_line_disposition(timestamp,'',
553                         'stale-reset ' + v['#name'])
554                 self._trash_vessel(v)
555                 return None
556
557         def expire_garbage(self, timestamp):
558                 for v in self._vl.values():
559                         self._vessel_check_expire(v, timestamp)
560
561         def _vessel_lookup(self, vn, timestamp, dml=[], create=False):
562                 v = self._vl.get(vn, None)
563                 if v is not None:
564                         v = self._vessel_check_expire(v, timestamp)
565                 if v is not None:
566                         dml.append('found')
567                         return v
568                 if not create:
569                         dml.append('no')
570                 dml.append('new')
571                 self._vl[vn] = v = { '#name': vn }
572                 self._vessel_updated(v, timestamp)
573                 return v
574
575         def _find_matching_vessel(self, pattern, timestamp, cmdr,
576                                         dml=[], create=False):
577                 # use when a commander pirate `cmdr' specified a vessel
578                 #  by name `pattern' (either may be None)
579                 # if create is true, will create the vessel
580                 #  record if an exact name is specified
581
582                 if (pattern is not None and
583                     not '*' in pattern
584                     and len(pattern.split(' ')) == 2):
585                         vn = pattern.title()
586                         dml.append('exact')
587                         return self._vessel_lookup(
588                                 vn, timestamp, dml=dml, create=create)
589
590                 if pattern is None:
591                         pattern_check = lambda vn: True
592                 else:
593                         re = '(?:.* )?%s$' % pattern.lower().replace('*','.+')
594                         pattern_check = regexp.compile(re, regexp.I).match
595
596                 tries = []
597
598                 cmdr_pa = self._pl.get(cmdr, None)
599                 if cmdr_pa: tries.append((cmdr_pa.v, 'cmdr'))
600
601                 tries.append((self._v, 'here'))
602                 tried_vns = []
603
604                 for (v, dm) in tries:
605                         if v is None: dml.append(dm+'?'); continue
606                         
607                         vn = v['#name']
608                         if not pattern_check(vn):
609                                 tried_vns.append(vn)
610                                 dml.append(dm+'#')
611                                 continue
612
613                         dml.append(dm+'!')
614                         return v
615
616                 if pattern is not None and '*' in pattern:
617                         search = [
618                                 (vn,v)
619                                 for (vn,v) in self._vl.iteritems()
620                                 if not self._vessel_stale(v, timestamp)
621                                 if pattern_check(vn)
622                                 ]
623                         #debug('CLT-RE /%s/ wanted (%s) searched (%s)' % (
624                         #       re,
625                         #       '/'.join(tried_vns),
626                         #       '/'.join([vn for (vn,v) in search])))
627
628                         if len(search)==1:
629                                 dml.append('one')
630                                 return search[0][1]
631                         elif search:
632                                 dml.append('many')
633                         else:
634                                 dml.append('none')
635
636         def _debug_line_disposition(self,timestamp,l,m):
637                 debug('CLT %13s %-40s %s' % (timestamp,m,l))
638
639         def chatline(self,l):
640                 rm = lambda re: regexp.match(re,l)
641                 d = lambda m: self._debug_line_disposition(timestamp,l,m)
642                 timestamp = None
643
644                 m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$')
645                 if m:
646                         self._date = [int(x) for x in m.groups()]
647                         self._previous_timestamp = None
648                         return d('date '+`self._date`)
649
650                 if self._date is None:
651                         return d('date unset')
652
653                 m = rm('\\[(\d\d):(\d\d):(\d\d)\\] ')
654                 if not m:
655                         return d('no timestamp')
656
657                 while True:
658                         time_tuple = (self._date +
659                                       [int(x) for x in m.groups()] +
660                                       [-1,-1,-1])
661                         timestamp = time.mktime(time_tuple)
662                         if timestamp >= self._previous_timestamp: break
663                         self._date[2] += 1
664                         self._debug_line_disposition(timestamp,'',
665                                 'new date '+`self._date`)
666
667                 self._previous_timestamp = timestamp
668
669                 l = l[l.find(' ')+1:]
670
671                 def ob_x(pirate,event):
672                         return self._onboard_event(
673                                         self._v, timestamp, pirate, event)
674                 def ob1(did): ob_x(m.group(1), did); return d(did)
675                 def oba(did): return ob1('%s %s' % (did, m.group(2)))
676
677                 def disembark(v, timestamp, pirate, event):
678                         self._onboard_event(
679                                         v, timestamp, pirate, 'leaving '+event)
680                         del v[pirate]
681                         del self._pl[pirate]
682
683                 def disembark_me(why):
684                         self._disembark_myself()
685                         return d('disembark-me '+why)
686
687                 m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$')
688                 if m:
689                         dm = ['boarding']
690                         pn = self._myself.name
691                         vn = m.group(1)
692                         v = self._vessel_lookup(vn, timestamp, dm, create=True)
693                         self._lastvessel = self._vessel = vn
694                         self._v = v
695                         ob_x(pn, 'we boarded')
696                         self.expire_garbage(timestamp)
697                         return d(' '.join(dm))
698
699                 if self._v is None:
700                         return d('no vessel')
701
702                 m = rm('(\\w+) has come aboard\\.$')
703                 if m: return ob1('boarded');
704
705                 m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$')
706                 if m:
707                         (who,what) = m.groups()
708                         pa = ob_x(who,'ord '+what)
709                         if what == 'Gunning':
710                                 pa.gunner = True
711                         return d('duty order')
712
713                 m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$')
714                 if m: oba('stopped'); return d("end")
715
716                 def chat_core(speaker, chan):
717                         try: pa = self._pl[speaker]
718                         except KeyError: return 'mystery'
719                         if pa.v is not self._v: return 'elsewhere'
720                         pa.last_chat_time = timestamp
721                         pa.last_chat_chan = chan
722                         self.force_redisplay()
723                         return 'here'
724
725                 def chat(chan):
726                         speaker = m.group(1)
727                         dm = chat_core(speaker, chan)
728                         return d('chat %s %s' % (chan, dm))
729
730                 def chat_metacmd(chan):
731                         (cmdr, metacmd) = m.groups()
732                         metacmd = regexp.sub('\\s+', ' ', metacmd).strip()
733                         m2 = regexp.match(
734                                 '/([ad]) (?:([A-Za-z* ]+)\\s*:)?([A-Za-z ]+)$',
735                                 metacmd)
736                         if not m2: return chat(chan)
737
738                         (cmd, pattern, targets) = m2.groups()
739                         dml = ['cmd', chan, cmd]
740
741                         if cmd == 'a': each = self._onboard_event
742                         else: each = disembark
743
744                         if cmdr == self._myself.name:
745                                 dml.append('self')
746                                 how = 'cmd: %s' % cmd
747                         else:
748                                 dml.append('other')
749                                 how = 'cmd: %s %s' % (cmd,cmdr)
750
751                         v = self._find_matching_vessel(
752                                 pattern, timestamp, cmdr, dml, create=True)
753
754                         if v is not None:
755                                 targets = targets.strip().split(' ')
756                                 dml.append(`len(targets)`)
757                                 for target in targets:
758                                         each(v, timestamp, target.title(), how)
759                                 self._vessel_updated(v, timestamp)
760
761                         dm = ' '.join(dml)
762                         chat_core(cmdr, 'cmd '+chan)
763                         return d(dm)
764
765                 m = rm('(\\w+) (?:issued an order|ordered everyone) "')
766                 if m: return ob1('general order');
767
768                 m = rm('(\\w+) says, "')
769                 if m: return chat('public')
770
771                 m = rm('(\\w+) tells ye, "')
772                 if m: return chat('private')
773
774                 m = rm('Ye told (\\w+), "(.*)"$')
775                 if m: return chat_metacmd('private')
776
777                 m = rm('(\\w+) flag officer chats, "')
778                 if m: return chat('flag officer')
779
780                 m = rm('(\\w+) officer chats, "(.*)"$')
781                 if m: return chat_metacmd('officer')
782
783                 m = rm('Ye accepted the offer to job with ')
784                 if m: return disembark_me('jobbing')
785
786                 m = rm('Ye hop on the ferry and are whisked away ')
787                 if m: return disembark_me('ferry')
788
789                 m = rm('Whisking away to yer home on the magical winds')
790                 if m: return disembark_me('home')
791
792                 m = rm('Game over\\.  Winners: ([A-Za-z, ]+)\\.$')
793                 if m:
794                         pl = m.group(1).split(', ')
795                         if not self._myself.name in pl:
796                                 return d('lost melee')
797                         for pn in pl:
798                                 if ' ' in pn: continue
799                                 ob_x(pn,'won melee')
800                         return d('won melee')
801
802                 m = rm('(\\w+) is eliminated\\!')
803                 if m: return ob1('eliminated in fray');
804
805                 m = rm('(\\w+) has driven \w+ from the ship\\!')
806                 if m: return ob1('boarder repelled');
807
808                 m = rm('\w+ has bested (\\w+), and turns'+
809                         ' to the rest of the ship\\.')
810                 if m: return ob1('boarder unrepelled');
811
812                 m = rm('(\\w+) has left the vessel\.')
813                 if m:
814                         pirate = m.group(1)
815                         disembark(self._v, timestamp, pirate, 'disembarked')
816                         return d('disembarked')
817
818                 return d('not-matched')
819
820         def _str_vessel(self, vn, v):
821                 s = ' vessel %s\n' % vn
822                 s += ' '*20 + "%-*s   %13s\n" % (
823                                 max_pirate_namelen, '#lastinfo',
824                                 v['#lastinfo'])
825                 assert v['#name'] == vn
826                 for pn in sorted(v.keys()):
827                         if pn.startswith('#'): continue
828                         pa = v[pn]
829                         assert pa.v == v
830                         assert self._pl[pn] == pa
831                         s += ' '*20 + "%s %-*s %13s %-30s %13s %s\n" % (
832                                 (' ','G')[pa.gunner],
833                                 max_pirate_namelen, pn,
834                                 pa.last_time, pa.last_event,
835                                 pa.last_chat_time, pa.last_chat_chan)
836                 return s
837
838         def __str__(self):
839                 s = '''<ChatLogTracker
840  myself %s
841  vessel %s
842 '''                     % (self._myself.name, self._vessel)
843                 assert ((self._v is None and self._vessel is None) or
844                         (self._v is self._vl[self._vessel]))
845                 if self._vessel is not None:
846                         s += self._str_vessel(self._vessel, self._v)
847                 for vn in sorted(self._vl.keys()):
848                         if vn == self._vessel: continue
849                         s += self._str_vessel(vn, self._vl[vn])
850                 for p in self._pl:
851                         pa = self._pl[p]
852                         assert pa.v[p] is pa
853                         assert pa.v in self._vl.values()
854                 s += '>\n'
855                 return s
856
857         def catchup(self, progress=None):
858                 while True:
859                         more = self._f.readline()
860                         if not more: break
861
862                         self._progress[0] += len(more)
863                         if progress: progress.progress(*self._progress)
864
865                         self._lbuf += more
866                         if self._lbuf.endswith('\n'):
867                                 self.chatline(self._lbuf.rstrip())
868                                 self._lbuf = ''
869                                 if opts.debug >= 2:
870                                         debug(self.__str__())
871                 if progress: progress.caughtup()
872
873         def changed(self):
874                 rv = self._need_redisplay
875                 self._need_redisplay = False
876                 return rv
877         def myname(self):
878                 # returns our pirate name
879                 return self._myself.name
880         def vesselname(self):
881                 # returns the vessel name we're aboard or None
882                 return self._vessel
883         def lastvesselname(self):
884                 # returns the last vessel name we were aboard or None
885                 return self._lastvessel
886         def aboard(self, vesselname=True):
887                 # returns a list of PirateAboard the vessel
888                 #  sorted by pirate name
889                 #  you can pass this None and you'll get []
890                 #  or True for the current vessel (which is the default)
891                 #  the returned value is a fresh list of persistent
892                 #  PirateAboard objects
893                 if vesselname is True: v = self._v
894                 else: v = self._vl.get(vesselname.title())
895                 if v is None: return []
896                 return [ v[pn]
897                          for pn in sorted(v.keys())
898                          if not pn.startswith('#') ]
899
900 #---------- implementations of actual operation modes ----------
901
902 def do_pirate(pirates, bu):
903         print '{'
904         for pirate in pirates:
905                 info = PirateInfo(pirate)
906                 print '%s: %s,' % (`pirate`, info)
907         print '}'
908
def prep_crew_of(args, bu, max_age=300):
        """Look up the crew of the single pirate named in args.

        Calls bu() with a usage message unless exactly one argument was
        given.  Returns a CrewInfo, or None when the pirate's info has
        no crew.  max_age is passed on to both PirateInfo and CrewInfo.
        """
        if len(args) != 1:
                bu('crew-of takes one pirate name')
        pirate = PirateInfo(args[0], max_age)
        if pirate.crew is not None:
                return CrewInfo(pirate.crew[0], max_age)
        return None
914
915 def do_crew_of(args, bu):
916         ci = prep_crew_of(args, bu)
917         print ci
918
def do_standings_crew_of(args, bu):
        """Print a standings table for every member of a pirate's crew.

        args must hold exactly one pirate name (checked by prep_crew_of);
        bu is the usage-error callback.  Members are grouped under a
        heading line per rank; ranks with no members are omitted.
        """
        ci = prep_crew_of(args, bu, 60)
        if ci is None:
                # prep_crew_of returns None when the pirate has no crew;
                # report that instead of failing on ci.crew below.
                bu('no crew found for pirate %s' % args[0])
                return
        tab = StandingsTable(sys.stdout)
        tab.headings()
        for (rank, members) in ci.crew:
                if not members: continue
                tab.literalline('')
                tab.literalline('%s:' % rank)
                for p in members:
                        # each member gets a randomly chosen maximum age
                        # between 900 and 1800 for its PirateInfo lookup
                        pi = PirateInfo(p, random.randint(900,1800))
                        tab.pirate(pi)
930
class ProgressPrintPercentage:
        """Progress reporter which writes a percentage line to a file.

        The progress line ends in a carriage return rather than a
        newline, so successive updates overwrite each other on a
        terminal.
        """
        def __init__(self, f=sys.stdout):
                # f: file-like object that all output is written to
                self._f = f
        def progress_string(self,done,total):
                """Return the progress line for done out of total units."""
                if total > 0:
                        percent = (done * 100) // total
                else:
                        # Nothing to scan: avoid dividing by zero and
                        # report the scan as complete.
                        percent = 100
                return "scan chat logs %3d%%\r" % percent
        def progress(self,*a):
                """Write and flush the progress line for progress_string(*a)."""
                self._f.write(self.progress_string(*a))
                self._f.flush()
        def show_init(self, pirate, ocean):
                """Write the startup banner naming the pirate and ocean."""
                self._f.write('Starting up, %s on the %s ocean\n' % (
                        pirate, ocean))
        def caughtup(self):
                """Blank out the progress line once scanning has finished."""
                # 19 spaces: the width of "scan chat logs 100%"
                self._f.write(' ' * 19 + '\r')
                self._f.flush()
945
946 #----- modes which use the chat log parser are quite complex -----
947
def prep_chat_log(args, bu,
                progress=ProgressPrintPercentage(),
                max_myself_age=3600):
        """Open a chat log and scan everything already in it.

        args must hold exactly one element, the chat log filename, whose
        final path component must start with PIRATE_OCEAN_ (a capitalised
        pirate name, then a lower-case ocean name); bu() is called with a
        message otherwise.  The ocean from the filename is passed to
        fetcher.default_ocean().

        progress receives show_init() and is passed to the initial
        catchup().  max_myself_age is passed to PirateInfo for our own
        pirate.

        Returns (myself, track): our PirateInfo and the ChatLogTracker.
        """
        if len(args) != 1: bu('this action takes only chat log filename')
        logfn = args[0]
        logfn_re = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_'
        match = regexp.match(logfn_re, logfn)
        if not match: bu('chat log filename is not in expected format')
        (pirate, ocean) = match.groups()
        fetcher.default_ocean(ocean)

        progress.show_init(pirate, fetcher.ocean)
        myself = PirateInfo(pirate,max_myself_age)
        track = ChatLogTracker(myself, logfn)

        # The debug level is lowered by 2 for the initial scan
        # (presumably to keep the bulk catch-up from flooding the debug
        # output).  Restore it even if the scan fails.
        opts.debug -= 2
        try:
                track.catchup(progress)
        finally:
                opts.debug += 2

        track.force_redisplay()

        return (myself, track)
970
971 def do_track_chat_log(args, bu):
972         (myself, track) = prep_chat_log(args, bu)
973         while True:
974                 track.catchup()
975                 if track.changed():
976                         print track
977                 sleep(0.5 + 0.5 * random.random())
978
979 #----- ship management aid -----
980
981 class Display_dumb(ProgressPrintPercentage):
982         def __init__(self):
983                 ProgressPrintPercentage.__init__(self)
984         def show(self, s):
985                 print '\n\n', s;
986         def realstart(self):
987                 pass
988
class Display_overwrite(ProgressPrintPercentage):
        """Displayer which redraws in place using terminfo capabilities.

        Three levels of capability are handled:
          * 'ho' (or 'cup'), 'el' and 'ed' all available: home the
            cursor and clear to end of line / end of display as we go;
          * only 'clear' available: clear the whole screen before each
            redraw;
          * not even 'clear': behave like a plain scrolling display.
        """
        def __init__(self):
                ProgressPrintPercentage.__init__(self)

                # None means "no usable terminal capabilities": show()
                # then just scrolls and realstart() does nothing.
                self._t = None

                # terminfo is initialised with /dev/null as its output fd;
                # only the capability strings are used, and show() writes
                # them to sys.stdout itself.
                null = open('/dev/null','w')
                curses.setupterm(fd=null.fileno())

                self._clear = curses.tigetstr('clear')
                if not self._clear:
                        self._debug('missing clear!')
                        return

                self._t = {'el':'', 'ed':''}
                if not self._init_sophisticated():
                        # fall back to clearing the screen on every redraw
                        for k in self._t.keys(): self._t[k] = ''
                        self._t['ho'] = self._clear

        def _debug(self,m): debug('display overwrite: '+m)

        def _init_sophisticated(self):
                """Fill self._t with 'el', 'ed' and 'ho'; return 1 if all found.

                'ho' is synthesised from 'cup' (cursor to row 0, column 0)
                when the terminal has no 'ho' capability of its own.
                Returns 0 if any capability is still missing.
                """
                for k in self._t.keys():
                        self._t[k] = curses.tigetstr(k)
                home = curses.tigetstr('ho')
                if not home:
                        cup = curses.tigetstr('cup')
                        # tparm cannot be given a missing capability
                        if cup: home = curses.tparm(cup,0,0)
                self._t['ho'] = home
                missing = [k for k in self._t.keys() if not self._t[k]]
                if missing:
                        self._debug('missing '+(' '.join(missing)))
                        return 0
                return 1

        def show(self, s):
                """Redraw the display so that it shows the text s."""
                w = sys.stdout.write

                if self._t is None:
                        # no terminal control available: scroll instead
                        w('\n\n%s\n' % s)
                        return

                def wti(k): w(self._t[k])

                wti('ho')
                nl = ''
                for l in s.rstrip().split('\n'):
                        w(nl)
                        w(l)
                        wti('el')
                        nl = '\r\n'
                wti('ed')
                w(' ')
                sys.stdout.flush()

        def realstart(self):
                """Clear the screen (if we can) before the first real redraw."""
                if self._clear:
                        sys.stdout.write(self._clear)
                sys.stdout.flush()
1041                         
1042
1043 def do_ship_aid(args, bu):
1044         if opts.ship_duty is None: opts.ship_duty = True
1045
1046         displayer = globals()['Display_'+opts.display]()
1047
1048         (myself, track) = prep_chat_log(args, bu, progress=displayer)
1049
1050         displayer.realstart()
1051
1052         if os.isatty(0): kr_create = KeystrokeReader
1053         else: kr_create = DummyKeystrokeReader
1054
1055         try:
1056                 kreader = kr_create(0, 10)
1057                 ship_aid_core(myself, track, displayer, kreader)
1058         finally:
1059                 kreader.stop()
1060                 print '\n'
1061
class KeyBasedSorter:
        """Base for sorters; subclasses must provide compar_key(pi)."""

        def compar_key_pa(self, pa):
                """Return the sort key for pa, or None if it has no pirate info."""
                info = pa.pirate_info()
                if info is not None:
                        return self.compar_key(info)
                return None

        def lsort_pa(self, l):
                """Sort the list l in place using compar_key_pa as the key."""
                l.sort(key=self.compar_key_pa)
1069
class NameSorter(KeyBasedSorter):
        """Sorter which orders pirates by name."""

        def compar_key(self, pi):
                return pi.name

        def desc(self):
                return 'name'
1073
class SkillSorter(NameSorter):
        """Sorter which puts the best pirates for given puzzles first.

        relevant is a '/'-separated list of wanted puzzle names.  Every
        other puzzle mentioned in core_duty_puzzles counts as one to
        avoid.
        """
        def __init__(self, relevant):
                self._want = frozenset(relevant.split('/'))
                avoid = set()
                for entry in core_duty_puzzles:
                        # an entry is either one puzzle name or a group of them
                        if isinstance(entry, basestring):
                                avoid.add(entry)
                        else:
                                avoid.update(entry)
                self._avoid = avoid - self._want
                self._desc = '%s' % relevant

        def desc(self):
                return self._desc

        def compar_key(self, pi):
                """Return (-best wanted standing, avoided standings, name).

                The middle element lists pi's standings in the avoided
                puzzles, highest first (standing_limit where unknown).
                Ties on the wanted puzzles are therefore broken by that
                list, then by name.
                """
                standings = pi.standings
                best_want = max([standings.get(puz, -1)
                                 for puz in self._want])
                best_avoid = sorted([-pi.standings.get(puz, standing_limit)
                                     for puz in self._avoid])
                debug('compar_key %s bw=%s ba=%s' % (
                        pi.name, repr(best_want), repr(best_avoid)))
                return (-best_want, [-x for x in best_avoid], pi.name)
1100
def ship_aid_core(myself, track, displayer, kreader):
        """Main loop of the ship aid: redraw the roster, then read a key.

        Each iteration catches up on the chat log, shows a standings
        table of the pirates aboard the current (or else the last)
        vessel, and waits for a keystroke from kreader.  'q' quits, 'a'
        sorts by name, and the keys in skill_keys select a skill sort.
        Any other key is ignored.
        """

        # keystroke -> puzzle(s) to sort by
        skill_keys = {
                'g': 'Gunning',
                'c': 'Carpentry',
                's': 'Sailing/Rigging',
                'b': 'Bilging',
                'n': 'Navigating',
                'd': 'Battle Navigation',
                't': 'Treasure Haul',
        }

        def find_vessel():
                # returns (vessel name or None, description for the title)
                name = track.vesselname()
                if name:
                        return (name, " on board the %s" % name)
                name = track.lastvesselname()
                if name:
                        return (name, " ashore from the %s" % name)
                return (None, " not on a vessel")

        def timeevent(t,e):
                # fixed-width "how long ago, what" column; blank if no time.
                # "now" is the loop's timestamp, read from the outer scope.
                if t is not None:
                        ago = format_time_interval(now - t)
                        return " %-4s %-16s" % (ago, e)
                return ' ' * 22

        displayer.show(track.myname() + find_vessel()[1] + '...')

        rotate_nya = '/-\\'

        sort = NameSorter()

        while True:
                track.catchup()
                now = time.time()

                (vn, where) = find_vessel()
                title = track.myname() + where
                title += " at %s" % time.strftime("%Y-%m-%d %H:%M:%S")
                title += kreader.info()
                title += '\n'

                aboard = track.aboard(vn)
                sort.lsort_pa(aboard)

                tbl_s = StringIO()
                tbl = StandingsTable(tbl_s)

                howmany = ''
                if track.vesselname():
                        howmany = ' %d aboard' % len(aboard)

                tbl.headings(howmany, '  sorted by '+sort.desc())

                for pa in aboard:
                        pi = pa.pirate_info()

                        if pa.gunner:
                                extra = 'G '
                        else:
                                extra = '  '
                        extra += timeevent(pa.last_time, pa.last_event)
                        extra += timeevent(pa.last_chat_time,
                                           pa.last_chat_chan)

                        if pi is not None:
                                tbl.pirate(pi, extra)
                        else:
                                # no info yet: show a placeholder row
                                tbl.pirate_dummy(pa.name, rotate_nya[0],
                                                 extra)

                displayer.show(title + tbl_s.getvalue())
                tbl_s.close()

                k = kreader.getch()
                if k is None:
                        # no key pressed: advance the spinner and redraw
                        rotate_nya = rotate_nya[1:3] + rotate_nya[0]
                        continue

                if k == 'q':
                        break
                if k == 'a':
                        sort = NameSorter()
                elif k in skill_keys:
                        sort = SkillSorter(skill_keys[k])
                # anything else is an unknown key command and is ignored
1174
1175 #---------- individual keystroke input ----------
1176
class DummyKeystrokeReader:
        """Stand-in keystroke reader which never returns a key."""

        def __init__(self, fd, timeout_dummy):
                # both arguments are accepted and ignored
                pass

        def stop(self):
                pass

        def getch(self):
                # wait a second, then report that there was no keystroke
                sleep(1)
                return None

        def info(self):
                return ' [noninteractive]'
1182
class KeystrokeReader(DummyKeystrokeReader):
        """Read single keystrokes from a terminal, with a timeout.

        The terminal on fd has echo and canonical mode turned off while
        the reader is in use; stop() restores the saved settings.
        """
        def __init__(self, fd, timeout_decisec=0):
                self._fd = fd
                self._saved = termios.tcgetattr(fd)

                # indexes into the list returned by tcgetattr
                lflag = 3
                cc = 6

                attrs = termios.tcgetattr(fd)
                unwanted = (termios.ECHO | termios.ECHONL |
                            termios.ICANON | termios.IEXTEN)
                attrs[lflag] &= ~unwanted
                # VMIN 0 with VTIME set: a read returns after at most
                # timeout_decisec tenths of a second, possibly empty
                attrs[cc][termios.VMIN] = 0
                attrs[cc][termios.VTIME] = timeout_decisec
                termios.tcsetattr(fd, termios.TCSANOW, attrs)

        def stop(self):
                """Restore the terminal settings saved by __init__."""
                termios.tcsetattr(self._fd, termios.TCSANOW, self._saved)

        def getch(self):
                """Return one byte read from the terminal, or None if none came."""
                debug_flush()
                byte = os.read(self._fd, 1)
                if len(byte):
                        return byte
                return None

        def info(self):
                return ''
1202
1203 #---------- main program ----------
1204
def main():
        """Parse the command line and run the selected mode.

        Sets the module globals opts (the parsed options, with a few
        derived settings added) and fetcher, then calls the do_* function
        named by the first non-option argument, passing it the remaining
        arguments and the parser's error function.
        """
        global opts, fetcher

        pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
actions:
 yoweb-scrape [--ocean OCEAN ...] pirate PIRATE
 yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
 yoweb-scrape [options] ship-aid CHAT-LOG  (must be .../PIRATE_OCEAN_chat-log*)

display modes (for --display) apply to ship-aid:
 --display=dumb       just print new information, scrolling the screen
 --display=overwrite  use cursor motion, selective clear, etc. to redraw at top''')
        ao = pa.add_option
        ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None,
                help='select ocean OCEAN')
        ao('--cache-dir', dest='cache_dir', metavar='DIR',
                default='~/.yoweb-scrape-cache',
                help='cache yoweb pages in DIR')
        ao('-D','--debug', action='count', dest='debug', default=0,
                help='enable debugging output')
        ao('--debug-fd', type='int', dest='debug_fd',
                help='write any debugging output to specified fd')
        ao('-q','--quiet', action='store_true', dest='quiet',
                help='suppress warning output')
        ao('--display', action='store', dest='display',
                type='choice', choices=['dumb','overwrite'],
                help='how to display ship aid')

        ao('--ship-duty', action='store_true', dest='ship_duty',
                help='show ship duty station puzzles')
        ao('--all-puzzles', action='store_false', dest='ship_duty',
                help='show all puzzles, not just ship duty stations')

        ao('--min-cache-reuse', type='int', dest='min_max_age',
                metavar='SECONDS', default=60,
                help='always reuse cache yoweb data if no older than this')

        (opts,args) = pa.parse_args()
        random.seed()

        if len(args) < 1:
                print >>sys.stderr, copyright_info
                pa.error('need a mode argument')

        if opts.debug_fd is not None:
                opts.debug_file = os.fdopen(opts.debug_fd, 'w')
        else:
                opts.debug_file = sys.stdout

        # Map the mode name onto a do_* function: hyphens become
        # underscores, while literal underscores are turned into '#' so
        # that they can never match a function name.
        mode = args[0]
        mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_')
        try: mode_fn = globals()[mode_fn_name]
        except KeyError: pa.error('unknown mode "%s"' % mode)

        # fixed parameters
        opts.expire_age = max(3600, opts.min_max_age)

        opts.ship_reboard_clearout = 3600

        if opts.cache_dir.startswith('~/'):
                # expanduser copes with HOME being unset, where
                # os.getenv('HOME') + ... would raise TypeError
                opts.cache_dir = os.path.expanduser(opts.cache_dir)

        if opts.display is None:
                # Overwriting needs a terminal on stdout, and would be
                # mixed up with debug output sent to stdout.
                if ((opts.debug > 0 and opts.debug_fd is None)
                    or not os.isatty(sys.stdout.fileno())):
                        opts.display = 'dumb'
                else:
                        opts.display = 'overwrite'

        fetcher = Fetcher(opts.ocean, opts.cache_dir)

        mode_fn(args[1:], pa.error)
1280
# Only run when executed as a script, so that loading this file as a
# module does not start parsing the command line.
if __name__ == '__main__':
        main()