chiark / gitweb /
d2d194c213e9039b941a1465c464239654ca14ae
[ypp-sc-tools.db-test.git] / yoweb-scrape
1 #!/usr/bin/python
2
3 #---------- setup ----------
4
5 import signal
6 signal.signal(signal.SIGINT, signal.SIG_DFL)
7
8 import os
9 import time
10 import urllib
11 import urllib2
12 import errno
13 import sys
14 import re as regexp
15 import random
16 import curses
17 import termios
18 from optparse import OptionParser
19
20 from BeautifulSoup import BeautifulSoup
21
22 opts = None
23
24 #---------- YPP parameters and arrays ----------
25
# Names of all the puzzles for which a standing may be recorded.
puzzles = [
        'Swordfighting', 'Bilging', 'Sailing', 'Rigging', 'Navigating',
        'Battle Navigation', 'Gunning', 'Carpentry', 'Rumble',
        'Treasure Haul', 'Drinking', 'Spades', 'Hearts', 'Treasure Drop',
        'Poker', 'Distilling', 'Alchemistry', 'Shipwrightery',
        'Blacksmithing', 'Foraging',
        ]

# An entry which is itself a list names several puzzles which share
# one column of a StandingsTable (the best standing is shown).
core_duty_puzzles = [
        'Gunning',
        ['Sailing', 'Rigging'],
        'Bilging',
        'Carpentry',
        ]

duty_puzzles = ['Navigating', 'Battle Navigation']
duty_puzzles.extend(core_duty_puzzles)
duty_puzzles.append('Treasure Haul')

# Standing names, lowest first; a standing is stored as its index here.
standingvals = [
        'Able', 'Distinguished', 'Respected', 'Master',
        'Renowned', 'Grand-Master', 'Legendary', 'Ultimate',
        ]
standing_limit = len(standingvals)

# Matches the href of a link to a pirate's yoweb page.
pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm')

max_pirate_namelen = 12
49
50
51 #---------- general utilities ----------
52
def debug(m):
        # Prints the message m to opts.debug_file, but only when
        # debugging is enabled (opts.debug is greater than zero).
        if not (opts.debug > 0):
                return
        print >>opts.debug_file, m
56
def debug_flush():
        # Flushes opts.debug_file; does nothing unless debugging is
        # enabled (opts.debug is greater than zero).
        if not (opts.debug > 0):
                return
        opts.debug_file.flush()
60
def sleep(seconds):
        # Sleeps for `seconds', first flushing any pending debug
        # output so that it is not held up for the duration.
        debug_flush()
        time.sleep(seconds)
64
def format_time_interval(ti):
        # Formats a duration of ti seconds as a short string:
        #   under two minutes    'M:SS'
        #   under two hours      whole minutes, eg '17m'
        #   under one day        whole hours, eg '5h'
        #   otherwise            whole days, eg '3d'
        if ti < 120:
                return '%d:%02d' % (ti / 60, ti % 60)
        # (exclusive upper limit, seconds per unit, format)
        coarser = (
                (7200,  60,   '%2dm'),
                (86400, 3600, '%dh'),
                )
        for (limit, unit, fmt) in coarser:
                if ti < limit:
                        return fmt % (ti / unit)
        return '%dd' % (ti / 86400)
70
71 #---------- caching and rate-limiting data fetcher ----------
72
73 class Fetcher:
74         def __init__(self, ocean, cachedir):
75                 debug('Fetcher init %s' % cachedir)
76                 self.ocean = ocean
77                 self.cachedir = cachedir
78                 try: os.mkdir(cachedir)
79                 except (OSError,IOError), oe:
80                         if oe.errno != errno.EEXIST: raise
81                 self._cache_scan(time.time())
82
83         def default_ocean(self, ocean='ice'):
84                 if self.ocean is None:
85                         self.ocean = ocean
86
87         def _cache_scan(self, now):
88                 # returns list of ages, unsorted
89                 ages = []
90                 debug('Fetcher   scan_cache')
91                 for leaf in os.listdir(self.cachedir):
92                         if not leaf.startswith('#'): continue
93                         path = self.cachedir + '/' + leaf
94                         try: s = os.stat(path)
95                         except (OSError,IOError), oe:
96                                 if oe.errno != errno.ENOENT: raise
97                                 continue
98                         age = now - s.st_mtime
99                         if age > opts.expire_age:
100                                 debug('Fetcher    expire %d %s' % (age, path))
101                                 try: os.remove(path)
102                                 except (OSError,IOError), oe:
103                                         if oe.errno != errno.ENOENT: raise
104                                 continue
105                         ages.append(age)
106                 return ages
107
108         def need_wait(self, now, imaginary=[]):
109                 ages = self._cache_scan(now)
110                 ages += imaginary
111                 ages.sort()
112                 debug('Fetcher   ages ' + `ages`)
113                 min_age = 1
114                 need_wait = 0
115                 for age in ages:
116                         if age < min_age and age < 300:
117                                 debug('Fetcher   morewait min=%d age=%d' %
118                                         (min_age, age))
119                                 need_wait = max(need_wait, min_age - age)
120                         min_age += 3
121                         min_age *= 1.25
122                 return need_wait
123
124         def _rate_limit_cache_clean(self, now):
125                 need_wait = self.need_wait(now)
126                 if need_wait > 0:
127                         debug('Fetcher   wait %d' % need_wait)
128                         sleep(need_wait)
129
130         def fetch(self, url, max_age):
131                 debug('Fetcher fetch %s' % url)
132                 cache_corename = urllib.quote_plus(url)
133                 cache_item = "%s/#%s#" % (self.cachedir, cache_corename)
134                 try: f = file(cache_item, 'r')
135                 except (OSError,IOError), oe:
136                         if oe.errno != errno.ENOENT: raise
137                         f = None
138                 now = time.time()
139                 max_age = max(opts.min_max_age, min(max_age, opts.expire_age))
140                 if f is not None:
141                         s = os.fstat(f.fileno())
142                         age = now - s.st_mtime
143                         if age > max_age:
144                                 debug('Fetcher  stale %d < %d'% (max_age, age))
145                                 f = None
146                 if f is not None:
147                         data = f.read()
148                         f.close()
149                         debug('Fetcher  cached %d > %d' % (max_age, age))
150                         return data
151
152                 debug('Fetcher  fetch')
153                 self._rate_limit_cache_clean(now)
154
155                 stream = urllib2.urlopen(url)
156                 data = stream.read()
157                 cache_tmp = "%s/#%s~%d#" % (
158                         self.cachedir, cache_corename, os.getpid())
159                 f = file(cache_tmp, 'w')
160                 f.write(data)
161                 f.close()
162                 os.rename(cache_tmp, cache_item)
163                 debug('Fetcher  stored')
164                 return data
165
166         def yoweb(self, kind, tail, max_age):
167                 self.default_ocean()
168                 url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
169                         self.ocean, kind, tail)
170                 return self.fetch(url, max_age)
171
172 #---------- logging assistance for troubled screenscrapers ----------
173
class SoupLog:
        # Accumulates, in self.msgs, messages describing problems
        # encountered while scraping.

        def __init__(self):
                self.msgs = [ ]

        def msg(self, m):
                # Records the message m.
                self.msgs.append(m)

        def soupm(self, obj, m):
                # Records m together with the repr of the soup
                # object obj to which it relates.
                located = m + '; in ' + repr(obj)
                self.msg(located)

        def needs_msgs(self, child_souplog):
                # Takes over all the messages of child_souplog,
                # leaving it empty.
                self.msgs.extend(child_souplog.msgs)
                child_souplog.msgs = [ ]
184
def soup_text(obj):
        # Returns all the text found within the soup object obj,
        # concatenated, with leading and trailing whitespace removed.
        pieces = obj.findAll(text=True)
        return ''.join(pieces).strip()
188
class SomethingSoupInfo(SoupLog):
        # Common base for the page scrapers: fetches a yoweb page via
        # the global fetcher and parses it into self._soup.

        def __init__(self, kind, tail, max_age):
                # kind, tail and max_age are passed to fetcher.yoweb.
                SoupLog.__init__(self)
                page = fetcher.yoweb(kind, tail, max_age)
                entities = BeautifulSoup.HTML_ENTITIES
                self._soup = BeautifulSoup(page, convertEntities=entities)
196
197 #---------- scraper for pirate pages ----------
198
class PirateInfo(SomethingSoupInfo):
        # Public data members:
        #  pi.standings = { 'Treasure Haul': 0, ... }
        #       maps puzzle name to an index into standingvals;
        #       puzzles whose standing could not be determined
        #       are absent
        #  pi.name = name
        #  pi.crew = (id, name)         or None if not determined
        #  pi.flag = (id, name)         or None if not determined
        #  pi.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, pirate, max_age=300):
                # Fetches (subject to caching) and scrapes the page
                # for the pirate named `pirate'.
                SomethingSoupInfo.__init__(self,
                        'pirate.wm?target=', pirate, max_age)
                self.name = pirate
                self._find_standings()
                self.crew = self._find_crewflag('crew',
                        '^/yoweb/crew/info\\.wm')
                self.flag = self._find_crewflag('flag',
                        '^/yoweb/flag/info\\.wm')

        def _find_standings(self):
                # Sets self.standings from the puzzle images on the
                # page.  Each image's alt text names the puzzle; the
                # standing is in the table cell following the one
                # containing the image.
                imgs = self._soup.findAll('img',
                        src=regexp.compile('/yoweb/images/stat.*'))
                # group 1: the word following the `/', if that ends
                #  the text; group 2: otherwise, the word given in
                #  `(ocean-wide ...)' at the end of the text
                standing_re = regexp.compile(
u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
                        )
                standings = { }

                for skill in puzzles:
                        standings[skill] = [ ]

                # Problems are collected here, and reported only if
                # some puzzle ends up without a standing.
                skl = SoupLog()

                for img in imgs:
                        try: puzzle = img['alt']
                        except KeyError: continue

                        if not puzzle in puzzles:
                                skl.soupm(img, 'unknown puzzle: "%s"' % puzzle)
                                continue
                        key = img.findParent('td')
                        if key is None:
                                skl.soupm(img, 'puzzle at root! "%s"' % puzzle)
                                continue
                        valelem = key.findNextSibling('td')
                        if valelem is None:
                                skl.soupm(key, 'puzzle missing sibling "%s"'
                                        % puzzle)
                                continue
                        valstr = soup_text(valelem)
                        match = standing_re.match(valstr)
                        if match is None:
                                skl.soupm(key, ('puzzle "%s" unparseable'+
                                        ' standing "%s"') % (puzzle, valstr))
                                continue
                        # lastindex is whichever of the two
                        # alternatives' groups matched
                        standing = match.group(match.lastindex)
                        standings[puzzle].append(standing)

                self.standings = { }

                for puzzle in puzzles:
                        sl = standings[puzzle]
                        if len(sl) > 1:
                                skl.msg('puzzle "%s" multiple standings %s' %
                                                (puzzle, repr(sl)))
                                continue
                        if not sl:
                                skl.msg('puzzle "%s" no standing found' % puzzle)
                                continue
                        standing = sl[0]
                        try:
                                self.standings[puzzle] = (
                                        standingvals.index(standing))
                        except ValueError:
                                skl.msg('puzzle "%s" unknown standing "%s"' %
                                        (puzzle, standing))

                for puzzle in puzzles:
                        if not puzzle in self.standings:
                                # needs_msgs empties skl, so once is
                                # enough
                                self.needs_msgs(skl)
                                break

        def _find_crewflag(self, cf, yoweb_re):
                # cf is 'crew' or 'flag'; yoweb_re matches the href
                # of a link to the relevant info page.  Returns
                # (id, name) taken from the only such link on the
                # page; otherwise records a message and returns None.
                things = self._soup.findAll('a', href=regexp.compile(yoweb_re))
                if len(things) != 1:
                        self.msg('zero or several %s id references found' % cf)
                        return None
                thing = things[0]
                id_re = '\\b%sid\\=(\\w+)$' % cf
                id_haystack = thing['href']
                match = regexp.compile(id_re).search(id_haystack)
                if match is None:
                        self.soupm(thing, ('incomprehensible %s id ref'+
                                ' (%s in %s)') % (cf, id_re, id_haystack))
                        return None
                name = soup_text(thing)
                return (match.group(1), name)

        def __str__(self):
                return repr((self.crew, self.flag, self.standings, self.msgs))
297
298 #---------- scraper for crew pages ----------
299
class CrewInfo(SomethingSoupInfo):
        # Public data members:
        #  ci.crew = [ ('Captain',        ['Pirate', ...]),
        #              ('Senior Officer', [...]),
        #               ... ]
        #  ci.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, crewid, max_age=300):
                # Fetches (subject to caching) and scrapes the info
                # page for the crew whose id is crewid.
                SomethingSoupInfo.__init__(self,
                        'crew/info.wm?crewid=', crewid, max_age)
                self._find_crew()

        def _find_crew(self):
                # Sets self.crew.  The table of members is located by
                # starting at the (sole) captain image and going up
                # through enclosing tables until one containing links
                # to pirates is found.
                self.crew = []
                captain_imgs = self._soup.findAll('img',
                        src='/yoweb/images/crew-captain.png')
                if len(captain_imgs) != 1:
                        self.msg('crew members: no. of captain images != 1')
                        return

                node = captain_imgs[0]
                while not node.find('a', href=pirate_ref_re):
                        node = node.findParent('table')
                        if not node:
                                self.msg('crew members: cannot find table')
                                return

                # members is the list for the rank most recently
                # seen, to which following pirates are added
                members = None
                rank_img_re = regexp.compile('/yoweb/images/crew')
                for row in node.contents:
                        # skip the strings between the rows
                        if isinstance(row,basestring):
                                continue

                        if row.find('img', attrs={'src': rank_img_re}):
                                # a row with a rank image starts a
                                # new rank
                                rank = soup_text(row)
                                members = []
                                self.crew.append((rank, members))
                                continue

                        for link in row.findAll('a', href=pirate_ref_re):
                                if members is not None:
                                        members.append(soup_text(link))
                                else:
                                        self.soupm(link, 'crew members: crew'
                                                ' before rank')

        def __str__(self):
                return repr((self.crew, self.msgs))
347
348 #---------- pretty-printer for tables of pirate puzzle standings ----------
349
class StandingsTable:
        # Builds up, as text, a table with one line per pirate and
        # one column per puzzle.  Use headings, pirate, pirate_dummy
        # and literalline to add lines, and results to get the text.

        def __init__(self, use_puzzles=None, col_width=6):
                # use_puzzles lists the columns; an entry which is
                # itself a list of puzzles gives one shared column.
                # The default is duty_puzzles if opts.ship_duty is
                # set, and otherwise all of puzzles.
                # col_width includes the space separating columns.
                if use_puzzles is None:
                        if opts.ship_duty:
                                use_puzzles=duty_puzzles
                        else:
                                use_puzzles=puzzles
                self._puzzles = use_puzzles
                self.s = ''
                self._cw = col_width-1

        def _pline(self, pirate, puzstrs, extra):
                # Appends one line: the name column, then each of
                # puzstrs padded and truncated to the column width,
                # then extra (if any).
                self.s += ' %-*s' % (max(max_pirate_namelen, 14), pirate)
                for v in puzstrs:
                        self.s += ' %-*.*s' % (self._cw,self._cw, v)
                if extra:
                        self.s += ' ' + extra
                self.s += '\n'

        def _puzstr(self, pi, puzzle):
                # Returns the text for one cell: pi's standing in
                # puzzle or, if puzzle is a list, the best of pi's
                # standings in those puzzles.  The result is '?' if
                # any of the standings is unknown and '' for the
                # lowest standing.  Otherwise it is the standing's
                # number (if the column is wide enough) followed by
                # one `*' for every two levels and a `+' for an odd
                # one.
                if not isinstance(puzzle,list): puzzle = [puzzle]
                try: standing = max([pi.standings[p] for p in puzzle])
                except KeyError: return '?'
                if not standing: return ''
                s = ''
                if self._cw > 4:
                        s += repr(standing)
                if self._cw > 5:
                        s += ' '
                s += '*' * (standing // 2)
                s += '+' * (standing % 2)
                return s

        def headings(self):
                # Appends the line of column headings.
                def puzn_redact(name):
                        # Abbreviates a puzzle name: a list of names
                        # becomes its members' abbreviations, each
                        # cut to half the column width, joined with
                        # `/'; in a name containing a space the first
                        # word is cut to 4 characters and the space
                        # dropped.
                        if isinstance(name,list):
                                return '/'.join(
                                        ["%.*s" % (self._cw//2, puzn_redact(n))
                                         for n in name])
                        spc = name.find(' ')
                        if spc < 0: return name
                        return name[0:min(4,spc)] + name[spc+1:]
                self._pline('', map(puzn_redact, self._puzzles), None)

        def literalline(self, line):
                # Appends line, as is, followed by a newline.
                self.s += line + '\n'

        def pirate_dummy(self, name, standingstring, extra=None):
                # Appends a line for `name' without real standings.
                # standingstring is repeated once per column and each
                # element of the result fills one cell, so it should
                # be a one-element sequence (eg a single character).
                self._pline(name, standingstring * len(self._puzzles), extra)

        def pirate(self, pi, extra=None):
                # Appends the line for the pirate whose PirateInfo
                # is pi.
                puzstrs = [self._puzstr(pi,puz) for puz in self._puzzles]
                self._pline(pi.name, puzstrs, extra)

        def results(self):
                # Returns the text of the table so far.
                return self.s
405
406 #---------- chat log parser ----------
407
class PirateAboard:
        # This is essentially a transparent, dumb, data class.
        #  pa.v                 vessel record the pirate is aboard
        #  pa.name
        #  pa.last_time         time and description of the
        #  pa.last_event         latest event involving the pirate
        #  pa.gunner            initially False
        #  pa.last_chat_time    initially None
        #  pa.last_chat_chan    initially None
        #  pa.pi                PirateInfo, or None if not yet fetched

        def __init__(pa, pn, v, time, event):
                pa.name = pn
                pa.v = v
                pa.last_time = time
                pa.last_event = event
                pa.gunner = False
                pa.last_chat_time = None
                pa.last_chat_chan = None
                pa.pi = None

        def pirate_info(pa):
                # Returns the PirateInfo for this pirate, fetching or
                # refreshing it only if the fetcher could do that
                # without waiting.  The result may therefore be out
                # of date or (if never fetched) None.
                now = time.time()
                if not pa.pi:
                        imaginary = [1]
                else:
                        # refresh at a randomly chosen age of
                        # between 120 and 240 seconds
                        age = now - pa.pi_fetched
                        guide = random.randint(120,240)
                        if age <= guide:
                                return pa.pi
                        debug('PirateAboard refresh %d > %d  %s' % (
                                age, guide, pa.name))
                        imaginary = [2,6]

                wait = fetcher.need_wait(now, imaginary)
                if wait:
                        debug('PirateAboard fetcher not ready %d' % wait)
                        return pa.pi

                pa.pi = PirateInfo(pa.name, 600)
                pa.pi_fetched = now
                return pa.pi
448
449 class ChatLogTracker:
450         # This is quite complex so we make it opaque.  Use the
451         # official invokers, accessors etc.
452
453         def __init__(self, myself_pi, logfn):
454                 self._pl = {}   # self._pl['Pirate'] =
455                 self._vl = {}   #   self._vl['Vessel']['Pirate'] = PirateAboard
456                                 # self._vl['Vessel']['#lastinfo']
457                                 # self._vl['Vessel']['#name']
458                                 # self._v = self._vl[self._vessel]
459                 self._date = None
460                 self._myself = myself_pi
461                 self._f = file(logfn)
462                 self._lbuf = ''
463                 self._progress = [0, os.fstat(self._f.fileno()).st_size]
464                 self._disembark_myself()
465                 self._need_redisplay = False
466                 self._lastvessel = None
467
468         def _disembark_myself(self):
469                 self._v = None
470                 self._vessel = None
471                 self.force_redisplay()
472
473         def force_redisplay(self):
474                 self._need_redisplay = True
475
476         def _vessel_updated(self, v, timestamp):
477                 v['#lastinfo'] = timestamp
478                 self.force_redisplay()
479
480         def _onboard_event(self,v,timestamp,pirate,event):
481                 pa = self._pl.get(pirate, None)
482                 if pa is not None and pa.v is v:
483                         pa.last_time = timestamp
484                         pa.last_event = event
485                 else:
486                         if pa is not None: del pa.v[pirate]
487                         pa = PirateAboard(pirate, v, timestamp, event)
488                         self._pl[pirate] = pa
489                         v[pirate] = pa
490                 self._vessel_updated(v, timestamp)
491                 return pa
492
493         def _trash_vessel(self, v):
494                 for pn in v:
495                         if pn.startswith('#'): continue
496                         del self._pl[pn]
497                 vn = v['#name']
498                 del self._vl[vn]
499                 if v is self._v: self._disembark_myself()
500                 self.force_redisplay()
501
502         def _vessel_stale(self, v, timestamp):
503                 return timestamp - v['#lastinfo'] > opts.ship_reboard_clearout
504
505         def _vessel_check_expire(self, v, timestamp):
506                 if not self._vessel_stale(v, timestamp):
507                         return v
508                 self._debug_line_disposition(timestamp,'',
509                         'stale-reset ' + v['#name'])
510                 self._trash_vessel(v)
511                 return None
512
513         def expire_garbage(self, timestamp):
514                 for v in self._vl.values():
515                         self._vessel_check_expire(v, timestamp)
516
517         def _vessel_lookup(self, vn, timestamp, dml=[], create=False):
518                 v = self._vl.get(vn, None)
519                 if v is not None:
520                         v = self._vessel_check_expire(v, timestamp)
521                 if v is not None:
522                         dml.append('found')
523                         return v
524                 if not create:
525                         dml.append('no')
526                 dml.append('new')
527                 self._vl[vn] = v = { '#name': vn }
528                 self._vessel_updated(v, timestamp)
529                 return v
530
531         def _find_matching_vessel(self, pattern, timestamp, cmdr,
532                                         dml=[], create=False):
533                 # use when a commander pirate `cmdr' specified a vessel
534                 #  by name `pattern' (either may be None)
535                 # if create is true, will create the vessel
536                 #  record if an exact name is specified
537
538                 if (pattern is not None and
539                     not '*' in pattern
540                     and len(pattern.split(' ')) == 2):
541                         vn = pattern.title()
542                         dml.append('exact')
543                         return self._vessel_lookup(
544                                 vn, timestamp, dml=dml, create=create)
545
546                 if pattern is None:
547                         pattern_check = lambda vn: True
548                 else:
549                         re = '(?:.* )?%s$' % pattern.lower().replace('*','.+')
550                         pattern_check = regexp.compile(re, regexp.I).match
551
552                 tries = []
553
554                 cmdr_pa = self._pl.get(cmdr, None)
555                 if cmdr_pa: tries.append((cmdr_pa.v, 'cmdr'))
556
557                 tries.append((self._v, 'here'))
558                 tried_vns = []
559
560                 for (v, dm) in tries:
561                         if v is None: dml.append(dm+'?'); continue
562                         
563                         vn = v['#name']
564                         if not pattern_check(vn):
565                                 tried_vns.append(vn)
566                                 dml.append(dm+'#')
567                                 continue
568
569                         dml.append(dm+'!')
570                         return v
571
572                 if pattern is not None and '*' in pattern:
573                         search = [
574                                 (vn,v)
575                                 for (vn,v) in self._vl.iteritems()
576                                 if not self._vessel_stale(v, timestamp)
577                                 if pattern_check(vn)
578                                 ]
579                         #debug('CLT-RE /%s/ wanted (%s) searched (%s)' % (
580                         #       re,
581                         #       '/'.join(tried_vns),
582                         #       '/'.join([vn for (vn,v) in search])))
583
584                         if len(search)==1:
585                                 dml.append('one')
586                                 return search[0][1]
587                         elif search:
588                                 dml.append('many')
589                         else:
590                                 dml.append('none')
591
592         def _debug_line_disposition(self,timestamp,l,m):
593                 debug('CLT %13s %-40s %s' % (timestamp,m,l))
594
        def chatline(self,l):
                # Processes one line l of the chat log, updating the
                # records of which pirates are aboard which vessels.
                # Every path ends by reporting, via d, a short
                # description of what was made of the line; the
                # return value is whatever d returned.

                # rm matches a regexp against the start of the line;
                # d reports the line's disposition.  Both look up l
                # and timestamp when they are called, so they see
                # the values assigned further down.
                rm = lambda re: regexp.match(re,l)
                d = lambda m: self._debug_line_disposition(timestamp,l,m)
                timestamp = None

                # A line consisting of a date between rows of `='
                # sets the date for the lines which follow.
                m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$')
                if m:
                        self._date = [int(x) for x in m.groups()]
                        self._previous_timestamp = None
                        return d('date '+`self._date`)

                if self._date is None:
                        return d('date unset')

                # Everything else must start with [HH:MM:SS]
                m = rm('\\[(\d\d):(\d\d):(\d\d)\\] ')
                if not m:
                        return d('no timestamp')

                # Combine the date and time of day.  If that would
                # make time go backwards, advance the date's third
                # field by one and try again.
                # (presumably relies on mktime to normalise a day
                # number which runs past the end of the month)
                while True:
                        time_tuple = (self._date +
                                      [int(x) for x in m.groups()] +
                                      [-1,-1,-1])
                        timestamp = time.mktime(time_tuple)
                        if timestamp >= self._previous_timestamp: break
                        self._date[2] += 1
                        self._debug_line_disposition(timestamp,'',
                                'new date '+`self._date`)

                self._previous_timestamp = timestamp

                # Strip the timestamp (everything up to the first
                # space); from here on rm matches the rest of the line.
                l = l[l.find(' ')+1:]

                # Helpers for events aboard our own vessel, for use
                # after a successful rm:
                #  ob_x  records `event' for `pirate'
                #  ob1   records `did' for the pirate in group 1
                #  oba   like ob1 but appends group 2 to `did'
                def ob_x(pirate,event):
                        return self._onboard_event(
                                        self._v, timestamp, pirate, event)
                def ob1(did): ob_x(m.group(1), did); return d(did)
                def oba(did): return ob1('%s %s' % (did, m.group(2)))

                # Records the event for the pirate (which updates the
                # vessel's #lastinfo) and then removes the pirate
                # from the vessel v and from self._pl.
                def disembark(v, timestamp, pirate, event):
                        self._onboard_event(
                                        v, timestamp, pirate, 'leaving '+event)
                        del v[pirate]
                        del self._pl[pirate]

                def disembark_me(why):
                        self._disembark_myself()
                        return d('disembark-me '+why)

                # We board a vessel: it becomes the current vessel
                # (its record being created if necessary) and stale
                # vessels are cleared out.
                m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$')
                if m:
                        dm = ['boarding']
                        pn = self._myself.name
                        vn = m.group(1)
                        v = self._vessel_lookup(vn, timestamp, dm, create=True)
                        self._lastvessel = self._vessel = vn
                        self._v = v
                        ob_x(pn, 'we boarded')
                        self.expire_garbage(timestamp)
                        return d(' '.join(dm))

                # Nothing below is of interest unless we are aboard.
                if self._v is None:
                        return d('no vessel')

                m = rm('(\\w+) has come aboard\\.$')
                if m: return ob1('boarded');

                m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$')
                if m:
                        (who,what) = m.groups()
                        pa = ob_x(who,'ord '+what)
                        if what == 'Gunning':
                                pa.gunner = True
                        return d('duty order')

                m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$')
                if m: oba('stopped'); return d("end")

                # Notes that `speaker' said something on channel
                # `chan', if the speaker is known to be aboard our
                # vessel.  Returns a word saying where the speaker is.
                def chat_core(speaker, chan):
                        try: pa = self._pl[speaker]
                        except KeyError: return 'mystery'
                        if pa.v is not self._v: return 'elsewhere'
                        pa.last_chat_time = timestamp
                        pa.last_chat_chan = chan
                        self.force_redisplay()
                        return 'here'

                # Ordinary chat by the pirate in group 1.
                def chat(chan):
                        speaker = m.group(1)
                        dm = chat_core(speaker, chan)
                        return d('chat %s %s' % (chan, dm))

                # Chat whose text (group 2) may be a metacommand
                #   /a [<vessel pattern>:] <pirate> ...
                #   /d [<vessel pattern>:] <pirate> ...
                # which records the named pirates as being aboard
                # (/a), or as having left (/d), the vessel found by
                # _find_matching_vessel.  Anything else is treated as
                # ordinary chat.  Group 1 is taken to be the pirate
                # giving the command.
                def chat_metacmd(chan):
                        (cmdr, metacmd) = m.groups()
                        metacmd = regexp.sub('\\s+', ' ', metacmd).strip()
                        m2 = regexp.match(
                                '/([ad]) (?:([A-Za-z* ]+)\\s*:)?([A-Za-z ]+)$',
                                metacmd)
                        if not m2: return chat(chan)

                        (cmd, pattern, targets) = m2.groups()
                        dml = ['cmd', chan, cmd]

                        if cmd == 'a': each = self._onboard_event
                        else: each = disembark

                        if cmdr == self._myself.name:
                                dml.append('self')
                                how = 'cmd: %s' % cmd
                        else:
                                dml.append('other')
                                how = 'cmd: %s %s' % (cmd,cmdr)

                        v = self._find_matching_vessel(
                                pattern, timestamp, cmdr, dml, create=True)

                        if v is not None:
                                targets = targets.strip().split(' ')
                                dml.append(`len(targets)`)
                                for target in targets:
                                        each(v, timestamp, target.title(), how)
                                self._vessel_updated(v, timestamp)

                        dm = ' '.join(dml)
                        chat_core(cmdr, 'cmd '+chan)
                        return d(dm)

                m = rm('(\\w+) (?:issued an order|ordered everyone) "')
                if m: return ob1('general order');

                m = rm('(\\w+) says, "')
                if m: return chat('public')

                m = rm('(\\w+) tells ye, "')
                if m: return chat('private')

                # NB group 1 here is the pirate we spoke to
                m = rm('Ye told (\\w+), "(.*)"$')
                if m: return chat_metacmd('private')

                # must be tried before the plain `officer chats'
                # pattern, which would otherwise match with `flag'
                # as the speaker
                m = rm('(\\w+) flag officer chats, "')
                if m: return chat('flag officer')

                m = rm('(\\w+) officer chats, "(.*)"$')
                if m: return chat_metacmd('officer')

                # Lines which mean we have left the vessel
                m = rm('Ye accepted the offer to job with ')
                if m: return disembark_me('jobbing')

                m = rm('Ye hop on the ferry and are whisked away ')
                if m: return disembark_me('ferry')

                m = rm('Whisking away to yer home on the magical winds')
                if m: return disembark_me('home')

                # If we are among the winners then the other winners
                # are recorded as aboard our vessel; names containing
                # a space are skipped.
                m = rm('Game over\\.  Winners: ([A-Za-z, ]+)\\.$')
                if m:
                        pl = m.group(1).split(', ')
                        if not self._myself.name in pl:
                                return d('lost melee')
                        for pn in pl:
                                if ' ' in pn: continue
                                ob_x(pn,'won melee')
                        return d('won melee')

                m = rm('(\\w+) is eliminated\\!')
                if m: return ob1('eliminated in fray');

                m = rm('(\\w+) has driven \w+ from the ship\\!')
                if m: return ob1('boarder repelled');

                m = rm('\w+ has bested (\\w+), and turns'+
                        ' to the rest of the ship\\.')
                if m: return ob1('boarder unrepelled');

                m = rm('(\\w+) has left the vessel\.')
                if m:
                        pirate = m.group(1)
                        disembark(self._v, timestamp, pirate, 'disembarked')
                        return d('disembarked')

                return d('not-matched')
775
776         def _str_vessel(self, vn, v):
777                 s = ' vessel %s\n' % vn
778                 s += ' '*20 + "%-*s   %13s\n" % (
779                                 max_pirate_namelen, '#lastinfo',
780                                 v['#lastinfo'])
781                 assert v['#name'] == vn
782                 for pn in sorted(v.keys()):
783                         if pn.startswith('#'): continue
784                         pa = v[pn]
785                         assert pa.v == v
786                         assert self._pl[pn] == pa
787                         s += ' '*20 + "%s %-*s %13s %-30s %13s %s\n" % (
788                                 (' ','G')[pa.gunner],
789                                 max_pirate_namelen, pn,
790                                 pa.last_time, pa.last_event,
791                                 pa.last_chat_time, pa.last_chat_chan)
792                 return s
793
794         def __str__(self):
795                 s = '''<ChatLogTracker
796  myself %s
797  vessel %s
798 '''                     % (self._myself.name, self._vessel)
799                 assert ((self._v is None and self._vessel is None) or
800                         (self._v is self._vl[self._vessel]))
801                 if self._vessel is not None:
802                         s += self._str_vessel(self._vessel, self._v)
803                 for vn in sorted(self._vl.keys()):
804                         if vn == self._vessel: continue
805                         s += self._str_vessel(vn, self._vl[vn])
806                 for p in self._pl:
807                         pa = self._pl[p]
808                         assert pa.v[p] is pa
809                         assert pa.v in self._vl.values()
810                 s += '>\n'
811                 return s
812
813         def catchup(self, progress=None):
814                 while True:
815                         more = self._f.readline()
816                         if not more: break
817
818                         self._progress[0] += len(more)
819                         if progress: progress.progress(*self._progress)
820
821                         self._lbuf += more
822                         if self._lbuf.endswith('\n'):
823                                 self.chatline(self._lbuf.rstrip())
824                                 self._lbuf = ''
825                                 if opts.debug >= 2:
826                                         debug(self.__str__())
827                 if progress: progress.caughtup()
828
829         def changed(self):
830                 rv = self._need_redisplay
831                 self._need_redisplay = False
832                 return rv
833         def myname(self):
834                 # returns our pirate name
835                 return self._myself.name
836         def vesselname(self):
837                 # returns the vessel name we're aboard or None
838                 return self._vessel
839         def lastvesselname(self):
840                 # returns the last vessel name we were aboard or None
841                 return self._lastvessel
842         def aboard(self, vesselname=True):
843                 # returns a list of PirateAboard the vessel
844                 #  sorted by pirate name
845                 #  you can pass this None and you'll get []
846                 #  or True for the current vessel (which is the default)
847                 #  the returned value is a fresh list of persistent
848                 #  PirateAboard objects
849                 if vesselname is True: v = self._v
850                 else: v = self._vl.get(vesselname.title())
851                 if v is None: return []
852                 return [ v[pn]
853                          for pn in sorted(v.keys())
854                          if not pn.startswith('#') ]
855
856 #---------- implementations of actual operation modes ----------
857
858 def do_pirate(pirates, bu):
859         print '{'
860         for pirate in pirates:
861                 info = PirateInfo(pirate)
862                 print '%s: %s,' % (`pirate`, info)
863         print '}'
864
def prep_crew_of(args, bu, max_age=300):
        # Common setup for the crew-of modes.
        #  args must be exactly one pirate name, otherwise bu() is
        #  called with a usage complaint.
        #  Returns the CrewInfo for that pirate's crew, or None if
        #  the pirate's .crew is None.
        #  max_age is passed on to both PirateInfo and CrewInfo.
        if len(args) != 1:
                bu('crew-of takes one pirate name')
        pirate = PirateInfo(args[0], max_age)
        crew = pirate.crew
        if crew is not None:
                return CrewInfo(crew[0], max_age)
        return None
870
871 def do_crew_of(args, bu):
872         ci = prep_crew_of(args, bu)
873         print ci
874
875 def do_standings_crew_of(args, bu):
876         ci = prep_crew_of(args, bu, 60)
877         tab = StandingsTable()
878         tab.headings()
879         for (rank, members) in ci.crew:
880                 if not members: continue
881                 tab.literalline('%s:' % rank)
882                 for p in members:
883                         pi = PirateInfo(p, random.randint(900,1800))
884                         tab.pirate(pi)
885         print tab.results()
886
class ProgressPrintPercentage:
        # Progress reporter which writes a one-line percentage to a
        #  file object (sys.stdout by default), ending each update
        #  with \r so that the next update overwrites it.

        def __init__(self, f=sys.stdout):
                self._f = f

        def _emit(self, text):
                # write and flush straight away so it shows up at once
                self._f.write(text)
                self._f.flush()

        def progress_string(self,done,total):
                # the text for `done' out of `total'
                percent = (done*100) / total
                return "scan chat logs %3d%%\r" % percent

        def progress(self,*a):
                # a is (done, total), as for progress_string
                self._emit(self.progress_string(*a))

        def show_init(self, pirate, ocean):
                # startup banner
                print >>self._f, 'Starting up, %s on the %s ocean' % (
                        pirate, ocean)

        def caughtup(self):
                # blank out the progress line
                self._emit('                   \r')
901
902 #----- modes which use the chat log parser are quite complex -----
903
def prep_chat_log(args, bu,
                progress=ProgressPrintPercentage(),
                max_myself_age=3600):
        # Common setup for the modes which follow a chat log.
        #  args must be exactly the chat log filename, whose last
        #  path component must start PIRATE_OCEAN_; otherwise bu()
        #  is called with a usage complaint.
        #  Sets the fetcher's default ocean from the filename, looks
        #  up our own pirate, and reads the log as far as it
        #  currently goes, reporting to `progress' as it does so.
        #  Returns (myself, track): a PirateInfo and a ChatLogTracker.
        if len(args) != 1: bu('this action takes only chat log filename')
        logfn = args[0]
        logfn_re = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_'
        match = regexp.match(logfn_re, logfn)
        if not match: bu('chat log filename is not in expected format')
        (pirate, ocean) = match.groups()
        fetcher.default_ocean(ocean)

        progress.show_init(pirate, fetcher.ocean)
        myself = PirateInfo(pirate,max_myself_age)
        track = ChatLogTracker(myself, logfn)

        # The initial scan runs with the debug level lowered by 2.
        #  Restore it even if the scan fails, so that whatever
        #  handles the exception sees the level the user asked for.
        opts.debug -= 2
        try:
                track.catchup(progress)
        finally:
                opts.debug += 2

        track.force_redisplay()

        return (myself, track)
926
927 def do_track_chat_log(args, bu):
928         (myself, track) = prep_chat_log(args, bu)
929         while True:
930                 track.catchup()
931                 if track.changed():
932                         print track
933                 sleep(1)
934
935 #----- ship management aid -----
936
937 class Display_dumb(ProgressPrintPercentage):
938         def __init__(self):
939                 ProgressPrintPercentage.__init__(self)
940         def show(self, s):
941                 print '\n\n', s;
942         def realstart(self):
943                 pass
944
945 class Display_overwrite(ProgressPrintPercentage):
946         def __init__(self):
947                 ProgressPrintPercentage.__init__(self)
948
949                 null = file('/dev/null','w')
950                 curses.setupterm(fd=null.fileno())
951
952                 self._clear = curses.tigetstr('clear')
953                 if not self._clear:
954                         self._debug('missing clear!')
955                         self.show = Display_dumb.show
956                         return
957
958                 self._t = {'el':'', 'ed':''}
959                 if not self._init_sophisticated():
960                         for k in self._t.keys(): self._t[k] = ''
961                         self._t['ho'] = self._clear
962
963         def _debug(self,m): debug('display overwrite: '+m)
964
965         def _init_sophisticated(self):
966                 for k in self._t.keys():
967                         s = curses.tigetstr(k)
968                         self._t[k] = s
969                 self._t['ho'] = curses.tigetstr('ho')
970                 if not self._t['ho']:
971                         cup = curses.tigetstr('cup')
972                         self._t['ho'] = curses.tparm(cup,0,0)
973                 missing = [k for k in self._t.keys() if not self._t[k]]
974                 if missing:
975                         self.debug('missing '+(' '.join(missing)))
976                         return 0
977                 return 1
978
979         def show(self, s):
980                 w = sys.stdout.write
981                 def wti(k): w(self._t[k])
982
983                 wti('ho')
984                 nl = ''
985                 for l in s.rstrip().split('\n'):
986                         w(nl)
987                         w(l)
988                         wti('el')
989                         nl = '\r\n'
990                 wti('ed')
991                 w(' ')
992                 sys.stdout.flush()
993
994         def realstart(self):
995                 sys.stdout.write(self._clear)
996                 sys.stdout.flush()
997                         
998
999 def do_ship_aid(args, bu):
1000         if opts.ship_duty is None: opts.ship_duty = True
1001
1002         displayer = globals()['Display_'+opts.display]()
1003
1004         (myself, track) = prep_chat_log(args, bu, progress=displayer)
1005
1006         displayer.realstart()
1007
1008         if os.isatty(0): kr_create = KeystrokeReader
1009         else: kr_create = DummyKeystrokeReader
1010
1011         try:
1012                 kreader = kr_create(0, 10)
1013                 ship_aid_core(myself, track, displayer, kreader)
1014         finally:
1015                 kreader.stop()
1016                 print '\n'
1017
class KeyBasedSorter:
        # Base class for sorters of lists of PirateAboard.
        #  Subclasses provide compar_key(pi), which maps a pirate's
        #  info object to a sort key.

        def compar_key_pa(self, pa):
                # Sort key for one PirateAboard.
                #  pa.pirate_info() can be None (ship_aid_core copes
                #  with that when drawing), and compar_key cannot be
                #  applied to None.  Such pirates are sorted by name,
                #  ahead of all the pirates we do have info for.
                #  The leading 0/1 keeps the two kinds of key from
                #  ever being compared with each other.
                pi = pa.pirate_info()
                if pi is None:
                        return (0, pa.name)
                return (1, self.compar_key(pi))

        def lsort_pa(self, l):
                # sorts the list l of PirateAboard in place
                l.sort(key = self.compar_key_pa)
1023
class NameSorter(KeyBasedSorter):
        # sorts pirates by name

        def compar_key(self, pi):
                key = pi.name
                return key
1026
class SkillSorter(NameSorter):
        # Sorts pirates by suitability for some duty puzzle(s).
        #  `relevant' is a '/'-separated list of the puzzle names
        #  wanted.  The puzzles to `avoid' are all the other core
        #  duty puzzles.

        def __init__(self, relevant):
                wanted = frozenset(relevant.split('/'))
                others = set()
                for entry in core_duty_puzzles:
                        # entries are either a single puzzle name or
                        #  a sequence of names
                        if isinstance(entry, basestring):
                                others.add(entry)
                        else:
                                others.update(entry)
                self._want = wanted
                self._avoid = others - wanted

        def compar_key(self, pi):
                # Key is (minus best wanted standing,
                #         avoided standings highest first,
                #         name),
                #  so the best at the wanted puzzles sort first, and
                #  ties go to whoever is worse at the other puzzles.
                #  Missing standings count as -1 for wanted puzzles
                #  and as standing_limit for avoided ones.
                standings = pi.standings
                best_want = max([ standings.get(puz,-1)
                                  for puz in self._want ])
                best_avoid = sorted([ -standings.get(puz,standing_limit)
                                      for puz in self._avoid ])
                debug('compar_key %s bw=%s ba=%s' % (
                        pi.name, repr(best_want), repr(best_avoid)))
                return (-best_want, [ -x for x in best_avoid ], pi.name)
1050
def ship_aid_core(myself, track, displayer, kreader):
        # Main loop of ship-aid mode.  Each time round: catch up
        #  with the chat log, redraw the table of pirates aboard via
        #  displayer.show, then wait for a keystroke from kreader.
        #  Keys: q quits; a sorts by name; the keys in skill_keys
        #  sort by standing in the corresponding puzzle(s).
        #  Returns when the user quits.

        skill_keys = {
                'g': 'Gunning',
                'c': 'Carpentry',
                's': 'Sailing/Rigging',
                'b': 'Bilging',
                'n': 'Navigating',
                'd': 'Battle Navigation',
                't': 'Treasure Haul',
                }

        def find_vessel():
                # returns (vessel name or None, description fragment)
                current = track.vesselname()
                if current:
                        return (current, " on board the %s" % current)
                previous = track.lastvesselname()
                if previous:
                        return (previous, " ashore from the %s" % previous)
                return (None, " not on a vessel")

        def timeevent(t,e):
                # one table cell: how long ago t was, and what happened;
                #  `now' is set at the top of each loop iteration below
                if t is None:
                        return ' ' * 22
                return " %-4s %-16s" % (format_time_interval(now - t),e)

        displayer.show(track.myname() + find_vessel()[1] + '...')

        # spinner shown for pirates whose info we don't have yet
        rotate_nya = '/-\\'

        sort = NameSorter()

        while True:
                track.catchup()
                now = time.time()

                (vn, where) = find_vessel()
                s = ''.join([
                        track.myname(),
                        where,
                        " at %s" % time.strftime("%Y-%m-%d %H:%M:%S"),
                        kreader.info(),
                        '\n',
                        ])

                tbl = StandingsTable()
                tbl.headings()

                aboard = track.aboard(vn)
                sort.lsort_pa(aboard)

                for pa in aboard:
                        pi = pa.pirate_info()

                        if pa.gunner:
                                xs = 'G '
                        else:
                                xs = '  '
                        xs += timeevent(pa.last_time, pa.last_event)
                        xs += timeevent(pa.last_chat_time, pa.last_chat_chan)

                        if pi is not None:
                                tbl.pirate(pi, xs)
                        else:
                                tbl.pirate_dummy(pa.name, rotate_nya[0], xs)

                s += tbl.results()
                displayer.show(s)

                k = kreader.getch()
                if k is None:
                        # no key pressed: just advance the spinner
                        rotate_nya = rotate_nya[1:3] + rotate_nya[0]
                        continue

                if k == 'q':
                        break
                if k == 'a':
                        sort = NameSorter()
                elif k in skill_keys:
                        sort = SkillSorter(skill_keys[k])
                # anything else is an unknown key command; ignore it
1119
1120 #---------- individual keystroke input ----------
1121
class DummyKeystrokeReader:
        # Stand-in keystroke reader for when there is no terminal to
        #  read from: never produces a key.  It takes the same
        #  constructor arguments as KeystrokeReader and ignores them.

        def __init__(self,fd,timeout_dummy):
                pass

        def stop(self):
                # nothing to restore
                pass

        def getch(self):
                # wait a second, then report `no key'
                sleep(1)
                return None

        def info(self):
                # text appended to the status line
                return ' [noninteractive]'
1127
class KeystrokeReader(DummyKeystrokeReader):
        # Reads single keystrokes from the terminal on `fd', without
        #  echo and without waiting for a newline.  The terminal's
        #  original settings are saved by the constructor and put
        #  back by stop().

        def __init__(self, fd, timeout_decisec=0):
                # positions within the list returned by tcgetattr
                lflag = 3
                cc = 6

                self._fd = fd
                self._saved = termios.tcgetattr(fd)

                # fetched a second time so that self._saved is not
                #  affected by the changes made here
                attrs = termios.tcgetattr(fd)
                turn_off = (termios.ECHO | termios.ECHONL |
                            termios.ICANON | termios.IEXTEN)
                attrs[lflag] &= ~turn_off
                # VMIN 0 with VTIME set: a read gives up after
                #  timeout_decisec tenths of a second with no input
                attrs[cc][termios.VMIN] = 0
                attrs[cc][termios.VTIME] = timeout_decisec
                termios.tcsetattr(fd, termios.TCSANOW, attrs)

        def stop(self):
                # restore the settings saved by the constructor
                termios.tcsetattr(self._fd, termios.TCSANOW, self._saved)

        def getch(self):
                # returns one byte of input, or None if nothing arrived
                debug_flush()
                byte = os.read(self._fd, 1)
                if len(byte):
                        return byte
                return None

        def info(self):
                # nothing extra to show on the status line
                return ''
1147
1148 #---------- main program ----------
1149
def main():
        # Program entry point: parses the command line, sets up the
        #  globals `opts' and `fetcher', and runs the do_MODE function
        #  for the requested action with the remaining arguments and
        #  the option parser's error function as `bu'.
        global opts, fetcher

        pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
actions:
 yoweb-scrape [--ocean OCEAN ...] pirate PIRATE
 yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
 yoweb-scrape [options] ship-aid CHAT-LOG  (must be .../PIRATE_OCEAN_chat-log*)

display modes (for --display) apply to ship-aid:
 --display=dumb       just print new information, scrolling the screen
 --display=overwrite  use cursor motion, selective clear, etc. to redraw at top
''')
        ao = pa.add_option
        ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None,
                help='select ocean OCEAN')
        ao('--cache-dir', dest='cache_dir', metavar='DIR',
                default='~/.yoweb-scrape-cache',
                help='cache yoweb pages in DIR')
        ao('-D','--debug', action='count', dest='debug', default=0,
                help='enable debugging output')
        ao('--debug-fd', type='int', dest='debug_fd',
                help='write any debugging output to specified fd')
        ao('-q','--quiet', action='store_true', dest='quiet',
                help='suppress warning output')
        ao('--display', action='store', dest='display',
                type='choice', choices=['dumb','overwrite'],
                help='how to display ship aid')

        # these two share a destination; it stays None if neither
        #  is given, so that each mode can choose its own default
        ao('--ship-duty', action='store_true', dest='ship_duty',
                help='show ship duty station puzzles')
        ao('--all-puzzles', action='store_false', dest='ship_duty',
                help='show all puzzles, not just ship duty stations')

        (opts,args) = pa.parse_args()
        random.seed()

        if len(args) < 1:
                pa.error('need a mode argument')

        if opts.debug_fd is not None:
                opts.debug_file = os.fdopen(opts.debug_fd, 'w')
        else:
                opts.debug_file = sys.stdout

        # Mode `foo-bar' is implemented by do_foo_bar.  Underscores
        #  in the mode are first turned into '#', which cannot occur
        #  in a function name, so `foo_bar' is not accepted as well.
        mode = args[0]
        mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_')
        try: mode_fn = globals()[mode_fn_name]
        except KeyError: pa.error('unknown mode "%s"' % mode)

        # fixed parameters
        opts.min_max_age = 60
        opts.expire_age = 3600
        opts.ship_reboard_clearout = 3600

        # expanduser rather than $HOME by hand, so that this also
        #  works (via the password database) when HOME is not set
        if opts.cache_dir.startswith('~/'):
                opts.cache_dir = os.path.expanduser(opts.cache_dir)

        # Overwriting display is the default only if stdout is a
        #  terminal and debug output is not going to be mixed into it.
        if opts.display is None:
                if ((opts.debug > 0 and opts.debug_fd is None)
                    or not os.isatty(sys.stdout.fileno())):
                        opts.display = 'dumb'
                else:
                        opts.display = 'overwrite'

        fetcher = Fetcher(opts.ocean, opts.cache_dir)

        mode_fn(args[1:], pa.error)
1221
# Run the program only when executed as a script, so that the file
#  can be loaded (eg for testing) without parsing sys.argv.
if __name__ == '__main__':
        main()