chiark / gitweb /
Added and edited commodity ordering.
[ypp-sc-tools.db-test.git] / yoweb-scrape
1 #!/usr/bin/python
2 # This is part of ypp-sc-tools, a set of third-party tools for assisting
3 # players of Yohoho Puzzle Pirates.
4 #
5 # Copyright (C) 2009 Ian Jackson <ijackson@chiark.greenend.org.uk>
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 #
20 # Yohoho and Puzzle Pirates are probably trademarks of Three Rings and
21 # are used without permission.  This program is not endorsed or
22 # sponsored by Three Rings.
23
24 copyright_info = '''
25 yoweb-scrape is part of ypp-sc-tools  Copyright (C) 2009 Ian Jackson
26 This program comes with ABSOLUTELY NO WARRANTY; this is free software,
27 and you are welcome to redistribute it under certain conditions.
28 For details, read the top of the yoweb-scrape file.
29 '''
30
31 #---------- setup ----------
32
33 import signal
34 signal.signal(signal.SIGINT, signal.SIG_DFL)
35
36 import os
37 import time
38 import urllib
39 import urllib2
40 import errno
41 import sys
42 import re as regexp
43 import random
44 import curses
45 import termios
46 from optparse import OptionParser
47 from StringIO import StringIO
48
49 from BeautifulSoup import BeautifulSoup
50
# Global options object read throughout this file (opts.debug,
# opts.expire_age, ...).  Presumably replaced by the parsed OptionParser
# result elsewhere in the file -- not visible in this section; confirm.
opts = None
52
53 #---------- YPP parameters and arrays ----------
54
# Every puzzle name recognised on a pirate page; also the default column
# order used by StandingsTable.
puzzles = [
        'Swordfighting', 'Bilging', 'Sailing', 'Rigging', 'Navigating',
        'Battle Navigation', 'Gunning', 'Carpentry', 'Rumble',
        'Treasure Haul', 'Drinking', 'Spades', 'Hearts', 'Treasure Drop',
        'Poker', 'Distilling', 'Alchemistry', 'Shipwrightery',
        'Blacksmithing', 'Foraging',
        ]

# A nested list is a group of puzzles sharing one table column;
# StandingsTable shows the best standing found within the group.
core_duty_puzzles = [
        'Gunning',
        ['Sailing', 'Rigging'],
        'Bilging',
        'Carpentry',
        ]

duty_puzzles = (['Navigating', 'Battle Navigation']
                + core_duty_puzzles
                + ['Treasure Haul'])

# Standing names, lowest first; a standing is stored as its index here.
standingvals = [
        'Able', 'Distinguished', 'Respected', 'Master',
        'Renowned', 'Grand-Master', 'Legendary', 'Ultimate',
        ]
standing_limit = len(standingvals)

# Matches the href of a link to a pirate's page.
pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm')

max_pirate_namelen = 12
78
79
80 #---------- general utilities ----------
81
def debug(m):
        # Print the message m to opts.debug_file; does nothing unless
        # opts.debug is greater than zero.
        if not opts.debug > 0:
                return
        print >>opts.debug_file, m
85
def debug_flush():
        # Flush opts.debug_file; does nothing unless opts.debug is
        # greater than zero.
        if not opts.debug > 0:
                return
        opts.debug_file.flush()
89
def sleep(seconds):
        # Pause for `seconds' seconds, flushing any debug output first
        # (via debug_flush) before the pause begins.
        debug_flush()
        time.sleep(seconds)
93
def format_time_interval(ti):
        # Render a duration of ti seconds as a short string.  The unit
        # depends on the magnitude:
        #   below 120 seconds    'M:SS'
        #   below 7200 seconds   whole minutes, e.g. ' 5m'
        #   below 86400 seconds  whole hours,   e.g. '3h'
        #   otherwise            whole days,    e.g. '2d'
        if ti >= 86400:
                return '%dd' % (ti / 86400)
        if ti >= 7200:
                return '%dh' % (ti / 3600)
        if ti >= 120:
                return '%2dm' % (ti / 60)
        return '%d:%02d' % (ti / 60, ti % 60)
99
100 #---------- caching and rate-limiting data fetcher ----------
101
102 class Fetcher:
103         def __init__(self, ocean, cachedir):
104                 debug('Fetcher init %s' % cachedir)
105                 self.ocean = ocean
106                 self.cachedir = cachedir
107                 try: os.mkdir(cachedir)
108                 except (OSError,IOError), oe:
109                         if oe.errno != errno.EEXIST: raise
110                 self._cache_scan(time.time())
111
112         def default_ocean(self, ocean='ice'):
113                 if self.ocean is None:
114                         self.ocean = ocean
115
116         def _cache_scan(self, now):
117                 # returns list of ages, unsorted
118                 ages = []
119                 debug('Fetcher   scan_cache')
120                 for leaf in os.listdir(self.cachedir):
121                         if not leaf.startswith('#'): continue
122                         path = self.cachedir + '/' + leaf
123                         try: s = os.stat(path)
124                         except (OSError,IOError), oe:
125                                 if oe.errno != errno.ENOENT: raise
126                                 continue
127                         age = now - s.st_mtime
128                         if age > opts.expire_age:
129                                 debug('Fetcher    expire %d %s' % (age, path))
130                                 try: os.remove(path)
131                                 except (OSError,IOError), oe:
132                                         if oe.errno != errno.ENOENT: raise
133                                 continue
134                         ages.append(age)
135                 return ages
136
137         def need_wait(self, now, imaginary=[]):
138                 ages = self._cache_scan(now)
139                 ages += imaginary
140                 ages.sort()
141                 debug('Fetcher   ages ' + `ages`)
142                 min_age = 1
143                 need_wait = 0
144                 for age in ages:
145                         if age < min_age and age < 300:
146                                 debug('Fetcher   morewait min=%d age=%d' %
147                                         (min_age, age))
148                                 need_wait = max(need_wait, min_age - age)
149                         min_age += 3
150                         min_age *= 1.25
151                 return need_wait
152
153         def _rate_limit_cache_clean(self, now):
154                 need_wait = self.need_wait(now)
155                 if need_wait > 0:
156                         debug('Fetcher   wait %d' % need_wait)
157                         sleep(need_wait)
158
159         def fetch(self, url, max_age):
160                 debug('Fetcher fetch %s' % url)
161                 cache_corename = urllib.quote_plus(url)
162                 cache_item = "%s/#%s#" % (self.cachedir, cache_corename)
163                 try: f = file(cache_item, 'r')
164                 except (OSError,IOError), oe:
165                         if oe.errno != errno.ENOENT: raise
166                         f = None
167                 now = time.time()
168                 max_age = max(opts.min_max_age, min(max_age, opts.expire_age))
169                 if f is not None:
170                         s = os.fstat(f.fileno())
171                         age = now - s.st_mtime
172                         if age > max_age:
173                                 debug('Fetcher  stale %d < %d'% (max_age, age))
174                                 f = None
175                 if f is not None:
176                         data = f.read()
177                         f.close()
178                         debug('Fetcher  cached %d > %d' % (max_age, age))
179                         return data
180
181                 debug('Fetcher  fetch')
182                 self._rate_limit_cache_clean(now)
183
184                 stream = urllib2.urlopen(url)
185                 data = stream.read()
186                 cache_tmp = "%s/#%s~%d#" % (
187                         self.cachedir, cache_corename, os.getpid())
188                 f = file(cache_tmp, 'w')
189                 f.write(data)
190                 f.close()
191                 os.rename(cache_tmp, cache_item)
192                 debug('Fetcher  stored')
193                 return data
194
195         def yoweb(self, kind, tail, max_age):
196                 self.default_ocean()
197                 url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
198                         self.ocean, kind, tail)
199                 return self.fetch(url, max_age)
200
201 #---------- logging assistance for troubled screenscrapers ----------
202
class SoupLog:
        # Collects messages describing problems met while scraping.
        #  sl.msgs = [ 'message', ... ]

        def __init__(self):
                self.msgs = []

        def msg(self, m):
                # Record the message m.
                self.msgs.append(m)

        def soupm(self, obj, m):
                # Record m followed by the repr of the object concerned.
                self.msg(m + '; in ' + repr(obj))

        def needs_msgs(self, child_souplog):
                # Take over every message of child_souplog, leaving it
                # with a fresh empty list.
                self.msgs.extend(child_souplog.msgs)
                child_souplog.msgs = []
213
def soup_text(obj):
        # Join together every text node found by obj.findAll(text=True)
        # and return the result with surrounding whitespace stripped.
        fragments = obj.findAll(text=True)
        return ''.join(fragments).strip()
217
class SomethingSoupInfo(SoupLog):
        # Common base of the page scrapers: fetches one yoweb page and
        # parses it, leaving the parse tree in self._soup.
        # `fetcher' is a module-level object providing yoweb(); it is
        # presumably a Fetcher created elsewhere in the file -- confirm.

        def __init__(self, kind, tail, max_age):
                SoupLog.__init__(self)
                page = fetcher.yoweb(kind, tail, max_age)
                entity_mode = BeautifulSoup.HTML_ENTITIES
                self._soup = BeautifulSoup(page, convertEntities=entity_mode)
225
226 #---------- scraper for pirate pages ----------
227
class PirateInfo(SomethingSoupInfo):
        # Public data members:
        #  pi.standings = { 'Treasure Haul': 0, ... }
        #       values are indices into standingvals (0 = 'Able');
        #       puzzles whose standing could not be determined are absent
        #  pi.name = name
        #  pi.crew = (id, name)   or None if not found
        #  pi.flag = (id, name)   or None if not found
        #  pi.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, pirate, max_age=300):
                # Fetch (possibly from cache, no older than max_age) and
                # scrape the page for the pirate named `pirate'.
                SomethingSoupInfo.__init__(self,
                        'pirate.wm?target=', pirate, max_age)
                self.name = pirate
                self._find_standings()
                self.crew = self._find_crewflag('crew',
                        '^/yoweb/crew/info\\.wm')
                self.flag = self._find_crewflag('flag',
                        '^/yoweb/flag/info\\.wm')

        def _find_standings(self):
                # Fills in self.standings from the standing images on the
                # page.  Problems are collected in a private log, which is
                # merged into self.msgs only if at least one puzzle ends
                # up without a standing.
                imgs = self._soup.findAll('img',
                        src=regexp.compile('/yoweb/images/stat.*'))
                # Two alternatives, each capturing the standing name:
                # 'xxx/Standing' or 'xxx/yyy (ocean-wide Standing)'.
                re = regexp.compile(
u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
                        )

                # standings[puzzle] = list of standing names seen for it
                standings = { }
                for skill in puzzles:
                        standings[skill] = [ ]

                skl = SoupLog()

                for img in imgs:
                        try: puzzle = img['alt']
                        except KeyError: continue

                        if puzzle not in puzzles:
                                skl.soupm(img, 'unknown puzzle: "%s"' % puzzle)
                                continue
                        key = img.findParent('td')
                        if key is None:
                                skl.soupm(img, 'puzzle at root! "%s"' % puzzle)
                                continue
                        # the standing text is in the next table cell
                        valelem = key.findNextSibling('td')
                        if valelem is None:
                                skl.soupm(key, 'puzzle missing sibling "%s"'
                                        % puzzle)
                                continue
                        valstr = soup_text(valelem)
                        match = re.match(valstr)
                        if match is None:
                                skl.soupm(key, ('puzzle "%s" unparseable'+
                                        ' standing "%s"') % (puzzle, valstr))
                                continue
                        # lastindex is whichever alternative's group matched
                        standing = match.group(match.lastindex)
                        standings[puzzle].append(standing)

                self.standings = { }

                for puzzle in puzzles:
                        sl = standings[puzzle]
                        if len(sl) > 1:
                                skl.msg('puzzle "%s" multiple standings %s' %
                                                (puzzle, repr(sl)))
                                continue
                        if not sl:
                                skl.msg('puzzle "%s" no standing found' % puzzle)
                                continue
                        standing = sl[0]
                        try:
                                self.standings[puzzle] = (
                                        standingvals.index(standing))
                        except ValueError:
                                skl.msg('puzzle "%s" unknown standing "%s"' %
                                        (puzzle, standing))

                # Report the collected problems only if something is missing.
                for puzzle in puzzles:
                        if puzzle not in self.standings:
                                self.needs_msgs(skl)
                                break

        def _find_crewflag(self, cf, yoweb_re):
                # cf is 'crew' or 'flag'; yoweb_re matches the href of the
                # corresponding info link.  Returns (id, name), or None
                # (with a message recorded) unless exactly one such link
                # is found and its href ends in '<cf>id=<id>'.
                things = self._soup.findAll('a', href=regexp.compile(yoweb_re))
                if len(things) != 1:
                        self.msg('zero or several %s id references found' % cf)
                        return None
                thing = things[0]
                id_re = '\\b%sid\\=(\\w+)$' % cf
                id_haystack = thing['href']
                match = regexp.compile(id_re).search(id_haystack)
                if match is None:
                        self.soupm(thing, ('incomprehensible %s id ref'+
                                ' (%s in %s)') % (cf, id_re, id_haystack))
                        return None
                name = soup_text(thing)
                return (match.group(1), name)

        def __str__(self):
                return repr((self.crew, self.flag, self.standings, self.msgs))
326
327 #---------- scraper for crew pages ----------
328
class CrewInfo(SomethingSoupInfo):
        # Public data members:
        #  ci.crew = [ ('Captain',        ['Pirate', ...]),
        #              ('Senior Officer', [...]),
        #               ... ]
        #  ci.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, crewid, max_age=300):
                # Fetch (possibly from cache, no older than max_age) and
                # scrape the info page of the crew with id `crewid'.
                SomethingSoupInfo.__init__(self,
                        'crew/info.wm?crewid=', crewid, max_age)
                self._find_crew()

        def _find_crew(self):
                # Fills in self.crew, or leaves it short and records a
                # message if the page does not look as expected.
                self.crew = []
                captain_imgs = self._soup.findAll('img',
                        src='/yoweb/images/crew-captain.png')
                if len(captain_imgs) != 1:
                        self.msg('crew members: no. of captain images != 1')
                        return

                # Climb from the captain image through enclosing tables
                # until reaching one which contains a pirate link.
                node = captain_imgs[0]
                while not node.find('a', href=pirate_ref_re):
                        node = node.findParent('table')
                        if not node:
                                self.msg('crew members: cannot find table')
                                return

                members = None          # member list of the current rank
                rank_img_re = regexp.compile('/yoweb/images/crew')
                # only the table's direct children; bare strings skipped
                for row in node.contents:
                        if isinstance(row, basestring):
                                continue

                        if row.find('img', attrs={'src': rank_img_re}):
                                # a rank heading row starts a new group
                                rank = soup_text(row)
                                members = []
                                self.crew.append((rank, members))
                                continue

                        for link in row.findAll('a', href=pirate_ref_re):
                                if members is None:
                                        self.soupm(link, 'crew members: crew'
                                                ' before rank')
                                else:
                                        members.append(soup_text(link))

        def __str__(self):
                return repr((self.crew, self.msgs))
376
377 #---------- pretty-printer for tables of pirate puzzle standings ----------
378
class StandingsTable:
        # Writes a fixed-width text table of pirates' puzzle standings to
        # the file-like object f, one pirate per line.
        #
        #  use_puzzles  columns to show; each entry is a puzzle name or a
        #               list of names sharing one column.  If None,
        #               duty_puzzles when opts.ship_duty is set, else
        #               puzzles.
        #  col_width    column pitch in characters, including the
        #               separating space
        #  gap_every    a blank line is written before every
        #               gap_every'th data line; None disables this

        def __init__(self, f, use_puzzles=None, col_width=6, gap_every=5):
                if use_puzzles is None:
                        if opts.ship_duty:
                                use_puzzles=duty_puzzles
                        else:
                                use_puzzles=puzzles
                self._puzzles = use_puzzles
                self.f = f
                self._cw = col_width-1  # printable width of one cell
                self._gap_every = gap_every
                self._linecount = 0     # lines since the last heading etc.
                self._o = f.write

        def _nl(self): self._o('\n')

        def _pline(self, pirate, puzstrs, extra):
                # Write one table line: the left-hand label, then each of
                # puzstrs padded and truncated to the cell width, then
                # `extra' (if true) after a space.
                if (self._linecount > 0
                    and self._gap_every is not None
                    and not (self._linecount % self._gap_every)):
                        self._nl()
                self._o(' %-*s' % (max(max_pirate_namelen, 14), pirate))
                for v in puzstrs:
                        self._o(' %-*.*s' % (self._cw,self._cw, v))
                if extra:
                        self._o(' ' + extra)
                self._nl()
                self._linecount += 1

        def _puzstr(self, pi, puzzle):
                # Cell text for one column.  `puzzle' is a puzzle name or
                # a list of names, in which case the best standing counts.
                # Returns '?' if any of the standings is unknown and ''
                # for standing 0; otherwise the standing number (if the
                # cell is wider than 4), a space (if wider than 5), one
                # '*' per two levels and a '+' for an odd level.
                if not isinstance(puzzle,list): puzzle = [puzzle]
                try: standing = max([pi.standings[p] for p in puzzle])
                except KeyError: return '?'
                if not standing: return ''
                s = ''
                if self._cw > 4:
                        s += repr(standing)
                if self._cw > 5:
                        s += ' '
                s += '*' * (standing // 2)
                s += '+' * (standing % 2)
                return s

        def headings(self, lhs='', rhs=None):
                # Write the heading line, with lhs in the name column and
                # rhs after the puzzle columns, and restart gap counting.
                def puzn_redact(name):
                        # Abbreviate a column name.  A group becomes its
                        # members, each cut to half a cell, joined by
                        # '/'.  A name containing a space keeps at most 4
                        # characters of its first word, directly followed
                        # by the rest.
                        if isinstance(name,list):
                                return '/'.join(
                                        ["%.*s" % (self._cw//2, puzn_redact(n))
                                         for n in name])
                        spc = name.find(' ')
                        if spc < 0: return name
                        return name[0:min(4,spc)] + name[spc+1:]
                # a negative count stops _pline writing a gap line here
                self._linecount = -2
                self._pline(lhs, [puzn_redact(p) for p in self._puzzles], rhs)
                self._linecount = 0

        def literalline(self, line):
                # Write `line' verbatim and restart gap counting.
                self._o(line)
                self._nl()
                self._linecount = 0

        def pirate_dummy(self, name, standingstring, extra=None):
                # Write a line for a pirate without standings information.
                # standingstring is repeated once per column and the
                # result is then iterated element by element, so a
                # one-character string gives one cell per column.
                self._pline(name, standingstring * len(self._puzzles), extra)

        def pirate(self, pi, extra=None):
                # Write the line for the pirate described by pi (which
                # provides .name and .standings).
                puzstrs = [self._puzstr(pi,puz) for puz in self._puzzles]
                self._pline(pi.name, puzstrs, extra)
445
446
447 #---------- chat log parser ----------
448
class PirateAboard:
        # A plain record of what is known about one pirate on a vessel.
        #  pa.v                 vessel record the pirate is aboard
        #  pa.name
        #  pa.last_time         time and description of the most
        #  pa.last_event          recent event involving the pirate
        #  pa.gunner            set when the pirate is ordered to gun
        #  pa.last_chat_time    time and channel of the pirate's
        #  pa.last_chat_chan      most recent chat, or None
        #  pa.pi                PirateInfo, or None if not yet fetched

        def __init__(pa, pn, v, time, event):
                pa.name, pa.v = pn, v
                pa.last_time, pa.last_event = time, event
                pa.last_chat_time = pa.last_chat_chan = None
                pa.gunner = False
                pa.pi = None

        def pirate_info(pa):
                # Returns the pirate's PirateInfo, fetching or refreshing
                # it when the fetcher's rate limit allows.  The result
                # may be out of date, or None if nothing has been fetched.
                now = time.time()
                if not pa.pi:
                        imaginary = [1]
                else:
                        age = now - pa.pi_fetched
                        # refresh after a randomly chosen 120-240 seconds
                        guide = random.randint(120,240)
                        if age <= guide:
                                return pa.pi
                        debug('PirateAboard refresh %d > %d  %s' % (
                                age, guide, pa.name))
                        imaginary = [2,6]
                # Fetch only if need_wait reports no delay, with the
                # extra `imaginary' ages taken into account.
                wait = fetcher.need_wait(now, imaginary)
                if wait:
                        debug('PirateAboard fetcher not ready %d' % wait)
                        return pa.pi
                pa.pi = PirateInfo(pa.name, 600)
                pa.pi_fetched = now
                return pa.pi
489
490 class ChatLogTracker:
491         # This is quite complex so we make it opaque.  Use the
492         # official invokers, accessors etc.
493
        def __init__(self, myself_pi, logfn):
                # myself_pi: object whose .name is our own pirate's name
                # logfn: path of the chat log, which is opened for reading
                self._pl = {}   # self._pl['Pirate'] =
                self._vl = {}   #   self._vl['Vessel']['Pirate'] = PirateAboard
                                # self._vl['Vessel']['#lastinfo']
                                # self._vl['Vessel']['#name']
                                # self._v = self._vl[self._vessel]
                # date from the most recent date header line seen by
                # chatline(), as a list of three ints; None until then
                self._date = None
                self._myself = myself_pi
                self._f = file(logfn)
                self._lbuf = ''
                # [0, size of the log file when it was opened]
                self._progress = [0, os.fstat(self._f.fileno()).st_size]
                # _disembark_myself() requests a redisplay, which the
                # next statement then cancels
                self._disembark_myself()
                self._need_redisplay = False
                self._lastvessel = None
508
509         def _disembark_myself(self):
510                 self._v = None
511                 self._vessel = None
512                 self.force_redisplay()
513
        def force_redisplay(self):
                # Mark the display as out of date.
                self._need_redisplay = True
516
        def _vessel_updated(self, v, timestamp):
                # Record `timestamp' as the time of the latest information
                # about vessel record v, and ask for a redisplay.
                v['#lastinfo'] = timestamp
                self.force_redisplay()
520
521         def _onboard_event(self,v,timestamp,pirate,event):
522                 pa = self._pl.get(pirate, None)
523                 if pa is not None and pa.v is v:
524                         pa.last_time = timestamp
525                         pa.last_event = event
526                 else:
527                         if pa is not None: del pa.v[pirate]
528                         pa = PirateAboard(pirate, v, timestamp, event)
529                         self._pl[pirate] = pa
530                         v[pirate] = pa
531                 self._vessel_updated(v, timestamp)
532                 return pa
533
534         def _trash_vessel(self, v):
535                 for pn in v:
536                         if pn.startswith('#'): continue
537                         del self._pl[pn]
538                 vn = v['#name']
539                 del self._vl[vn]
540                 if v is self._v: self._disembark_myself()
541                 self.force_redisplay()
542
543         def _vessel_stale(self, v, timestamp):
544                 return timestamp - v['#lastinfo'] > opts.ship_reboard_clearout
545
546         def _vessel_check_expire(self, v, timestamp):
547                 if not self._vessel_stale(v, timestamp):
548                         return v
549                 self._debug_line_disposition(timestamp,'',
550                         'stale-reset ' + v['#name'])
551                 self._trash_vessel(v)
552                 return None
553
554         def expire_garbage(self, timestamp):
555                 for v in self._vl.values():
556                         self._vessel_check_expire(v, timestamp)
557
558         def _vessel_lookup(self, vn, timestamp, dml=[], create=False):
559                 v = self._vl.get(vn, None)
560                 if v is not None:
561                         v = self._vessel_check_expire(v, timestamp)
562                 if v is not None:
563                         dml.append('found')
564                         return v
565                 if not create:
566                         dml.append('no')
567                 dml.append('new')
568                 self._vl[vn] = v = { '#name': vn }
569                 self._vessel_updated(v, timestamp)
570                 return v
571
572         def _find_matching_vessel(self, pattern, timestamp, cmdr,
573                                         dml=[], create=False):
574                 # use when a commander pirate `cmdr' specified a vessel
575                 #  by name `pattern' (either may be None)
576                 # if create is true, will create the vessel
577                 #  record if an exact name is specified
578
579                 if (pattern is not None and
580                     not '*' in pattern
581                     and len(pattern.split(' ')) == 2):
582                         vn = pattern.title()
583                         dml.append('exact')
584                         return self._vessel_lookup(
585                                 vn, timestamp, dml=dml, create=create)
586
587                 if pattern is None:
588                         pattern_check = lambda vn: True
589                 else:
590                         re = '(?:.* )?%s$' % pattern.lower().replace('*','.+')
591                         pattern_check = regexp.compile(re, regexp.I).match
592
593                 tries = []
594
595                 cmdr_pa = self._pl.get(cmdr, None)
596                 if cmdr_pa: tries.append((cmdr_pa.v, 'cmdr'))
597
598                 tries.append((self._v, 'here'))
599                 tried_vns = []
600
601                 for (v, dm) in tries:
602                         if v is None: dml.append(dm+'?'); continue
603                         
604                         vn = v['#name']
605                         if not pattern_check(vn):
606                                 tried_vns.append(vn)
607                                 dml.append(dm+'#')
608                                 continue
609
610                         dml.append(dm+'!')
611                         return v
612
613                 if pattern is not None and '*' in pattern:
614                         search = [
615                                 (vn,v)
616                                 for (vn,v) in self._vl.iteritems()
617                                 if not self._vessel_stale(v, timestamp)
618                                 if pattern_check(vn)
619                                 ]
620                         #debug('CLT-RE /%s/ wanted (%s) searched (%s)' % (
621                         #       re,
622                         #       '/'.join(tried_vns),
623                         #       '/'.join([vn for (vn,v) in search])))
624
625                         if len(search)==1:
626                                 dml.append('one')
627                                 return search[0][1]
628                         elif search:
629                                 dml.append('many')
630                         else:
631                                 dml.append('none')
632
        def _debug_line_disposition(self,timestamp,l,m):
                # Debug-log what was done (m) with the chat log line l
                # which carried `timestamp'.
                debug('CLT %13s %-40s %s' % (timestamp,m,l))
635
636         def chatline(self,l):
637                 rm = lambda re: regexp.match(re,l)
638                 d = lambda m: self._debug_line_disposition(timestamp,l,m)
639                 timestamp = None
640
641                 m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$')
642                 if m:
643                         self._date = [int(x) for x in m.groups()]
644                         self._previous_timestamp = None
645                         return d('date '+`self._date`)
646
647                 if self._date is None:
648                         return d('date unset')
649
650                 m = rm('\\[(\d\d):(\d\d):(\d\d)\\] ')
651                 if not m:
652                         return d('no timestamp')
653
654                 while True:
655                         time_tuple = (self._date +
656                                       [int(x) for x in m.groups()] +
657                                       [-1,-1,-1])
658                         timestamp = time.mktime(time_tuple)
659                         if timestamp >= self._previous_timestamp: break
660                         self._date[2] += 1
661                         self._debug_line_disposition(timestamp,'',
662                                 'new date '+`self._date`)
663
664                 self._previous_timestamp = timestamp
665
666                 l = l[l.find(' ')+1:]
667
668                 def ob_x(pirate,event):
669                         return self._onboard_event(
670                                         self._v, timestamp, pirate, event)
671                 def ob1(did): ob_x(m.group(1), did); return d(did)
672                 def oba(did): return ob1('%s %s' % (did, m.group(2)))
673
674                 def disembark(v, timestamp, pirate, event):
675                         self._onboard_event(
676                                         v, timestamp, pirate, 'leaving '+event)
677                         del v[pirate]
678                         del self._pl[pirate]
679
680                 def disembark_me(why):
681                         self._disembark_myself()
682                         return d('disembark-me '+why)
683
684                 m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$')
685                 if m:
686                         dm = ['boarding']
687                         pn = self._myself.name
688                         vn = m.group(1)
689                         v = self._vessel_lookup(vn, timestamp, dm, create=True)
690                         self._lastvessel = self._vessel = vn
691                         self._v = v
692                         ob_x(pn, 'we boarded')
693                         self.expire_garbage(timestamp)
694                         return d(' '.join(dm))
695
696                 if self._v is None:
697                         return d('no vessel')
698
699                 m = rm('(\\w+) has come aboard\\.$')
700                 if m: return ob1('boarded');
701
702                 m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$')
703                 if m:
704                         (who,what) = m.groups()
705                         pa = ob_x(who,'ord '+what)
706                         if what == 'Gunning':
707                                 pa.gunner = True
708                         return d('duty order')
709
710                 m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$')
711                 if m: oba('stopped'); return d("end")
712
713                 def chat_core(speaker, chan):
714                         try: pa = self._pl[speaker]
715                         except KeyError: return 'mystery'
716                         if pa.v is not self._v: return 'elsewhere'
717                         pa.last_chat_time = timestamp
718                         pa.last_chat_chan = chan
719                         self.force_redisplay()
720                         return 'here'
721
722                 def chat(chan):
723                         speaker = m.group(1)
724                         dm = chat_core(speaker, chan)
725                         return d('chat %s %s' % (chan, dm))
726
727                 def chat_metacmd(chan):
728                         (cmdr, metacmd) = m.groups()
729                         metacmd = regexp.sub('\\s+', ' ', metacmd).strip()
730                         m2 = regexp.match(
731                                 '/([ad]) (?:([A-Za-z* ]+)\\s*:)?([A-Za-z ]+)$',
732                                 metacmd)
733                         if not m2: return chat(chan)
734
735                         (cmd, pattern, targets) = m2.groups()
736                         dml = ['cmd', chan, cmd]
737
738                         if cmd == 'a': each = self._onboard_event
739                         else: each = disembark
740
741                         if cmdr == self._myself.name:
742                                 dml.append('self')
743                                 how = 'cmd: %s' % cmd
744                         else:
745                                 dml.append('other')
746                                 how = 'cmd: %s %s' % (cmd,cmdr)
747
748                         v = self._find_matching_vessel(
749                                 pattern, timestamp, cmdr, dml, create=True)
750
751                         if v is not None:
752                                 targets = targets.strip().split(' ')
753                                 dml.append(`len(targets)`)
754                                 for target in targets:
755                                         each(v, timestamp, target.title(), how)
756                                 self._vessel_updated(v, timestamp)
757
758                         dm = ' '.join(dml)
759                         chat_core(cmdr, 'cmd '+chan)
760                         return d(dm)
761
762                 m = rm('(\\w+) (?:issued an order|ordered everyone) "')
763                 if m: return ob1('general order');
764
765                 m = rm('(\\w+) says, "')
766                 if m: return chat('public')
767
768                 m = rm('(\\w+) tells ye, "')
769                 if m: return chat('private')
770
771                 m = rm('Ye told (\\w+), "(.*)"$')
772                 if m: return chat_metacmd('private')
773
774                 m = rm('(\\w+) flag officer chats, "')
775                 if m: return chat('flag officer')
776
777                 m = rm('(\\w+) officer chats, "(.*)"$')
778                 if m: return chat_metacmd('officer')
779
780                 m = rm('Ye accepted the offer to job with ')
781                 if m: return disembark_me('jobbing')
782
783                 m = rm('Ye hop on the ferry and are whisked away ')
784                 if m: return disembark_me('ferry')
785
786                 m = rm('Whisking away to yer home on the magical winds')
787                 if m: return disembark_me('home')
788
789                 m = rm('Game over\\.  Winners: ([A-Za-z, ]+)\\.$')
790                 if m:
791                         pl = m.group(1).split(', ')
792                         if not self._myself.name in pl:
793                                 return d('lost melee')
794                         for pn in pl:
795                                 if ' ' in pn: continue
796                                 ob_x(pn,'won melee')
797                         return d('won melee')
798
799                 m = rm('(\\w+) is eliminated\\!')
800                 if m: return ob1('eliminated in fray');
801
802                 m = rm('(\\w+) has driven \w+ from the ship\\!')
803                 if m: return ob1('boarder repelled');
804
805                 m = rm('\w+ has bested (\\w+), and turns'+
806                         ' to the rest of the ship\\.')
807                 if m: return ob1('boarder unrepelled');
808
809                 m = rm('(\\w+) has left the vessel\.')
810                 if m:
811                         pirate = m.group(1)
812                         disembark(self._v, timestamp, pirate, 'disembarked')
813                         return d('disembarked')
814
815                 return d('not-matched')
816
817         def _str_vessel(self, vn, v):
818                 s = ' vessel %s\n' % vn
819                 s += ' '*20 + "%-*s   %13s\n" % (
820                                 max_pirate_namelen, '#lastinfo',
821                                 v['#lastinfo'])
822                 assert v['#name'] == vn
823                 for pn in sorted(v.keys()):
824                         if pn.startswith('#'): continue
825                         pa = v[pn]
826                         assert pa.v == v
827                         assert self._pl[pn] == pa
828                         s += ' '*20 + "%s %-*s %13s %-30s %13s %s\n" % (
829                                 (' ','G')[pa.gunner],
830                                 max_pirate_namelen, pn,
831                                 pa.last_time, pa.last_event,
832                                 pa.last_chat_time, pa.last_chat_chan)
833                 return s
834
835         def __str__(self):
836                 s = '''<ChatLogTracker
837  myself %s
838  vessel %s
839 '''                     % (self._myself.name, self._vessel)
840                 assert ((self._v is None and self._vessel is None) or
841                         (self._v is self._vl[self._vessel]))
842                 if self._vessel is not None:
843                         s += self._str_vessel(self._vessel, self._v)
844                 for vn in sorted(self._vl.keys()):
845                         if vn == self._vessel: continue
846                         s += self._str_vessel(vn, self._vl[vn])
847                 for p in self._pl:
848                         pa = self._pl[p]
849                         assert pa.v[p] is pa
850                         assert pa.v in self._vl.values()
851                 s += '>\n'
852                 return s
853
854         def catchup(self, progress=None):
855                 while True:
856                         more = self._f.readline()
857                         if not more: break
858
859                         self._progress[0] += len(more)
860                         if progress: progress.progress(*self._progress)
861
862                         self._lbuf += more
863                         if self._lbuf.endswith('\n'):
864                                 self.chatline(self._lbuf.rstrip())
865                                 self._lbuf = ''
866                                 if opts.debug >= 2:
867                                         debug(self.__str__())
868                 if progress: progress.caughtup()
869
870         def changed(self):
871                 rv = self._need_redisplay
872                 self._need_redisplay = False
873                 return rv
874         def myname(self):
875                 # returns our pirate name
876                 return self._myself.name
877         def vesselname(self):
878                 # returns the vessel name we're aboard or None
879                 return self._vessel
880         def lastvesselname(self):
881                 # returns the last vessel name we were aboard or None
882                 return self._lastvessel
883         def aboard(self, vesselname=True):
884                 # returns a list of PirateAboard the vessel
885                 #  sorted by pirate name
886                 #  you can pass this None and you'll get []
887                 #  or True for the current vessel (which is the default)
888                 #  the returned value is a fresh list of persistent
889                 #  PirateAboard objects
890                 if vesselname is True: v = self._v
891                 else: v = self._vl.get(vesselname.title())
892                 if v is None: return []
893                 return [ v[pn]
894                          for pn in sorted(v.keys())
895                          if not pn.startswith('#') ]
896
897 #---------- implementations of actual operation modes ----------
898
def do_pirate(pirates, bu):
        """Mode 'pirate': print information about each named pirate.

        The output is brace-delimited, with one "repr(name): info," line
        per pirate, where info is the string form of that pirate's
        PirateInfo.  bu (the bad-usage callback) is accepted for
        uniformity with the other modes but is not used.
        """
        print('{')
        for name in pirates:
                print('%s: %s,' % (repr(name), PirateInfo(name)))
        print('}')
905
def prep_crew_of(args, bu, max_age=300):
        """Look up the crew of the single pirate named in args.

        bu is called with a message unless args has exactly one element.
        max_age is passed on to both the PirateInfo and the CrewInfo
        lookups.  Returns a CrewInfo, or None when the pirate's crew
        attribute is None.
        """
        if len(args) != 1:
                bu('crew-of takes one pirate name')
        pi = PirateInfo(args[0], max_age)
        if pi.crew is not None:
                return CrewInfo(pi.crew[0], max_age)
        return None
911
def do_crew_of(args, bu):
        """Mode 'crew-of': print the crew information for one pirate."""
        print(prep_crew_of(args, bu))
915
def do_standings_crew_of(args, bu):
        """Mode 'standings-crew-of': tabulate standings for a whole crew.

        The crew of the single pirate named in args is looked up, and a
        StandingsTable is written to stdout with one section per
        non-empty rank and one row per member.
        """
        ci = prep_crew_of(args, bu, 60)
        if ci is None:
                # prep_crew_of found no crew for this pirate; without
                # this check the ci.crew access below would raise
                # AttributeError
                sys.stderr.write(
                        'no crew information available for this pirate\n')
                return
        tab = StandingsTable(sys.stdout)
        tab.headings()
        for (rank, members) in ci.crew:
                if not members: continue
                tab.literalline('')
                tab.literalline('%s:' % rank)
                for p in members:
                        # a random maximum cache age per pirate
                        pi = PirateInfo(p, random.randint(900,1800))
                        tab.pirate(pi)
927
class ProgressPrintPercentage:
        """Progress reporter which writes a one-line percentage to a file.

        Progress lines end in a carriage return rather than a newline, so
        each one overwrites the previous one on a terminal.
        """
        def __init__(self, f=sys.stdout):
                # f: file-like object which receives all the output
                self._f = f
        def progress_string(self,done,total):
                """Return the progress line for done out of total."""
                if total > 0:
                        percent = (done*100) // total
                else:
                        # nothing was expected, so call it complete
                        # rather than dividing by zero
                        percent = 100
                return "scan chat logs %3d%%\r" % percent
        def progress(self,*a):
                """Write and flush the progress line; a is (done, total)."""
                self._f.write(self.progress_string(*a))
                self._f.flush()
        def show_init(self, pirate, ocean):
                """Write the startup banner naming the pirate and ocean."""
                self._f.write('Starting up, %s on the %s ocean' % (
                        pirate, ocean) + '\n')
        def caughtup(self):
                """Blank out the progress line."""
                self._f.write('                   \r')
                self._f.flush()
942
943 #----- modes which use the chat log parser are quite complex -----
944
def prep_chat_log(args, bu,
                progress=None,
                max_myself_age=3600):
        """Open a chat log and bring a tracker up to date with it.

        args must be a one-element list holding the chat log filename.
        After any directory part, the name must start with a capitalised
        pirate name, an underscore, a lower-case ocean name and another
        underscore.  bu is called with a message when args or the
        filename is unacceptable.

        progress receives the startup banner and the catch-up progress
        reports; when omitted a ProgressPrintPercentage is used.
        max_myself_age is passed to the PirateInfo lookup for the log's
        owner.

        Returns (myself, track): the owner's PirateInfo and a
        ChatLogTracker which has read the whole log.
        """
        if progress is None:
                # built on demand rather than once at definition time
                progress = ProgressPrintPercentage()

        if len(args) != 1: bu('this action takes only chat log filename')
        logfn = args[0]
        logfn_re = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_'
        match = regexp.match(logfn_re, logfn)
        if not match: bu('chat log filename is not in expected format')
        (pirate, ocean) = match.groups()
        fetcher.default_ocean(ocean)

        progress.show_init(pirate, fetcher.ocean)
        myself = PirateInfo(pirate,max_myself_age)
        track = ChatLogTracker(myself, logfn)

        # lower the debug level during the initial scan, and restore it
        # even if the scan fails
        opts.debug -= 2
        try:
                track.catchup(progress)
        finally:
                opts.debug += 2

        track.force_redisplay()

        return (myself, track)
967
def do_track_chat_log(args, bu):
        """Mode 'track-chat-log': follow a chat log, dumping tracker state.

        Runs until interrupted: once a second any new log data is read
        and, if the tracker reports a change, the tracker is printed.
        """
        (_myself, tracker) = prep_chat_log(args, bu)
        while True:
                tracker.catchup()
                if tracker.changed():
                        print(tracker)
                sleep(1)
975
976 #----- ship management aid -----
977
class Display_dumb(ProgressPrintPercentage):
        """Displayer which simply prints each new screenful."""
        def __init__(self):
                ProgressPrintPercentage.__init__(self)
        def show(self, s):
                """Print s preceded by two blank lines."""
                print('\n\n%s' % (s,))
        def realstart(self):
                """Nothing to prepare for this display mode."""
                pass
985
class Display_overwrite(ProgressPrintPercentage):
        """Displayer which redraws in place using terminfo capabilities.

        self._t maps capability names to control strings: 'ho' (move to
        the top left), 'el' and 'ed'.  If 'el'/'ed' are unavailable they
        are left empty and 'ho' becomes the clear-screen string.  If
        even 'clear' is unavailable, show() falls back to plain printing.
        """
        def __init__(self):
                ProgressPrintPercentage.__init__(self)

                # setupterm is pointed at /dev/null rather than the real
                # terminal
                null = open('/dev/null','w')
                try:
                        curses.setupterm(fd=null.fileno())

                        self._clear = curses.tigetstr('clear')
                        if not self._clear:
                                self._debug('missing clear!')
                                # nothing for realstart() to write
                                self._clear = ''
                                # a bound method, so that show(s) works;
                                # Display_dumb.show itself is unbound
                                self.show = self._show_dumb
                                return

                        self._t = {'el':'', 'ed':''}
                        if not self._init_sophisticated():
                                for k in list(self._t.keys()):
                                        self._t[k] = ''
                                self._t['ho'] = self._clear
                finally:
                        null.close()

        def _debug(self,m): debug('display overwrite: '+m)

        def _show_dumb(self, s):
                # fallback display: same output as Display_dumb.show
                print('\n\n%s' % (s,))

        def _init_sophisticated(self):
                """Fill self._t from terminfo; return 1 on success, else 0."""
                for k in list(self._t.keys()):
                        self._t[k] = curses.tigetstr(k)
                self._t['ho'] = curses.tigetstr('ho')
                if not self._t['ho']:
                        # build a home sequence from cursor addressing,
                        # if the terminal has it
                        cup = curses.tigetstr('cup')
                        if cup:
                                self._t['ho'] = curses.tparm(cup,0,0)
                missing = [k for k in self._t.keys() if not self._t[k]]
                if missing:
                        self._debug('missing '+(' '.join(missing)))
                        return 0
                return 1

        def show(self, s):
                """Redraw the screen with s, overwriting the old contents.

                Trailing whitespace of s is dropped; the 'el' string is
                written after every line and the 'ed' string after the
                last one.
                """
                w = sys.stdout.write
                def wti(k): w(self._t[k])

                wti('ho')
                nl = ''
                for l in s.rstrip().split('\n'):
                        w(nl)
                        w(l)
                        wti('el')
                        nl = '\r\n'
                wti('ed')
                w(' ')
                sys.stdout.flush()

        def realstart(self):
                """Clear the screen before the first real display."""
                sys.stdout.write(self._clear)
                sys.stdout.flush()
1038                         
1039
def do_ship_aid(args, bu):
        """Mode 'ship-aid': interactive display of who is aboard.

        opts.ship_duty defaults to True here when the user did not choose.
        The displayer class is selected by opts.display.  Keystrokes are
        read from fd 0 when it is a terminal; otherwise a dummy reader
        is used.
        """
        if opts.ship_duty is None: opts.ship_duty = True

        displayer = globals()['Display_'+opts.display]()

        (myself, track) = prep_chat_log(args, bu, progress=displayer)

        displayer.realstart()

        if os.isatty(0): kr_create = KeystrokeReader
        else: kr_create = DummyKeystrokeReader

        # Created outside the try: if construction fails there is nothing
        # to stop, and the finally clause must not hide that failure
        # behind a NameError.
        kreader = kr_create(0, 10)
        try:
                ship_aid_core(myself, track, displayer, kreader)
        finally:
                kreader.stop()
                print('\n')
1058
class KeyBasedSorter:
        """Base for sorters which order pirates by a per-pirate key.

        Subclasses provide compar_key(pi), returning the sort key for a
        pirate's information object.
        """
        def compar_key_pa(self, pa):
                """Return the sort key for pa, or None if it has no info."""
                info = pa.pirate_info()
                if info is not None:
                        return self.compar_key(info)
                return None
        def lsort_pa(self, l):
                """Sort the list l in place by compar_key_pa."""
                l.sort(key = self.compar_key_pa)
1066
class NameSorter(KeyBasedSorter):
        """Sorter which orders pirates by name."""
        def compar_key(self, pi):
                return pi.name
        def desc(self):
                """Return the description of this ordering."""
                return 'name'
1070
class SkillSorter(NameSorter):
        """Sorter which puts the best pirates at the wanted puzzles first.

        relevant is a '/'-separated list of wanted puzzle names.  Every
        puzzle in core_duty_puzzles which is not wanted is treated as a
        puzzle to avoid.
        """
        def __init__(self, relevant):
                self._want = frozenset(relevant.split('/'))
                avoid = set()
                for entry in core_duty_puzzles:
                        # an entry is a puzzle name or a collection of them
                        if isinstance(entry,basestring): avoid.add(entry)
                        else: avoid.update(entry)
                self._avoid = avoid - self._want
                self._desc = '%s' % relevant

        def desc(self): return self._desc

        def compar_key(self, pi):
                """Return the sort key for pi.

                The key is a tuple of: the negated best standing among
                the wanted puzzles (missing standings count as -1); the
                standings at the avoided puzzles, highest first (missing
                ones count as standing_limit); and the pirate's name.
                """
                standings = pi.standings
                best_want = max([
                        standings.get(puz,-1)
                        for puz in self._want
                        ])
                # negated values sorted ascending, as reported by debug
                best_avoid = sorted([
                        -standings.get(puz,standing_limit)
                        for puz in self._avoid
                        ])
                debug('compar_key %s bw=%s ba=%s' % (pi.name,
                        repr(best_want), repr(best_avoid)))
                return (-best_want, [-x for x in best_avoid], pi.name)
1097
def ship_aid_core(myself, track, displayer, kreader):
        """Main loop of ship-aid: redraw the crew table and handle keys.

        Each iteration catches up with the chat log, shows a status line
        and a standings table of the pirates aboard, then waits for a
        key from kreader.  'q' quits, 'a' sorts by name, and the keys in
        skill_keys sort by skill at the corresponding puzzle(s); other
        keys are ignored.
        """

        # keystroke -> puzzle specification handed to SkillSorter
        skill_keys = {
                'g': 'Gunning',
                'c': 'Carpentry',
                's': 'Sailing/Rigging',
                'b': 'Bilging',
                'n': 'Navigating',
                'd': 'Battle Navigation',
                't': 'Treasure Haul',
        }

        def find_vessel():
                # returns (vessel name or None, description of where we are)
                current = track.vesselname()
                if current:
                        return (current, " on board the %s" % current)
                previous = track.lastvesselname()
                if previous:
                        return (previous, " ashore from the %s" % previous)
                return (None, " not on a vessel")

        def timeevent(t,e):
                # one fixed-width column: how long ago, and what happened
                if t is None: return ' ' * 22
                return " %-4s %-16s" % (format_time_interval(now - t),e)

        displayer.show(track.myname() + find_vessel()[1] + '...')

        rotate_nya = '/-\\'

        sort = NameSorter()

        while True:
                track.catchup()
                now = time.time()

                (vn, where) = find_vessel()
                header = ''.join([
                        track.myname(), where,
                        " at %s" % time.strftime("%Y-%m-%d %H:%M:%S"),
                        kreader.info(),
                        '\n',
                        ])

                aboard = track.aboard(vn)
                sort.lsort_pa(aboard)

                tbl_s = StringIO()
                tbl = StandingsTable(tbl_s)

                howmany = ''
                if track.vesselname():
                        howmany = ' %d aboard' % len(aboard)

                tbl.headings(howmany, '  sorted by '+sort.desc())

                for pa in aboard:
                        pi = pa.pirate_info()

                        xs = ('  ','G ')[bool(pa.gunner)]
                        xs += timeevent(pa.last_time, pa.last_event)
                        xs += timeevent(pa.last_chat_time, pa.last_chat_chan)

                        if pi is not None:
                                tbl.pirate(pi, xs)
                        else:
                                # no information for this pirate
                                tbl.pirate_dummy(pa.name, rotate_nya[0], xs)

                displayer.show(header + tbl_s.getvalue())
                tbl_s.close()

                k = kreader.getch()
                if k is None:
                        # no key was read: step the rotating marker
                        rotate_nya = rotate_nya[1:3] + rotate_nya[0]
                        continue

                if k == 'q':
                        break
                elif k == 'a':
                        sort = NameSorter()
                elif k in skill_keys:
                        sort = SkillSorter(skill_keys[k])
                # anything else is an unknown key command
1171
1172 #---------- individual keystroke input ----------
1173
class DummyKeystrokeReader:
        """Stand-in keystroke reader for when input is not interactive."""
        def __init__(self,fd,timeout_dummy):
                # both arguments are accepted and ignored
                pass
        def stop(self):
                pass
        def getch(self):
                """Sleep for a second and report that no key was read."""
                sleep(1)
                return None
        def info(self):
                """Return the marker shown in the status line."""
                return ' [noninteractive]'
1179
class KeystrokeReader(DummyKeystrokeReader):
        """Read single keystrokes from a terminal, with a timeout.

        The terminal on fd has ECHO, ECHONL, ICANON and IEXTEN switched
        off, with VMIN set to 0 and VTIME set to timeout_decisec.
        stop() puts back the settings saved at construction.
        """
        def __init__(self, fd, timeout_decisec=0):
                self._fd = fd
                self._saved = termios.tcgetattr(fd)
                # a second, independent copy to modify
                attrs = termios.tcgetattr(fd)
                flags_off = (termios.ECHO | termios.ECHONL |
                             termios.ICANON | termios.IEXTEN)
                attrs[3] = attrs[3] & ~flags_off
                cc = attrs[6]
                cc[termios.VMIN] = 0
                cc[termios.VTIME] = timeout_decisec
                termios.tcsetattr(fd, termios.TCSANOW, attrs)
        def stop(self):
                """Restore the terminal settings saved by __init__."""
                termios.tcsetattr(self._fd, termios.TCSANOW, self._saved)
        def getch(self):
                """Return one byte read from the terminal, or None if none."""
                debug_flush()
                byte = os.read(self._fd, 1)
                if len(byte):
                        return byte
                return None
        def info(self):
                """Return the (empty) status line annotation."""
                return ''
1199
1200 #---------- main program ----------
1201
def main():
        """Parse the command line, set up global state and run the mode.

        The first non-option argument names the mode; it is mapped to a
        module-level function called do_<mode>, with '-' in the mode
        name replaced by '_'.  That function is called with the
        remaining arguments and the option parser's error method as its
        bad-usage callback.

        Sets the globals opts (the parsed options plus some derived and
        fixed settings) and fetcher.
        """
        global opts, fetcher

        pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
actions:
 yoweb-scrape [--ocean OCEAN ...] pirate PIRATE
 yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
 yoweb-scrape [options] ship-aid CHAT-LOG  (must be .../PIRATE_OCEAN_chat-log*)

display modes (for --display) apply to ship-aid:
 --display=dumb       just print new information, scrolling the screen
 --display=overwrite  use cursor motion, selective clear, etc. to redraw at top''')
        ao = pa.add_option
        ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None,
                help='select ocean OCEAN')
        ao('--cache-dir', dest='cache_dir', metavar='DIR',
                default='~/.yoweb-scrape-cache',
                help='cache yoweb pages in DIR')
        ao('-D','--debug', action='count', dest='debug', default=0,
                help='enable debugging output')
        ao('--debug-fd', type='int', dest='debug_fd',
                help='write any debugging output to specified fd')
        ao('-q','--quiet', action='store_true', dest='quiet',
                help='suppress warning output')
        ao('--display', action='store', dest='display',
                type='choice', choices=['dumb','overwrite'],
                help='how to display ship aid')

        ao('--ship-duty', action='store_true', dest='ship_duty',
                help='show ship duty station puzzles')
        ao('--all-puzzles', action='store_false', dest='ship_duty',
                help='show all puzzles, not just ship duty stations')

        ao('--min-cache-reuse', type='int', dest='min_max_age',
                metavar='SECONDS', default=60,
                help='always reuse cache yoweb data if no older than this')

        (opts,args) = pa.parse_args()
        random.seed()

        if len(args) < 1:
                sys.stderr.write(copyright_info + '\n')
                pa.error('need a mode argument')

        if opts.debug_fd is not None:
                opts.debug_file = os.fdopen(opts.debug_fd, 'w')
        else:
                opts.debug_file = sys.stdout

        mode = args[0]
        # '_' in the mode is mapped to '#' first, so that only the
        # hyphenated spelling of a mode name can match a do_ function
        mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_')
        try: mode_fn = globals()[mode_fn_name]
        except KeyError: pa.error('unknown mode "%s"' % mode)

        # fixed parameters
        opts.expire_age = max(3600, opts.min_max_age)

        opts.ship_reboard_clearout = 3600

        if opts.cache_dir.startswith('~/'):
                # expanduser copes with $HOME being unset, where
                # os.getenv('HOME') + ... would raise TypeError
                opts.cache_dir = os.path.expanduser(opts.cache_dir)

        if opts.display is None:
                # debugging output on stdout, or stdout not being a
                # terminal, rules out the overwriting display
                if ((opts.debug > 0 and opts.debug_fd is None)
                    or not os.isatty(sys.stdout.fileno())):
                        opts.display = 'dumb'
                else:
                        opts.display = 'overwrite'

        fetcher = Fetcher(opts.ocean, opts.cache_dir)

        mode_fn(args[1:], pa.error)
1277
# Run only when executed as a program, so that the file can also be
# loaded as a module without side effects.
if __name__ == '__main__':
        main()