chiark / gitweb /
Fix crash bug in ship-aid KeyBasedSorter
[ypp-sc-tools.db-live.git] / yoweb-scrape
1 #!/usr/bin/python
2
3 #---------- setup ----------
4
5 import signal
6 signal.signal(signal.SIGINT, signal.SIG_DFL)
7
8 import os
9 import time
10 import urllib
11 import urllib2
12 import errno
13 import sys
14 import re as regexp
15 import random
16 import curses
17 import termios
18 from optparse import OptionParser
19 from StringIO import StringIO
20
21 from BeautifulSoup import BeautifulSoup
22
23 opts = None
24
25 #---------- YPP parameters and arrays ----------
26
# Every puzzle name recognised on a pirate page; also the default set of
# columns for StandingsTable.
puzzles = [
        'Swordfighting', 'Bilging', 'Sailing', 'Rigging', 'Navigating',
        'Battle Navigation', 'Gunning', 'Carpentry', 'Rumble',
        'Treasure Haul', 'Drinking', 'Spades', 'Hearts', 'Treasure Drop',
        'Poker', 'Distilling', 'Alchemistry', 'Shipwrightery',
        'Blacksmithing', 'Foraging',
        ]

# A nested list means "these puzzles share one column" (the best of
# their standings is shown).
core_duty_puzzles = ['Gunning', ['Sailing', 'Rigging'], 'Bilging', 'Carpentry']

duty_puzzles = ['Navigating', 'Battle Navigation']
duty_puzzles.extend(core_duty_puzzles)
duty_puzzles.append('Treasure Haul')

# Standing names, worst first; a standing is stored as an index into this.
standingvals = [
        'Able', 'Distinguished', 'Respected', 'Master',
        'Renowned', 'Grand-Master', 'Legendary', 'Ultimate',
        ]
standing_limit = len(standingvals)

# Matches the href of a link to a pirate page.
pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm')

max_pirate_namelen = 12
50
51
52 #---------- general utilities ----------
53
def debug(m):
        """Write the message m as one line to opts.debug_file.

        Does nothing unless opts.debug is greater than zero.
        """
        if opts.debug <= 0:
                return
        print >>opts.debug_file, m
57
def debug_flush():
        """Flush opts.debug_file, but only when debugging is enabled."""
        if opts.debug <= 0:
                return
        opts.debug_file.flush()
61
def sleep(seconds):
        """Sleep for the given number of seconds.

        Pending debug output is flushed first, so it is visible while
        we are blocked.
        """
        debug_flush()
        time.sleep(seconds)
65
def format_time_interval(ti):
        """Render an interval of ti seconds as a short string.

        Under 2 minutes gives 'M:SS', under 2 hours gives minutes
        ('NNm', padded to two digits), under a day gives hours ('Nh'),
        and anything longer gives days ('Nd').
        """
        minutes = ti / 60
        if ti < 120:
                return '%d:%02d' % (minutes, ti % 60)
        if ti < 7200:
                return '%2dm' % minutes
        if ti < 86400:
                return '%dh' % (ti / 3600)
        return '%dd' % (ti / 86400)
71
72 #---------- caching and rate-limiting data fetcher ----------
73
74 class Fetcher:
75         def __init__(self, ocean, cachedir):
76                 debug('Fetcher init %s' % cachedir)
77                 self.ocean = ocean
78                 self.cachedir = cachedir
79                 try: os.mkdir(cachedir)
80                 except (OSError,IOError), oe:
81                         if oe.errno != errno.EEXIST: raise
82                 self._cache_scan(time.time())
83
84         def default_ocean(self, ocean='ice'):
85                 if self.ocean is None:
86                         self.ocean = ocean
87
88         def _cache_scan(self, now):
89                 # returns list of ages, unsorted
90                 ages = []
91                 debug('Fetcher   scan_cache')
92                 for leaf in os.listdir(self.cachedir):
93                         if not leaf.startswith('#'): continue
94                         path = self.cachedir + '/' + leaf
95                         try: s = os.stat(path)
96                         except (OSError,IOError), oe:
97                                 if oe.errno != errno.ENOENT: raise
98                                 continue
99                         age = now - s.st_mtime
100                         if age > opts.expire_age:
101                                 debug('Fetcher    expire %d %s' % (age, path))
102                                 try: os.remove(path)
103                                 except (OSError,IOError), oe:
104                                         if oe.errno != errno.ENOENT: raise
105                                 continue
106                         ages.append(age)
107                 return ages
108
109         def need_wait(self, now, imaginary=[]):
110                 ages = self._cache_scan(now)
111                 ages += imaginary
112                 ages.sort()
113                 debug('Fetcher   ages ' + `ages`)
114                 min_age = 1
115                 need_wait = 0
116                 for age in ages:
117                         if age < min_age and age < 300:
118                                 debug('Fetcher   morewait min=%d age=%d' %
119                                         (min_age, age))
120                                 need_wait = max(need_wait, min_age - age)
121                         min_age += 3
122                         min_age *= 1.25
123                 return need_wait
124
125         def _rate_limit_cache_clean(self, now):
126                 need_wait = self.need_wait(now)
127                 if need_wait > 0:
128                         debug('Fetcher   wait %d' % need_wait)
129                         sleep(need_wait)
130
131         def fetch(self, url, max_age):
132                 debug('Fetcher fetch %s' % url)
133                 cache_corename = urllib.quote_plus(url)
134                 cache_item = "%s/#%s#" % (self.cachedir, cache_corename)
135                 try: f = file(cache_item, 'r')
136                 except (OSError,IOError), oe:
137                         if oe.errno != errno.ENOENT: raise
138                         f = None
139                 now = time.time()
140                 max_age = max(opts.min_max_age, min(max_age, opts.expire_age))
141                 if f is not None:
142                         s = os.fstat(f.fileno())
143                         age = now - s.st_mtime
144                         if age > max_age:
145                                 debug('Fetcher  stale %d < %d'% (max_age, age))
146                                 f = None
147                 if f is not None:
148                         data = f.read()
149                         f.close()
150                         debug('Fetcher  cached %d > %d' % (max_age, age))
151                         return data
152
153                 debug('Fetcher  fetch')
154                 self._rate_limit_cache_clean(now)
155
156                 stream = urllib2.urlopen(url)
157                 data = stream.read()
158                 cache_tmp = "%s/#%s~%d#" % (
159                         self.cachedir, cache_corename, os.getpid())
160                 f = file(cache_tmp, 'w')
161                 f.write(data)
162                 f.close()
163                 os.rename(cache_tmp, cache_item)
164                 debug('Fetcher  stored')
165                 return data
166
167         def yoweb(self, kind, tail, max_age):
168                 self.default_ocean()
169                 url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
170                         self.ocean, kind, tail)
171                 return self.fetch(url, max_age)
172
173 #---------- logging assistance for troubled screenscrapers ----------
174
class SoupLog:
        """Accumulates messages about problems met while scraping.

        The messages are kept, in order, in the public list self.msgs.
        """

        def __init__(self):
                self.msgs = []

        def msg(self, m):
                """Record the message m."""
                self.msgs.append(m)

        def soupm(self, obj, m):
                """Record the message m, annotated with the repr of obj."""
                self.msg('%s; in %s' % (m, repr(obj)))

        def needs_msgs(self, child_souplog):
                """Take over all of child_souplog's messages, leaving it empty."""
                self.msgs.extend(child_souplog.msgs)
                child_souplog.msgs = []
185
def soup_text(obj):
        """Return all the text found within obj, joined and stripped."""
        pieces = obj.findAll(text=True)
        return ''.join(pieces).strip()
189
class SomethingSoupInfo(SoupLog):
        """Base for scrapers: fetches a yoweb page and parses it.

        The parsed page is left in self._soup for the subclass to pick
        apart; problems are reported through the SoupLog interface.
        """

        def __init__(self, kind, tail, max_age):
                SoupLog.__init__(self)
                page = fetcher.yoweb(kind, tail, max_age)
                entity_mode = BeautifulSoup.HTML_ENTITIES
                self._soup = BeautifulSoup(page, convertEntities=entity_mode)
197
198 #---------- scraper for pirate pages ----------
199
class PirateInfo(SomethingSoupInfo):
        # Scraper for a single pirate's yoweb page.
        #
        # Public data members:
        #  pi.standings = { 'Treasure Haul': 0, ... }
        #       values are indices into standingvals (0 = 'Able');
        #       puzzles whose standing could not be determined are absent
        #  pi.name = name
        #  pi.crew = (id, name)    or None if not found
        #  pi.flag = (id, name)    or None if not found
        #  pi.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, pirate, max_age=300):
                """Fetch (or get from cache) and scrape the page for pirate."""
                SomethingSoupInfo.__init__(self,
                        'pirate.wm?target=', pirate, max_age)
                self.name = pirate
                self._find_standings()
                self.crew = self._find_crewflag('crew',
                        '^/yoweb/crew/info\\.wm')
                self.flag = self._find_crewflag('flag',
                        '^/yoweb/flag/info\\.wm')

        def _find_standings(self):
                """Fill in self.standings from the puzzle images on the page.

                Problems are collected in a private log and only copied
                to self.msgs if at least one puzzle ends up without a
                standing.
                """
                imgs = self._soup.findAll('img',
                        src=regexp.compile('/yoweb/images/stat.*'))
                # Matches either ".../Standing" or
                # ".../... (ocean-wide Standing)"; whichever group
                # matched holds the standing name.
                standing_re = regexp.compile(
u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
                        )

                # standing names seen for each puzzle (ideally exactly one)
                standings = { }
                for skill in puzzles:
                        standings[skill] = [ ]

                skl = SoupLog()

                for img in imgs:
                        try: puzzle = img['alt']
                        except KeyError: continue

                        if not puzzle in puzzles:
                                skl.soupm(img, 'unknown puzzle: "%s"' % puzzle)
                                continue
                        key = img.findParent('td')
                        if key is None:
                                skl.soupm(img, 'puzzle at root! "%s"' % puzzle)
                                continue
                        valelem = key.findNextSibling('td')
                        if valelem is None:
                                skl.soupm(key, 'puzzle missing sibling "%s"'
                                        % puzzle)
                                continue
                        valstr = soup_text(valelem)
                        match = standing_re.match(valstr)
                        if match is None:
                                skl.soupm(key, ('puzzle "%s" unparseable'+
                                        ' standing "%s"') % (puzzle, valstr))
                                continue
                        standing = match.group(match.lastindex)
                        standings[puzzle].append(standing)

                self.standings = { }

                for puzzle in puzzles:
                        sl = standings[puzzle]
                        if len(sl) > 1:
                                skl.msg('puzzle "%s" multiple standings %s' %
                                                (puzzle, repr(sl)))
                                continue
                        if not sl:
                                skl.msg('puzzle "%s" no standing found' % puzzle)
                                continue
                        standing = sl[0]
                        try:
                                self.standings[puzzle] = (
                                        standingvals.index(standing))
                        except ValueError:
                                skl.msg('puzzle "%s" unknown standing "%s"' %
                                        (puzzle, standing))

                # Report the collected problems only if something is
                # actually missing.  needs_msgs empties skl, so once
                # is enough.
                for puzzle in puzzles:
                        if not puzzle in self.standings:
                                self.needs_msgs(skl)
                                break

        def _find_crewflag(self, cf, yoweb_re):
                """Find the pirate's crew or flag.

                cf is 'crew' or 'flag'; yoweb_re matches the href of
                the link to look for.  Returns (id, name), or None
                (after logging a message) unless exactly one suitable
                link with a parseable id is found.
                """
                things = self._soup.findAll('a', href=regexp.compile(yoweb_re))
                if len(things) != 1:
                        self.msg('zero or several %s id references found' % cf)
                        return None
                thing = things[0]
                id_re = '\\b%sid\\=(\\w+)$' % cf
                id_haystack = thing['href']
                match = regexp.search(id_re, id_haystack)
                if match is None:
                        self.soupm(thing, ('incomprehensible %s id ref'+
                                ' (%s in %s)') % (cf, id_re, id_haystack))
                        return None
                name = soup_text(thing)
                return (match.group(1), name)

        def __str__(self):
                return repr((self.crew, self.flag, self.standings, self.msgs))
298
299 #---------- scraper for crew pages ----------
300
class CrewInfo(SomethingSoupInfo):
        # Scraper for a crew's yoweb page.
        #
        # Public data members:
        #  ci.crew = [ ('Captain',        ['Pirate', ...]),
        #              ('Senior Officer', [...]),
        #               ... ]
        #  ci.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, crewid, max_age=300):
                """Fetch (or get from cache) and scrape the crew's page."""
                SomethingSoupInfo.__init__(self,
                        'crew/info.wm?crewid=', crewid, max_age)
                self._find_crew()

        def _find_crew(self):
                """Fill in self.crew from the crew member table.

                The table is located by starting from the (unique)
                captain image and walking outwards through enclosing
                tables until one containing a pirate link is found.
                """
                self.crew = []
                captain_imgs = self._soup.findAll('img',
                        src='/yoweb/images/crew-captain.png')
                if len(captain_imgs) != 1:
                        self.msg('crew members: no. of captain images != 1')
                        return

                table = captain_imgs[0]
                while not table.find('a', href=pirate_ref_re):
                        table = table.findParent('table')
                        if not table:
                                self.msg('crew members: cannot find table')
                                return

                rank_img_re = regexp.compile('/yoweb/images/crew')
                # member list of the rank most recently seen, if any
                members = None
                for row in table.contents:
                        # skip the strings between the rows
                        if isinstance(row,basestring):
                                continue

                        if row.find('img', attrs={'src': rank_img_re}):
                                # a rank heading row starts a new group
                                members = []
                                self.crew.append((soup_text(row), members))
                                continue

                        for cell in row.findAll('a', href=pirate_ref_re):
                                if members is not None:
                                        members.append(soup_text(cell))
                                else:
                                        self.soupm(cell,
                                                'crew members: crew before rank')

        def __str__(self):
                return repr((self.crew, self.msgs))
348
349 #---------- pretty-printer for tables of pirate puzzle standings ----------
350
class StandingsTable:
        """Pretty-printer for a table of pirates' puzzle standings.

        Output is written to the file-like object f, one line per
        pirate, one column per entry in use_puzzles.  An entry which
        is a list of puzzle names makes a single shared column.
        """

        def __init__(self, f, use_puzzles=None, col_width=6, gap_every=5):
                """Set up a table writing to f.

                use_puzzles defaults to duty_puzzles if opts.ship_duty
                is set, otherwise to all puzzles.  col_width includes
                the separating space.  A blank line is emitted every
                gap_every lines (never, if gap_every is None).
                """
                if use_puzzles is None:
                        if opts.ship_duty:
                                use_puzzles=duty_puzzles
                        else:
                                use_puzzles=puzzles
                self._puzzles = use_puzzles
                self.f = f
                self._cw = col_width-1
                self._gap_every = gap_every
                self._linecount = 0
                self._o = f.write

        def _nl(self): self._o('\n')

        def _pline(self, pirate, puzstrs, extra):
                """Write one table line: name, one cell per puzstr, extra."""
                if (self._linecount > 0
                    and self._gap_every is not None
                    and not (self._linecount % self._gap_every)):
                        self._nl()
                self._o(' %-*s' % (max(max_pirate_namelen, 14), pirate))
                for v in puzstrs:
                        # pad and truncate each cell to the column width
                        self._o(' %-*.*s' % (self._cw,self._cw, v))
                if extra:
                        self._o(' ' + extra)
                self._nl()
                self._linecount += 1

        def _puzstr(self, pi, puzzle):
                """Return the cell text for pirate pi and puzzle.

                puzzle is a puzzle name, or a list of names of which
                the best standing is shown.  Returns '?' if a standing
                is unknown and '' for the lowest standing (0).
                Otherwise the text is the standing number (if the
                column is wide enough) followed by one '*' per two
                levels and a '+' for an odd level.
                """
                if not isinstance(puzzle,list): puzzle = [puzzle]
                try: standing = max([pi.standings[p] for p in puzzle])
                except KeyError: return '?'
                if not standing: return ''
                s = ''
                if self._cw > 4:
                        # NOTE(review): the original also computed the
                        # initial letter of the standing's name here
                        # but never used it; the number is what has
                        # always been displayed.
                        s += repr(standing)
                if self._cw > 5:
                        s += ' '
                s += '*' * (standing // 2)
                s += '+' * (standing % 2)
                return s

        def headings(self, lhs='', rhs=None):
                """Write the heading line and restart the gap counter."""
                def puzn_redact(name):
                        # abbreviate a column's puzzle name(s)
                        if isinstance(name,list):
                                return '/'.join(
                                        ["%.*s" % (self._cw//2, puzn_redact(n))
                                         for n in name])
                        spc = name.find(' ')
                        if spc < 0: return name
                        # two words: up to 4 chars of the first, then
                        # the second, with no space
                        return name[0:min(4,spc)] + name[spc+1:]
                # negative count: never put a gap before the headings
                self._linecount = -2
                self._pline(lhs, [puzn_redact(p) for p in self._puzzles], rhs)
                self._linecount = 0

        def literalline(self, line):
                """Write line verbatim and restart the gap counter."""
                self._o(line)
                self._nl()
                self._linecount = 0

        def pirate_dummy(self, name, standingstring, extra=None):
                """Write a line for name with standingstring repeated
                once per column (so it should be a one-element sequence)."""
                self._pline(name, standingstring * len(self._puzzles), extra)

        def pirate(self, pi, extra=None):
                """Write the line for the pirate described by pi."""
                puzstrs = [self._puzstr(pi,puz) for puz in self._puzzles]
                self._pline(pi.name, puzstrs, extra)
417
418
419 #---------- chat log parser ----------
420
class PirateAboard:
        # A plain record of what we know about one pirate on a vessel.
        # All the fields are public:
        #  pa.v                 vessel record the pirate is aboard
        #  pa.name
        #  pa.last_time         time and description of the
        #  pa.last_event         most recent event involving them
        #  pa.gunner
        #  pa.last_chat_time
        #  pa.last_chat_chan
        #  pa.pi                PirateInfo, or None if not fetched yet

        def __init__(pa, pn, v, time, event):
                pa.pi = None
                pa.gunner = False
                pa.last_chat_chan = None
                pa.last_chat_time = None
                pa.last_event = event
                pa.last_time = time
                pa.v = v
                pa.name = pn

        def pirate_info(pa):
                """Return pa.pi, fetching or refreshing it if appropriate.

                Existing info is refreshed once it is older than a
                randomly chosen 120-240 seconds.  Nothing is fetched
                while the fetcher says we would have to wait; in that
                case the existing info (possibly None) is returned.
                """
                now = time.time()
                if not pa.pi:
                        imaginary = [1]
                else:
                        age = now - pa.pi_fetched
                        guide = random.randint(120,240)
                        if age <= guide:
                                return pa.pi
                        debug('PirateAboard refresh %d > %d  %s' % (
                                age, guide, pa.name))
                        imaginary = [2,6]

                wait = fetcher.need_wait(now, imaginary)
                if not wait:
                        pa.pi = PirateInfo(pa.name, 600)
                        pa.pi_fetched = now
                else:
                        debug('PirateAboard fetcher not ready %d' % wait)
                return pa.pi
461
462 class ChatLogTracker:
463         # This is quite complex so we make it opaque.  Use the
464         # official invokers, accessors etc.
465
466         def __init__(self, myself_pi, logfn):
467                 self._pl = {}   # self._pl['Pirate'] =
468                 self._vl = {}   #   self._vl['Vessel']['Pirate'] = PirateAboard
469                                 # self._vl['Vessel']['#lastinfo']
470                                 # self._vl['Vessel']['#name']
471                                 # self._v = self._vl[self._vessel]
472                 self._date = None
473                 self._myself = myself_pi
474                 self._f = file(logfn)
475                 self._lbuf = ''
476                 self._progress = [0, os.fstat(self._f.fileno()).st_size]
477                 self._disembark_myself()
478                 self._need_redisplay = False
479                 self._lastvessel = None
480
481         def _disembark_myself(self):
482                 self._v = None
483                 self._vessel = None
484                 self.force_redisplay()
485
486         def force_redisplay(self):
487                 self._need_redisplay = True
488
489         def _vessel_updated(self, v, timestamp):
490                 v['#lastinfo'] = timestamp
491                 self.force_redisplay()
492
493         def _onboard_event(self,v,timestamp,pirate,event):
494                 pa = self._pl.get(pirate, None)
495                 if pa is not None and pa.v is v:
496                         pa.last_time = timestamp
497                         pa.last_event = event
498                 else:
499                         if pa is not None: del pa.v[pirate]
500                         pa = PirateAboard(pirate, v, timestamp, event)
501                         self._pl[pirate] = pa
502                         v[pirate] = pa
503                 self._vessel_updated(v, timestamp)
504                 return pa
505
506         def _trash_vessel(self, v):
507                 for pn in v:
508                         if pn.startswith('#'): continue
509                         del self._pl[pn]
510                 vn = v['#name']
511                 del self._vl[vn]
512                 if v is self._v: self._disembark_myself()
513                 self.force_redisplay()
514
515         def _vessel_stale(self, v, timestamp):
516                 return timestamp - v['#lastinfo'] > opts.ship_reboard_clearout
517
518         def _vessel_check_expire(self, v, timestamp):
519                 if not self._vessel_stale(v, timestamp):
520                         return v
521                 self._debug_line_disposition(timestamp,'',
522                         'stale-reset ' + v['#name'])
523                 self._trash_vessel(v)
524                 return None
525
526         def expire_garbage(self, timestamp):
527                 for v in self._vl.values():
528                         self._vessel_check_expire(v, timestamp)
529
530         def _vessel_lookup(self, vn, timestamp, dml=[], create=False):
531                 v = self._vl.get(vn, None)
532                 if v is not None:
533                         v = self._vessel_check_expire(v, timestamp)
534                 if v is not None:
535                         dml.append('found')
536                         return v
537                 if not create:
538                         dml.append('no')
539                 dml.append('new')
540                 self._vl[vn] = v = { '#name': vn }
541                 self._vessel_updated(v, timestamp)
542                 return v
543
544         def _find_matching_vessel(self, pattern, timestamp, cmdr,
545                                         dml=[], create=False):
546                 # use when a commander pirate `cmdr' specified a vessel
547                 #  by name `pattern' (either may be None)
548                 # if create is true, will create the vessel
549                 #  record if an exact name is specified
550
551                 if (pattern is not None and
552                     not '*' in pattern
553                     and len(pattern.split(' ')) == 2):
554                         vn = pattern.title()
555                         dml.append('exact')
556                         return self._vessel_lookup(
557                                 vn, timestamp, dml=dml, create=create)
558
559                 if pattern is None:
560                         pattern_check = lambda vn: True
561                 else:
562                         re = '(?:.* )?%s$' % pattern.lower().replace('*','.+')
563                         pattern_check = regexp.compile(re, regexp.I).match
564
565                 tries = []
566
567                 cmdr_pa = self._pl.get(cmdr, None)
568                 if cmdr_pa: tries.append((cmdr_pa.v, 'cmdr'))
569
570                 tries.append((self._v, 'here'))
571                 tried_vns = []
572
573                 for (v, dm) in tries:
574                         if v is None: dml.append(dm+'?'); continue
575                         
576                         vn = v['#name']
577                         if not pattern_check(vn):
578                                 tried_vns.append(vn)
579                                 dml.append(dm+'#')
580                                 continue
581
582                         dml.append(dm+'!')
583                         return v
584
585                 if pattern is not None and '*' in pattern:
586                         search = [
587                                 (vn,v)
588                                 for (vn,v) in self._vl.iteritems()
589                                 if not self._vessel_stale(v, timestamp)
590                                 if pattern_check(vn)
591                                 ]
592                         #debug('CLT-RE /%s/ wanted (%s) searched (%s)' % (
593                         #       re,
594                         #       '/'.join(tried_vns),
595                         #       '/'.join([vn for (vn,v) in search])))
596
597                         if len(search)==1:
598                                 dml.append('one')
599                                 return search[0][1]
600                         elif search:
601                                 dml.append('many')
602                         else:
603                                 dml.append('none')
604
605         def _debug_line_disposition(self,timestamp,l,m):
606                 debug('CLT %13s %-40s %s' % (timestamp,m,l))
607
608         def chatline(self,l):
609                 rm = lambda re: regexp.match(re,l)
610                 d = lambda m: self._debug_line_disposition(timestamp,l,m)
611                 timestamp = None
612
613                 m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$')
614                 if m:
615                         self._date = [int(x) for x in m.groups()]
616                         self._previous_timestamp = None
617                         return d('date '+`self._date`)
618
619                 if self._date is None:
620                         return d('date unset')
621
622                 m = rm('\\[(\d\d):(\d\d):(\d\d)\\] ')
623                 if not m:
624                         return d('no timestamp')
625
626                 while True:
627                         time_tuple = (self._date +
628                                       [int(x) for x in m.groups()] +
629                                       [-1,-1,-1])
630                         timestamp = time.mktime(time_tuple)
631                         if timestamp >= self._previous_timestamp: break
632                         self._date[2] += 1
633                         self._debug_line_disposition(timestamp,'',
634                                 'new date '+`self._date`)
635
636                 self._previous_timestamp = timestamp
637
638                 l = l[l.find(' ')+1:]
639
640                 def ob_x(pirate,event):
641                         return self._onboard_event(
642                                         self._v, timestamp, pirate, event)
643                 def ob1(did): ob_x(m.group(1), did); return d(did)
644                 def oba(did): return ob1('%s %s' % (did, m.group(2)))
645
646                 def disembark(v, timestamp, pirate, event):
647                         self._onboard_event(
648                                         v, timestamp, pirate, 'leaving '+event)
649                         del v[pirate]
650                         del self._pl[pirate]
651
652                 def disembark_me(why):
653                         self._disembark_myself()
654                         return d('disembark-me '+why)
655
656                 m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$')
657                 if m:
658                         dm = ['boarding']
659                         pn = self._myself.name
660                         vn = m.group(1)
661                         v = self._vessel_lookup(vn, timestamp, dm, create=True)
662                         self._lastvessel = self._vessel = vn
663                         self._v = v
664                         ob_x(pn, 'we boarded')
665                         self.expire_garbage(timestamp)
666                         return d(' '.join(dm))
667
668                 if self._v is None:
669                         return d('no vessel')
670
671                 m = rm('(\\w+) has come aboard\\.$')
672                 if m: return ob1('boarded');
673
674                 m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$')
675                 if m:
676                         (who,what) = m.groups()
677                         pa = ob_x(who,'ord '+what)
678                         if what == 'Gunning':
679                                 pa.gunner = True
680                         return d('duty order')
681
682                 m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$')
683                 if m: oba('stopped'); return d("end")
684
685                 def chat_core(speaker, chan):
686                         try: pa = self._pl[speaker]
687                         except KeyError: return 'mystery'
688                         if pa.v is not self._v: return 'elsewhere'
689                         pa.last_chat_time = timestamp
690                         pa.last_chat_chan = chan
691                         self.force_redisplay()
692                         return 'here'
693
694                 def chat(chan):
695                         speaker = m.group(1)
696                         dm = chat_core(speaker, chan)
697                         return d('chat %s %s' % (chan, dm))
698
699                 def chat_metacmd(chan):
700                         (cmdr, metacmd) = m.groups()
701                         metacmd = regexp.sub('\\s+', ' ', metacmd).strip()
702                         m2 = regexp.match(
703                                 '/([ad]) (?:([A-Za-z* ]+)\\s*:)?([A-Za-z ]+)$',
704                                 metacmd)
705                         if not m2: return chat(chan)
706
707                         (cmd, pattern, targets) = m2.groups()
708                         dml = ['cmd', chan, cmd]
709
710                         if cmd == 'a': each = self._onboard_event
711                         else: each = disembark
712
713                         if cmdr == self._myself.name:
714                                 dml.append('self')
715                                 how = 'cmd: %s' % cmd
716                         else:
717                                 dml.append('other')
718                                 how = 'cmd: %s %s' % (cmd,cmdr)
719
720                         v = self._find_matching_vessel(
721                                 pattern, timestamp, cmdr, dml, create=True)
722
723                         if v is not None:
724                                 targets = targets.strip().split(' ')
725                                 dml.append(`len(targets)`)
726                                 for target in targets:
727                                         each(v, timestamp, target.title(), how)
728                                 self._vessel_updated(v, timestamp)
729
730                         dm = ' '.join(dml)
731                         chat_core(cmdr, 'cmd '+chan)
732                         return d(dm)
733
734                 m = rm('(\\w+) (?:issued an order|ordered everyone) "')
735                 if m: return ob1('general order');
736
737                 m = rm('(\\w+) says, "')
738                 if m: return chat('public')
739
740                 m = rm('(\\w+) tells ye, "')
741                 if m: return chat('private')
742
743                 m = rm('Ye told (\\w+), "(.*)"$')
744                 if m: return chat_metacmd('private')
745
746                 m = rm('(\\w+) flag officer chats, "')
747                 if m: return chat('flag officer')
748
749                 m = rm('(\\w+) officer chats, "(.*)"$')
750                 if m: return chat_metacmd('officer')
751
752                 m = rm('Ye accepted the offer to job with ')
753                 if m: return disembark_me('jobbing')
754
755                 m = rm('Ye hop on the ferry and are whisked away ')
756                 if m: return disembark_me('ferry')
757
758                 m = rm('Whisking away to yer home on the magical winds')
759                 if m: return disembark_me('home')
760
761                 m = rm('Game over\\.  Winners: ([A-Za-z, ]+)\\.$')
762                 if m:
763                         pl = m.group(1).split(', ')
764                         if not self._myself.name in pl:
765                                 return d('lost melee')
766                         for pn in pl:
767                                 if ' ' in pn: continue
768                                 ob_x(pn,'won melee')
769                         return d('won melee')
770
771                 m = rm('(\\w+) is eliminated\\!')
772                 if m: return ob1('eliminated in fray');
773
774                 m = rm('(\\w+) has driven \w+ from the ship\\!')
775                 if m: return ob1('boarder repelled');
776
777                 m = rm('\w+ has bested (\\w+), and turns'+
778                         ' to the rest of the ship\\.')
779                 if m: return ob1('boarder unrepelled');
780
781                 m = rm('(\\w+) has left the vessel\.')
782                 if m:
783                         pirate = m.group(1)
784                         disembark(self._v, timestamp, pirate, 'disembarked')
785                         return d('disembarked')
786
787                 return d('not-matched')
788
789         def _str_vessel(self, vn, v):
790                 s = ' vessel %s\n' % vn
791                 s += ' '*20 + "%-*s   %13s\n" % (
792                                 max_pirate_namelen, '#lastinfo',
793                                 v['#lastinfo'])
794                 assert v['#name'] == vn
795                 for pn in sorted(v.keys()):
796                         if pn.startswith('#'): continue
797                         pa = v[pn]
798                         assert pa.v == v
799                         assert self._pl[pn] == pa
800                         s += ' '*20 + "%s %-*s %13s %-30s %13s %s\n" % (
801                                 (' ','G')[pa.gunner],
802                                 max_pirate_namelen, pn,
803                                 pa.last_time, pa.last_event,
804                                 pa.last_chat_time, pa.last_chat_chan)
805                 return s
806
807         def __str__(self):
808                 s = '''<ChatLogTracker
809  myself %s
810  vessel %s
811 '''                     % (self._myself.name, self._vessel)
812                 assert ((self._v is None and self._vessel is None) or
813                         (self._v is self._vl[self._vessel]))
814                 if self._vessel is not None:
815                         s += self._str_vessel(self._vessel, self._v)
816                 for vn in sorted(self._vl.keys()):
817                         if vn == self._vessel: continue
818                         s += self._str_vessel(vn, self._vl[vn])
819                 for p in self._pl:
820                         pa = self._pl[p]
821                         assert pa.v[p] is pa
822                         assert pa.v in self._vl.values()
823                 s += '>\n'
824                 return s
825
826         def catchup(self, progress=None):
827                 while True:
828                         more = self._f.readline()
829                         if not more: break
830
831                         self._progress[0] += len(more)
832                         if progress: progress.progress(*self._progress)
833
834                         self._lbuf += more
835                         if self._lbuf.endswith('\n'):
836                                 self.chatline(self._lbuf.rstrip())
837                                 self._lbuf = ''
838                                 if opts.debug >= 2:
839                                         debug(self.__str__())
840                 if progress: progress.caughtup()
841
842         def changed(self):
843                 rv = self._need_redisplay
844                 self._need_redisplay = False
845                 return rv
846         def myname(self):
847                 # returns our pirate name
848                 return self._myself.name
849         def vesselname(self):
850                 # returns the vessel name we're aboard or None
851                 return self._vessel
852         def lastvesselname(self):
853                 # returns the last vessel name we were aboard or None
854                 return self._lastvessel
855         def aboard(self, vesselname=True):
856                 # returns a list of PirateAboard the vessel
857                 #  sorted by pirate name
858                 #  you can pass this None and you'll get []
859                 #  or True for the current vessel (which is the default)
860                 #  the returned value is a fresh list of persistent
861                 #  PirateAboard objects
862                 if vesselname is True: v = self._v
863                 else: v = self._vl.get(vesselname.title())
864                 if v is None: return []
865                 return [ v[pn]
866                          for pn in sorted(v.keys())
867                          if not pn.startswith('#') ]
868
869 #---------- implementations of actual operation modes ----------
870
871 def do_pirate(pirates, bu):
872         print '{'
873         for pirate in pirates:
874                 info = PirateInfo(pirate)
875                 print '%s: %s,' % (`pirate`, info)
876         print '}'
877
def prep_crew_of(args, bu, max_age=300):
        # Look up the single pirate named in args and return a CrewInfo
        # built from the first element of that pirate's `crew' value, or
        # None if the pirate's `crew' is None.  `bu' is called with a
        # message if args is not exactly one name; max_age is passed on
        # to both PirateInfo and CrewInfo.
        if len(args) != 1: bu('crew-of takes one pirate name')
        crew = PirateInfo(args[0], max_age).crew
        if crew is None:
                return None
        return CrewInfo(crew[0], max_age)
883
884 def do_crew_of(args, bu):
885         ci = prep_crew_of(args, bu)
886         print ci
887
def do_standings_crew_of(args, bu):
        # 'standings-crew-of' mode: print a standings table on stdout
        # with one section per non-empty crew rank and one row per
        # member of that rank.
        ci = prep_crew_of(args, bu, 60)
        tab = StandingsTable(sys.stdout)
        tab.headings()
        # prep_crew_of returns None when the pirate has no crew; in that
        # case there is nothing to list, so stop after the headings
        # rather than crashing on ci.crew.
        if ci is None: return
        for (rank, members) in ci.crew:
                if not members: continue
                tab.literalline('')
                tab.literalline('%s:' % rank)
                for p in members:
                        # a different random max age for each member,
                        # presumably so that cached pages do not all
                        # expire at the same moment
                        pi = PirateInfo(p, random.randint(900,1800))
                        tab.pirate(pi)
899
class ProgressPrintPercentage:
        # Simple progress reporter: writes a one-line percentage,
        # terminated by a carriage return so that each update overwrites
        # the previous one, to the file object given at construction.

        def __init__(self, f=sys.stdout):
                self._f = f

        def _emit(self, text):
                # write text to our file and flush it straight away
                self._f.write(text)
                self._f.flush()

        def progress_string(self,done,total):
                # the text for `done' out of `total', as a percentage
                percent = (done*100) / total
                return "scan chat logs %3d%%\r" % percent

        def progress(self,*a):
                # arguments are passed straight on to progress_string
                self._emit(self.progress_string(*a))

        def show_init(self, pirate, ocean):
                # startup banner line
                print >>self._f, 'Starting up, %s on the %s ocean' % (
                        pirate, ocean)

        def caughtup(self):
                # blank out the progress text
                self._emit('                   \r')
914
915 #----- modes which use the chat log parser are quite complex -----
916
def prep_chat_log(args, bu,
                progress=ProgressPrintPercentage(),
                max_myself_age=3600):
        # Common setup for the modes which follow a chat log.
        #  args            must be exactly one item, the chat log filename,
        #                  whose last path component starts PIRATE_OCEAN_
        #  bu              called with a message if the arguments are bad
        #  progress        told about startup and about scan progress
        #  max_myself_age  max age passed to PirateInfo for our own pirate
        # Returns (myself, track): our own PirateInfo and a ChatLogTracker
        # which has already read everything currently in the log.
        if len(args) != 1: bu('this action takes only chat log filename')
        logfn = args[0]
        logfn_re = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_'
        match = regexp.match(logfn_re, logfn)
        if not match: bu('chat log filename is not in expected format')
        (pirate, ocean) = match.groups()
        fetcher.default_ocean(ocean)

        progress.show_init(pirate, fetcher.ocean)
        myself = PirateInfo(pirate,max_myself_age)
        track = ChatLogTracker(myself, logfn)

        # Lower the debug level by two for the initial scan (presumably
        # to keep the dump of old log lines out of the debug output).
        # The finally clause makes sure the level is restored even if
        # the scan fails part way through.
        opts.debug -= 2
        try:
                track.catchup(progress)
        finally:
                opts.debug += 2

        track.force_redisplay()

        return (myself, track)
939
940 def do_track_chat_log(args, bu):
941         (myself, track) = prep_chat_log(args, bu)
942         while True:
943                 track.catchup()
944                 if track.changed():
945                         print track
946                 sleep(1)
947
948 #----- ship management aid -----
949
950 class Display_dumb(ProgressPrintPercentage):
951         def __init__(self):
952                 ProgressPrintPercentage.__init__(self)
953         def show(self, s):
954                 print '\n\n', s;
955         def realstart(self):
956                 pass
957
958 class Display_overwrite(ProgressPrintPercentage):
959         def __init__(self):
960                 ProgressPrintPercentage.__init__(self)
961
962                 null = file('/dev/null','w')
963                 curses.setupterm(fd=null.fileno())
964
965                 self._clear = curses.tigetstr('clear')
966                 if not self._clear:
967                         self._debug('missing clear!')
968                         self.show = Display_dumb.show
969                         return
970
971                 self._t = {'el':'', 'ed':''}
972                 if not self._init_sophisticated():
973                         for k in self._t.keys(): self._t[k] = ''
974                         self._t['ho'] = self._clear
975
976         def _debug(self,m): debug('display overwrite: '+m)
977
978         def _init_sophisticated(self):
979                 for k in self._t.keys():
980                         s = curses.tigetstr(k)
981                         self._t[k] = s
982                 self._t['ho'] = curses.tigetstr('ho')
983                 if not self._t['ho']:
984                         cup = curses.tigetstr('cup')
985                         self._t['ho'] = curses.tparm(cup,0,0)
986                 missing = [k for k in self._t.keys() if not self._t[k]]
987                 if missing:
988                         self.debug('missing '+(' '.join(missing)))
989                         return 0
990                 return 1
991
992         def show(self, s):
993                 w = sys.stdout.write
994                 def wti(k): w(self._t[k])
995
996                 wti('ho')
997                 nl = ''
998                 for l in s.rstrip().split('\n'):
999                         w(nl)
1000                         w(l)
1001                         wti('el')
1002                         nl = '\r\n'
1003                 wti('ed')
1004                 w(' ')
1005                 sys.stdout.flush()
1006
1007         def realstart(self):
1008                 sys.stdout.write(self._clear)
1009                 sys.stdout.flush()
1010                         
1011
1012 def do_ship_aid(args, bu):
1013         if opts.ship_duty is None: opts.ship_duty = True
1014
1015         displayer = globals()['Display_'+opts.display]()
1016
1017         (myself, track) = prep_chat_log(args, bu, progress=displayer)
1018
1019         displayer.realstart()
1020
1021         if os.isatty(0): kr_create = KeystrokeReader
1022         else: kr_create = DummyKeystrokeReader
1023
1024         try:
1025                 kreader = kr_create(0, 10)
1026                 ship_aid_core(myself, track, displayer, kreader)
1027         finally:
1028                 kreader.stop()
1029                 print '\n'
1030
class KeyBasedSorter:
        # Base class for sorters of lists of PirateAboard-like objects.
        # Subclasses provide compar_key(pi), which maps the value
        # returned by an entry's pirate_info() to a sort key.

        def compar_key_pa(self, pa):
                # sort key for one entry; None if it has no pirate info
                pi = pa.pirate_info()
                if pi is not None:
                        return self.compar_key(pi)
                return None

        def lsort_pa(self, l):
                # sorts the list l in place
                l.sort(key = self.compar_key_pa)
1038
class NameSorter(KeyBasedSorter):
        # Sorter whose key is simply the pirate's name.

        def desc(self):
                # label for this ordering
                return 'name'

        def compar_key(self, pi):
                return pi.name
1042
class SkillSorter(NameSorter):
        # Sorter which puts first the pirates with the best standing in
        # the wanted puzzle(s), then prefers those with lower standings
        # in the other core duty puzzles, then falls back to name.
        #
        # self._want   frozenset of the wanted puzzle names
        # self._avoid  core duty puzzle names which are not wanted
        # self._desc   label returned by desc()

        def __init__(self, relevant):
                # relevant is a '/'-separated list of puzzle names
                wanted = frozenset(relevant.split('/'))
                others = set()
                for entry in core_duty_puzzles:
                        # entries are either one name or a list of names
                        if isinstance(entry,basestring): others.add(entry)
                        else: others.update(entry)
                self._want = wanted
                self._avoid = others - wanted
                self._desc = '%s' % relevant

        def desc(self): return self._desc

        def compar_key(self, pi):
                standings = pi.standings
                # a missing standing counts as -1 for wanted puzzles and
                # as standing_limit for the ones to avoid
                best_want = max([ standings.get(puz,-1)
                                  for puz in self._want ])
                best_avoid = sorted([ -standings.get(puz,standing_limit)
                                      for puz in self._avoid ])
                debug('compar_key %s bw=%s ba=%s' % (pi.name,
                        repr(best_want), repr(best_avoid)))
                # negating again turns the avoid values back into
                # standings, highest first
                return (-best_want, [ -x for x in best_avoid ], pi.name)
1069
def ship_aid_core(myself, track, displayer, kreader):
        # Main loop of ship-aid mode.  Each time round: catch up with
        # the chat log, show a table of everyone aboard the current (or
        # failing that the last) vessel in the selected order, then wait
        # for a keystroke from kreader.  'q' quits, 'a' sorts by name,
        # and the keys in skill_keys sort by the corresponding puzzle.

        # keystroke -> argument for SkillSorter
        skill_keys = {
                'g': 'Gunning',
                'c': 'Carpentry',
                's': 'Sailing/Rigging',
                'b': 'Bilging',
                'n': 'Navigating',
                'd': 'Battle Navigation',
                't': 'Treasure Haul',
                }

        def find_vessel():
                # returns (vessel name or None, text describing where
                # we are)
                current = track.vesselname()
                if current: return (current, " on board the %s" % current)
                last = track.lastvesselname()
                if last: return (last, " ashore from the %s" % last)
                return (None, " not on a vessel")

        def timeevent(t,e):
                # fixed-width "how long ago, what" column; blank if no
                # time is recorded.  Uses `now' from the loop below.
                if t is None: return ' ' * 22
                return " %-4s %-16s" % (format_time_interval(now - t),e)

        displayer.show(track.myname() + find_vessel()[1] + '...')

        rotate_nya = '/-\\'

        sort = NameSorter()

        while True:
                track.catchup()
                now = time.time()

                (vn, where) = find_vessel()
                s = ''.join([
                        track.myname(), where,
                        " at %s" % time.strftime("%Y-%m-%d %H:%M:%S"),
                        kreader.info(),
                        '\n',
                        ])

                aboard = track.aboard(vn)
                sort.lsort_pa(aboard)

                tbl_s = StringIO()
                tbl = StandingsTable(tbl_s)
                tbl.headings(' %d aboard' % len(aboard),
                                '  sorted by '+sort.desc())

                for pa in aboard:
                        pi = pa.pirate_info()

                        if pa.gunner: flag = 'G '
                        else: flag = '  '
                        xs = (flag +
                              timeevent(pa.last_time, pa.last_event) +
                              timeevent(pa.last_chat_time,
                                        pa.last_chat_chan))

                        if pi is not None:
                                tbl.pirate(pi, xs)
                        else:
                                # no info for this pirate yet: show a
                                # placeholder row with the spinner char
                                tbl.pirate_dummy(pa.name, rotate_nya[0], xs)

                s += tbl_s.getvalue()
                displayer.show(s)
                tbl_s.close()

                k = kreader.getch()
                if k is None:
                        # no key pressed: advance the spinner
                        rotate_nya = rotate_nya[1:3] + rotate_nya[0]
                        continue

                if k == 'q': break
                if k in skill_keys: sort = SkillSorter(skill_keys[k])
                elif k == 'a': sort = NameSorter()
                # any other key is an unknown command and is ignored
1140
1141 #---------- individual keystroke input ----------
1142
class DummyKeystrokeReader:
        # Stand-in keystroke reader which never returns a key.  It has
        # the same methods as KeystrokeReader.

        def __init__(self,fd,timeout_dummy):
                # both arguments are ignored
                pass

        def stop(self):
                # nothing to undo
                pass

        def getch(self):
                # wait a second, then report that no key was pressed
                sleep(1)
                return None

        def info(self):
                return ' [noninteractive]'
1148
class KeystrokeReader(DummyKeystrokeReader):
        # Reads single keystrokes from a terminal fd, with a timeout.
        # The constructor changes the terminal settings; stop() puts
        # back the settings saved at construction.

        def __init__(self, fd, timeout_decisec=0):
                self._fd = fd
                self._saved = termios.tcgetattr(fd)
                # fetch a second, separate copy to modify, so that
                # self._saved stays as it was
                attrs = termios.tcgetattr(fd)
                # element 3 is the local-modes word: turn off echo and
                # canonical (line at a time) input
                unwanted = (termios.ECHO | termios.ECHONL |
                            termios.ICANON | termios.IEXTEN)
                attrs[3] = attrs[3] & ~unwanted
                # element 6 is the control characters list: with VMIN 0
                # a read waits at most VTIME tenths of a second
                cc = attrs[6]
                cc[termios.VMIN] = 0
                cc[termios.VTIME] = timeout_decisec
                termios.tcsetattr(fd, termios.TCSANOW, attrs)

        def stop(self):
                termios.tcsetattr(self._fd, termios.TCSANOW, self._saved)

        def getch(self):
                # returns one byte read from the fd, or None if the read
                # returned nothing
                debug_flush()
                byte = os.read(self._fd, 1)
                return byte or None

        def info(self):
                return ''
1168
1169 #---------- main program ----------
1170
def main():
        # Program entry point: parse the command line, fill in the
        # global `opts' and `fetcher', and run the do_<mode> function
        # for the requested action with the remaining arguments and the
        # option parser's error() as the usage-error callback.
        global opts, fetcher

        pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
actions:
 yoweb-scrape [--ocean OCEAN ...] pirate PIRATE
 yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
 yoweb-scrape [options] ship-aid CHAT-LOG  (must be .../PIRATE_OCEAN_chat-log*)

display modes (for --display) apply to ship-aid:
 --display=dumb       just print new information, scrolling the screen
 --display=overwrite  use cursor motion, selective clear, etc. to redraw at top
''')
        ao = pa.add_option
        ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None,
                help='select ocean OCEAN')
        ao('--cache-dir', dest='cache_dir', metavar='DIR',
                default='~/.yoweb-scrape-cache',
                help='cache yoweb pages in DIR')
        ao('-D','--debug', action='count', dest='debug', default=0,
                help='enable debugging output')
        ao('--debug-fd', type='int', dest='debug_fd',
                help='write any debugging output to specified fd')
        ao('-q','--quiet', action='store_true', dest='quiet',
                help='suppress warning output')
        ao('--display', action='store', dest='display',
                type='choice', choices=['dumb','overwrite'],
                help='how to display ship aid')

        ao('--ship-duty', action='store_true', dest='ship_duty',
                help='show ship duty station puzzles')
        ao('--all-puzzles', action='store_false', dest='ship_duty',
                help='show all puzzles, not just ship duty stations')

        ao('--min-cache-reuse', type='int', dest='min_max_age',
                metavar='SECONDS', default=60,
                help='always reuse cache yoweb data if no older than this')

        (opts,args) = pa.parse_args()
        random.seed()

        if len(args) < 1:
                pa.error('need a mode argument')

        if opts.debug_fd is not None:
                opts.debug_file = os.fdopen(opts.debug_fd, 'w')
        else:
                opts.debug_file = sys.stdout

        # mode 'foo-bar' is implemented by do_foo_bar.  A literal '_' in
        # the mode is turned into '#' first so that it cannot match any
        # function name.
        mode = args[0]
        mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_')
        try: mode_fn = globals()[mode_fn_name]
        except KeyError: pa.error('unknown mode "%s"' % mode)

        # fixed parameters
        opts.expire_age = max(3600, opts.min_max_age)

        opts.ship_reboard_clearout = 3600

        # only a leading '~/' is expanded.  expanduser copes with HOME
        # being unset, where concatenating os.getenv('HOME') would fail.
        if opts.cache_dir.startswith('~/'):
                opts.cache_dir = os.path.expanduser(opts.cache_dir)

        if opts.display is None:
                # default to plain scrolling output if debug output is
                # going to stdout or stdout is not a terminal
                if ((opts.debug > 0 and opts.debug_fd is None)
                    or not os.isatty(sys.stdout.fileno())):
                        opts.display = 'dumb'
                else:
                        opts.display = 'overwrite'

        fetcher = Fetcher(opts.ocean, opts.cache_dir)

        mode_fn(args[1:], pa.error)
1246
# run the program only when executed as a script, so that importing
# this file (for instance to test parts of it) has no side effects
if __name__ == '__main__':
        main()