chiark / gitweb /
a943f3cb5a4ccd7951d930486d121c07864948ed
[ypp-sc-tools.web-live.git] / yoweb-scrape
1 #!/usr/bin/python
2
3 #---------- setup ----------
4
5 import signal
6 signal.signal(signal.SIGINT, signal.SIG_DFL)
7
8 import os
9 import time
10 import urllib
11 import urllib2
12 import errno
13 import sys
14 import re as regexp
15 import random
16 import curses
17 from optparse import OptionParser
18
19 from BeautifulSoup import BeautifulSoup
20
opts = None

#---------- YPP parameters and arrays ----------

# Puzzle names, spelled exactly as they are compared against the
# alt text of the standing images on a pirate page.
puzzles = [
        'Swordfighting', 'Bilging', 'Sailing', 'Rigging', 'Navigating',
        'Battle Navigation', 'Gunning', 'Carpentry', 'Rumble',
        'Treasure Haul', 'Drinking', 'Spades', 'Hearts', 'Treasure Drop',
        'Poker', 'Distilling', 'Alchemistry', 'Shipwrightery',
        'Blacksmithing', 'Foraging',
]

# Standing names; the position in this list is used as the numeric
# standing elsewhere in the file.
standingvals = [
        'Able', 'Distinguished', 'Respected', 'Master',
        'Renowned', 'Grand-Master', 'Legendary', 'Ultimate',
]

# Matches the href of a link to a pirate page.
pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm')

# Width used when laying out pirate names in tables.
max_pirate_namelen = 12
36
37
38 #---------- general utilities ----------
39
def debug(m):
        # Print message m on opts.debug_file, but only when the debug
        # level in opts is positive; otherwise do nothing.
        if not opts.debug > 0:
                return
        print >>opts.debug_file, m
43
def sleep(seconds):
        # Pause for the given number of seconds.  When debugging is on,
        # the debug file is flushed first, so anything written so far
        # is out before we block.
        debugging = opts.debug > 0
        if debugging:
                opts.debug_file.flush()
        time.sleep(seconds)
48
def format_time_interval(ti):
        # Render an interval of ti seconds compactly: 'M:SS' below two
        # minutes, then whole minutes, whole hours, and finally whole days.
        if ti < 120:
                minutes = ti / 60
                seconds = ti % 60
                return '%d:%02d' % (minutes, seconds)
        for (limit, fmt, unit) in ((7200, '%2dm', 60), (86400, '%dh', 3600)):
                if ti < limit:
                        return fmt % (ti / unit)
        return '%dd' % (ti / 86400)
54
55 #---------- caching and rate-limiting data fetcher ----------
56
class Fetcher:
        # Fetches URLs, keeping each response in a file in cachedir
        # named '#<quoted url>#'.  Before going to the network it sleeps
        # as needed so that fetches, judged by the mtimes of the cache
        # files, are suitably spaced out.

        def __init__(self, ocean, cachedir):
                # ocean may be None (see default_ocean).  cachedir is
                # created if it does not exist yet, and is scanned once
                # so that entries older than opts.expire_age are removed.
                debug('Fetcher init %s' % cachedir)
                self.ocean = ocean
                self.cachedir = cachedir
                try: os.mkdir(cachedir)
                except (OSError,IOError) as oe:
                        if oe.errno != errno.EEXIST: raise
                self._cache_scan(time.time())

        def default_ocean(self, ocean='ice'):
                # Set the ocean, unless one has already been chosen.
                if self.ocean is None:
                        self.ocean = ocean

        def _cache_scan(self, now):
                # Removes cache files older than opts.expire_age and
                # returns the ages (in seconds relative to now) of the
                # remaining ones, unsorted.  Files which vanish while we
                # are looking at them are silently skipped.
                ages = []
                debug('Fetcher   scan_cache')
                for leaf in os.listdir(self.cachedir):
                        if not leaf.startswith('#'): continue
                        path = self.cachedir + '/' + leaf
                        try: s = os.stat(path)
                        except (OSError,IOError) as oe:
                                if oe.errno != errno.ENOENT: raise
                                continue
                        age = now - s.st_mtime
                        if age > opts.expire_age:
                                debug('Fetcher    expire %d %s' % (age, path))
                                try: os.remove(path)
                                except (OSError,IOError) as oe:
                                        if oe.errno != errno.ENOENT: raise
                                continue
                        ages.append(age)
                return ages

        def need_wait(self, now, imaginary=None):
                # Returns how long (in seconds) we should wait before
                # the next fetch; 0 means no waiting is needed.
                # imaginary is an optional list of extra ages which are
                # treated as if they were ages of cache entries.
                #
                # The ages are visited youngest first; the n'th one must
                # be at least min_age old, where min_age starts at 1 and
                # grows by +3 then *1.25 for each entry.  Entries of age
                # 300 or more never cause waiting.
                ages = self._cache_scan(now)
                if imaginary:
                        ages.extend(imaginary)
                ages.sort()
                debug('Fetcher   ages ' + repr(ages))
                min_age = 1
                need_wait = 0
                for age in ages:
                        if age < min_age and age < 300:
                                debug('Fetcher   morewait min=%d age=%d' %
                                        (min_age, age))
                                need_wait = max(need_wait, min_age - age)
                        min_age += 3
                        min_age *= 1.25
                return need_wait

        def _rate_limit_cache_clean(self, now):
                # Sleep for as long as need_wait says is necessary.
                need_wait = self.need_wait(now)
                if need_wait > 0:
                        debug('Fetcher   wait %d' % need_wait)
                        sleep(need_wait)

        def fetch(self, url, max_age):
                # Returns the contents of url.  A cached copy is used if
                # it is no older than max_age, after max_age has been
                # clamped to between opts.min_max_age and
                # opts.expire_age.  Otherwise the url is fetched (after
                # any rate-limiting delay) and the cache file replaced
                # by writing a temporary file and renaming it.
                debug('Fetcher fetch %s' % url)
                cache_corename = urllib.quote_plus(url)
                cache_item = "%s/#%s#" % (self.cachedir, cache_corename)
                try: f = open(cache_item, 'r')
                except (OSError,IOError) as oe:
                        if oe.errno != errno.ENOENT: raise
                        f = None
                now = time.time()
                max_age = max(opts.min_max_age, min(max_age, opts.expire_age))
                if f is not None:
                        # The cache file is closed on every path out of
                        # here, whether it is used, stale or unreadable.
                        try:
                                s = os.fstat(f.fileno())
                                age = now - s.st_mtime
                                if age <= max_age:
                                        data = f.read()
                                        debug('Fetcher  cached %d > %d' %
                                                (max_age, age))
                                        return data
                                debug('Fetcher  stale %d < %d'% (max_age, age))
                        finally:
                                f.close()

                debug('Fetcher  fetch')
                self._rate_limit_cache_clean(now)

                stream = urllib2.urlopen(url)
                try: data = stream.read()
                finally: stream.close()
                # The temporary name includes our pid and still starts
                # with '#', so _cache_scan also sees (and eventually
                # expires) any leftover temporary files.
                cache_tmp = "%s/#%s~%d#" % (
                        self.cachedir, cache_corename, os.getpid())
                f = open(cache_tmp, 'w')
                try: f.write(data)
                finally: f.close()
                os.rename(cache_tmp, cache_item)
                debug('Fetcher  stored')
                return data

        def yoweb(self, kind, tail, max_age):
                # Fetch a yoweb page from the current ocean (defaulting
                # the ocean first if none has been set).  kind and tail
                # are simply concatenated to form the end of the URL.
                self.default_ocean()
                url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
                        self.ocean, kind, tail)
                return self.fetch(url, max_age)
155
156 #---------- logging assistance for troubled screenscrapers ----------
157
class SoupLog:
        # Accumulates, in self.msgs, messages describing problems
        # found while scraping.

        def __init__(self):
                self.msgs = [ ]

        def msg(self, m):
                # Record one message.
                self.msgs.append(m)

        def soupm(self, obj, m):
                # Record a message along with the repr of the object
                # it concerns.
                self.msg('%s; in %s' % (m, repr(obj)))

        def needs_msgs(self, child_souplog):
                # Take over all of child_souplog's messages, leaving
                # the child with a fresh empty list.
                self.msgs.extend(child_souplog.msgs)
                child_souplog.msgs = [ ]
168
def soup_text(obj):
        # Concatenate all the text nodes found under obj and strip
        # leading and trailing whitespace from the result.
        pieces = obj.findAll(text=True)
        return ''.join(pieces).strip()
172
class SomethingSoupInfo(SoupLog):
        # Common base for the page scrapers: fetches one yoweb page
        # through the global fetcher and parses it into self._soup.

        def __init__(self, kind, tail, max_age):
                SoupLog.__init__(self)
                page = fetcher.yoweb(kind, tail, max_age)
                self._soup = BeautifulSoup(
                        page,
                        convertEntities=BeautifulSoup.HTML_ENTITIES)
180
181 #---------- scraper for pirate pages ----------
182
class PirateInfo(SomethingSoupInfo):
        # Public data members:
        #  pi.standings = { 'Treasure Haul': 2 ... }
        #       (values are indices into standingvals; puzzles whose
        #        standing could not be determined are absent)
        #  pi.name = name
        #  pi.crew = (id, name)     or None if not found
        #  pi.flag = (id, name)     or None if not found
        #  pi.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, pirate, max_age=300):
                # Fetch and scrape the page for the named pirate.
                SomethingSoupInfo.__init__(self,
                        'pirate.wm?target=', pirate, max_age)
                self.name = pirate
                self._find_standings()
                self.crew = self._find_crewflag('crew',
                        '^/yoweb/crew/info\\.wm')
                self.flag = self._find_crewflag('flag',
                        '^/yoweb/flag/info\\.wm')

        def _find_standings(self):
                # Sets self.standings from the standing images on the
                # page.  Problems are collected in a private log and are
                # only copied into self.msgs if at least one puzzle ends
                # up without a standing.
                imgs = self._soup.findAll('img',
                        src=regexp.compile('/yoweb/images/stat.*'))
                # Exactly one of the two groups matches; it holds the
                # standing name.
                standing_re = regexp.compile(
u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
                        )
                standings = { }

                for skill in puzzles:
                        standings[skill] = [ ]

                skl = SoupLog()

                for img in imgs:
                        try: puzzle = img['alt']
                        except KeyError: continue

                        if not puzzle in puzzles:
                                skl.soupm(img, 'unknown puzzle: "%s"' % puzzle)
                                continue
                        key = img.findParent('td')
                        if key is None:
                                skl.soupm(img, 'puzzle at root! "%s"' % puzzle)
                                continue
                        valelem = key.findNextSibling('td')
                        if valelem is None:
                                skl.soupm(key, 'puzzle missing sibling "%s"'
                                        % puzzle)
                                continue
                        valstr = soup_text(valelem)
                        match = standing_re.match(valstr)
                        if match is None:
                                skl.soupm(key, ('puzzle "%s" unparseable'+
                                        ' standing "%s"') % (puzzle, valstr))
                                continue
                        standing = match.group(match.lastindex)
                        standings[puzzle].append(standing)

                self.standings = { }

                for puzzle in puzzles:
                        sl = standings[puzzle]
                        if len(sl) > 1:
                                skl.msg('puzzle "%s" multiple standings %s' %
                                                (puzzle, repr(sl)))
                                continue
                        if not sl:
                                skl.msg('puzzle "%s" no standing found' % puzzle)
                                continue
                        standing = sl[0]
                        # Search all of standingvals, including the
                        # final entry.
                        try:
                                self.standings[puzzle] = (
                                        standingvals.index(standing))
                        except ValueError:
                                skl.msg('puzzle "%s" unknown standing "%s"' %
                                        (puzzle, standing))

                for puzzle in puzzles:
                        if not puzzle in self.standings:
                                # needs_msgs empties skl, so doing this
                                # once is enough.
                                self.needs_msgs(skl)
                                break

        def _find_crewflag(self, cf, yoweb_re):
                # cf is 'crew' or 'flag'; yoweb_re matches the href of
                # the link to the corresponding info page.  Returns
                # (id, name), or None (after logging a message) if there
                # is not exactly one such link or its id is unparseable.
                things = self._soup.findAll('a', href=regexp.compile(yoweb_re))
                if len(things) != 1:
                        self.msg('zero or several %s id references found' % cf)
                        return None
                thing = things[0]
                id_re = '\\b%sid\\=(\\w+)$' % cf
                id_haystack = thing['href']
                match = regexp.compile(id_re).search(id_haystack)
                if match is None:
                        self.soupm(thing, ('incomprehensible %s id ref'+
                                ' (%s in %s)') % (cf, id_re, id_haystack))
                        return None
                name = soup_text(thing)
                return (match.group(1), name)

        def __str__(self):
                return repr((self.crew, self.flag, self.standings, self.msgs))
281
282 #---------- scraper for crew pages ----------
283
class CrewInfo(SomethingSoupInfo):
        # Public data members:
        #  ci.crew = [ ('Captain',        ['Pirate', ...]),
        #              ('Senior Officer', [...]),
        #               ... ]
        #  ci.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, crewid, max_age=300):
                # Fetch and scrape the info page of the crew with this id.
                SomethingSoupInfo.__init__(self,
                        'crew/info.wm?crewid=', crewid, max_age)
                self._find_crew()

        def _find_crew(self):
                # Fills in self.crew.  The member table is located by
                # starting at the (single) captain image and walking
                # outwards through enclosing tables until one contains
                # a link to a pirate page.
                self.crew = []
                captain_imgs = self._soup.findAll('img',
                        src='/yoweb/images/crew-captain.png')
                if len(captain_imgs) != 1:
                        self.msg('crew members: no. of captain images != 1')
                        return
                node = captain_imgs[0]
                while not node.find('a', href=pirate_ref_re):
                        node = node.findParent('table')
                        if not node:
                                self.msg('crew members: cannot find table')
                                return
                members = None
                rank_img_re = regexp.compile('/yoweb/images/crew')
                for row in node.contents:
                        # Only look at element children, not bare text.
                        if isinstance(row,basestring):
                                continue

                        if row.find('img', attrs={'src': rank_img_re}):
                                # A row with a rank image starts a new rank.
                                rank = soup_text(row)
                                members = []
                                self.crew.append((rank, members))
                                continue
                        for cell in row.findAll('a', href=pirate_ref_re):
                                if members is not None:
                                        members.append(soup_text(cell))
                                else:
                                        self.soupm(cell,
                                                'crew members: crew before rank')

        def __str__(self):
                return repr((self.crew, self.msgs))
331
332 #---------- pretty-printer for tables of pirate puzzle standings ----------
333
class StandingsTable:
        # Builds up, in self.s, a text table with one line per pirate
        # and one column per puzzle (or group of puzzles).

        def __init__(self, use_puzzles=None, col_width=6):
                # use_puzzles lists the columns; an entry may itself be
                # a list of puzzles sharing one column.  If it is None
                # the columns depend on opts.ship_duty.
                if use_puzzles is not None:
                        columns = use_puzzles
                elif opts.ship_duty:
                        columns = [
                                'Navigating','Battle Navigation',
                                'Gunning',
                                ['Sailing','Rigging'],
                                'Bilging',
                                'Carpentry',
                                'Treasure Haul'
                        ]
                else:
                        columns = puzzles
                self._puzzles = columns
                self.s = ''
                self._cw = col_width-1

        def _pline(self, pirate, puzstrs, extra):
                # Append one table line: the name column, then each
                # entry of puzstrs padded and truncated to the column
                # width, then extra (if any).
                name_width = max(max_pirate_namelen, 14)
                parts = [' %-*s' % (name_width, pirate)]
                for v in puzstrs:
                        parts.append(' %-*.*s' % (self._cw,self._cw, v))
                if extra:
                        parts.append(' ' + extra)
                parts.append('\n')
                self.s += ''.join(parts)

        def _puzstr(self, pi, puzzle):
                # Cell text for one column: '?' if any standing is
                # unknown, '' for the lowest standing, otherwise an
                # optional number followed by stars and pluses.
                if isinstance(puzzle,list): group = puzzle
                else: group = [puzzle]
                try: standing = max([pi.standings[p] for p in group])
                except KeyError: return '?'
                if not standing: return ''
                pieces = []
                if self._cw > 4:
                        # NOTE(review): c1 is computed but not used; the
                        # numeric standing is what gets displayed.
                        c1 = standingvals[standing][0]
                        if standing < 3: c1 = c1.lower() # 3 = Master
                        pieces.append(repr(standing))
                if self._cw > 5:
                        pieces.append(' ')
                pieces.append('*' * (standing // 2))
                pieces.append('+' * (standing % 2))
                return ''.join(pieces)

        def headings(self):
                # Append the heading line, with puzzle names shortened.
                def puzn_redact(name):
                        if isinstance(name,list):
                                half = self._cw // 2
                                return '/'.join(
                                        ["%.*s" % (half, puzn_redact(n))
                                         for n in name])
                        spc = name.find(' ')
                        if spc < 0: return name
                        return name[0:min(4,spc)] + name[spc+1:]
                titles = [puzn_redact(p) for p in self._puzzles]
                self._pline('', titles, None)

        def literalline(self, line):
                # Append line verbatim, followed by a newline.
                self.s += line + '\n'

        def pirate_dummy(self, name, standingstring, extra=None):
                # Append a line for name using standingstring repeated
                # once per column as the sequence of cells.
                cells = standingstring * len(self._puzzles)
                self._pline(name, cells, extra)

        def pirate(self, pi, extra=None):
                # Append the line for one pirate, given its PirateInfo.
                cells = [self._puzstr(pi,puz) for puz in self._puzzles]
                self._pline(pi.name, cells, extra)

        def results(self):
                # The whole table built so far.
                return self.s
396
397 #---------- chat log parser ----------
398
class PirateAboard:
        # This is essentially a transparent, dumb, data class.
        #  pa.v                 vessel record the pirate is aboard
        #  pa.name
        #  pa.last_time
        #  pa.last_event
        #  pa.gunner
        #  pa.last_chat_time
        #  pa.last_chat_chan
        #  pa.pi                PirateInfo, or None if not fetched yet

        def __init__(pa, pn, v, time, event):
                pa.name = pn
                pa.v = v
                (pa.last_time, pa.last_event) = (time, event)
                (pa.last_chat_time, pa.last_chat_chan) = (None, None)
                pa.gunner = False
                pa.pi = None

        def pirate_info(pa):
                # Returns the PirateInfo for this pirate, or None if we
                # have none yet and the fetcher says we would have to
                # wait.  Existing info is refreshed once it is older
                # than a randomly chosen 120..240 seconds, again only
                # if the fetcher is ready.
                now = time.time()
                if not pa.pi:
                        imaginary = [1]
                else:
                        age = now - pa.pi_fetched
                        guide = random.randint(120,240)
                        if age <= guide:
                                return pa.pi
                        debug('PirateAboard refresh %d > %d  %s' % (
                                age, guide, pa.name))
                        imaginary = [2,6]
                wait = fetcher.need_wait(now, imaginary)
                if wait:
                        debug('PirateAboard fetcher not ready %d' % wait)
                        return pa.pi
                pa.pi = PirateInfo(pa.name, 600)
                pa.pi_fetched = now
                return pa.pi
439
440 class ChatLogTracker:
441         # This is quite complex so we make it opaque.  Use the
442         # official invokers, accessors etc.
443
444         def __init__(self, myself_pi, logfn):
445                 self._pl = {}   # self._pl['Pirate'] =
446                 self._vl = {}   #   self._vl['Vessel']['Pirate'] = PirateAboard
447                                 # self._vl['Vessel']['#lastinfo']
448                                 # self._vl['Vessel']['#name']
449                                 # self._v = self._vl[self._vessel]
450                 self._date = None
451                 self._myself = myself_pi
452                 self._f = file(logfn)
453                 self._lbuf = ''
454                 self._progress = [0, os.fstat(self._f.fileno()).st_size]
455                 self._disembark_myself()
456                 self._need_redisplay = False
457                 self._lastvessel = None
458
459         def _disembark_myself(self):
460                 self._v = None
461                 self._vessel = None
462                 self.force_redisplay()
463
464         def force_redisplay(self):
465                 self._need_redisplay = True
466
467         def _vessel_updated(self, v, timestamp):
468                 v['#lastinfo'] = timestamp
469                 self.force_redisplay()
470
471         def _onboard_event(self,v,timestamp,pirate,event):
472                 pa = self._pl.get(pirate, None)
473                 if pa is not None and pa.v is v:
474                         pa.last_time = timestamp
475                         pa.last_event = event
476                 else:
477                         if pa is not None: del pa.v[pirate]
478                         pa = PirateAboard(pirate, v, timestamp, event)
479                         self._pl[pirate] = pa
480                         v[pirate] = pa
481                 self._vessel_updated(v, timestamp)
482                 return pa
483
484         def _trash_vessel(self, v):
485                 for pn in v:
486                         if pn.startswith('#'): continue
487                         del self._pl[pn]
488                 vn = v['#name']
489                 del self._vl[vn]
490                 if v is self._v: self._disembark_myself()
491                 self.force_redisplay()
492
493         def _vessel_stale(self, v, timestamp):
494                 return timestamp - v['#lastinfo'] > opts.ship_reboard_clearout
495
496         def _vessel_check_expire(self, v, timestamp):
497                 if not self._vessel_stale(v, timestamp):
498                         return v
499                 self._debug_line_disposition(timestamp,'',
500                         'stale-reset ' + v['#name'])
501                 self._trash_vessel(v)
502                 return None
503
504         def expire_garbage(self, timestamp):
505                 for v in self._vl.values():
506                         self._vessel_check_expire(v, timestamp)
507
508         def _vessel_lookup(self, vn, timestamp, dml=[], create=False):
509                 v = self._vl.get(vn, None)
510                 if v is not None:
511                         v = self._vessel_check_expire(v, timestamp)
512                 if v is not None:
513                         dml.append('found')
514                         return v
515                 if not create:
516                         dml.append('no')
517                 dml.append('new')
518                 self._vl[vn] = v = { '#name': vn }
519                 self._vessel_updated(v, timestamp)
520                 return v
521
522         def _find_matching_vessel(self, pattern, timestamp, cmdr,
523                                         dml=[], create=False):
524                 # use when a commander pirate `cmdr' specified a vessel
525                 #  by name `pattern' (either may be None)
526                 # if create is true, will create the vessel
527                 #  record if an exact name is specified
528
529                 if (pattern is not None and
530                     not '*' in pattern
531                     and len(pattern.split(' ')) == 2):
532                         vn = pattern.title()
533                         dml.append('exact')
534                         return self._vessel_lookup(
535                                 vn, timestamp, dml=dml, create=create)
536
537                 if pattern is None:
538                         pattern_check = lambda vn: True
539                 else:
540                         re = '(?:.* )?%s$' % pattern.lower().replace('*','.*')
541                         pattern_check = regexp.compile(re, regexp.I).match
542
543                 tries = []
544
545                 cmdr_pa = self._pl.get(cmdr, None)
546                 if cmdr_pa: tries.append((cmdr_pa.v, 'cmdr'))
547
548                 tries.append((self._v, 'here'))
549                 tried_vns = []
550
551                 for (v, dm) in tries:
552                         if v is None: dml.append(dm+'?'); continue
553                         
554                         vn = v['#name']
555                         if not pattern_check(vn):
556                                 tried_vns.append(vn)
557                                 dml.append(dm+'#')
558                                 continue
559
560                         dml.append(dm+'!')
561                         return v
562
563                 if pattern is not None and '*' in pattern:
564                         search = [
565                                 (vn,v)
566                                 for (vn,v) in self._vl.iteritems()
567                                 if not self._vessel_stale(v, timestamp)
568                                 if pattern_check(vn)
569                                 ]
570                         #debug('CLT-RE /%s/ wanted (%s) searched (%s)' % (
571                         #       re,
572                         #       '/'.join(tried_vns),
573                         #       '/'.join([vn for (vn,v) in search])))
574
575                         if len(search)==1:
576                                 dml.append('one')
577                                 return search[0][1]
578                         elif search:
579                                 dml.append('many')
580                         else:
581                                 dml.append('none')
582
583         def _debug_line_disposition(self,timestamp,l,m):
584                 debug('CLT %13s %-40s %s' % (timestamp,m,l))
585
        def chatline(self,l):
                # Process one line l of the chat log, updating the
                # vessel and pirate tracking state accordingly.  Every
                # exit goes through d(), which debug-logs what was done
                # with the line; the return value is whatever d returns.
                #
                # NB the helper lambdas and nested functions below refer
                # to l, m and timestamp from this scope, so they see the
                # values those variables have at the time of the call.
                rm = lambda re: regexp.match(re,l)
                d = lambda m: self._debug_line_disposition(timestamp,l,m)
                timestamp = None

                # A date line: remember the date as a list of three
                # ints and forget the previous timestamp.
                m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$')
                if m:
                        self._date = [int(x) for x in m.groups()]
                        self._previous_timestamp = None
                        return d('date '+`self._date`)

                if self._date is None:
                        return d('date unset')

                m = rm('\\[(\d\d):(\d\d):(\d\d)\\] ')
                if not m:
                        return d('no timestamp')

                # Combine the date with the line's time of day.  If that
                # is before the previous line's timestamp, advance the
                # third date component and try again (presumably the log
                # has run past midnight - TODO confirm).
                while True:
                        time_tuple = (self._date +
                                      [int(x) for x in m.groups()] +
                                      [-1,-1,-1])
                        timestamp = time.mktime(time_tuple)
                        if timestamp >= self._previous_timestamp: break
                        self._date[2] += 1
                        self._debug_line_disposition(timestamp,'',
                                'new date '+`self._date`)

                self._previous_timestamp = timestamp

                # Drop everything up to and including the first space,
                # ie the timestamp matched above.
                l = l[l.find(' ')+1:]

                # ob_x: record an event for pirate on our current vessel.
                # ob1:  the same for the pirate in group 1 of the latest
                #       match, logging `did' as the disposition.
                # oba:  like ob1, with group 2 appended to the event.
                def ob_x(pirate,event):
                        return self._onboard_event(
                                        self._v, timestamp, pirate, event)
                def ob1(did): ob_x(m.group(1), did); return d(did)
                def oba(did): return ob1('%s %s' % (did, m.group(2)))

                # Record the departure as an event (which also updates
                # the vessel), then forget the pirate entirely.
                def disembark(v, timestamp, pirate, event):
                        self._onboard_event(
                                        v, timestamp, pirate, 'leaving '+event)
                        del v[pirate]
                        del self._pl[pirate]

                def disembark_me(why):
                        self._disembark_myself()
                        return d('disembark-me '+why)

                # We ourselves board a vessel: look it up (creating the
                # record if need be), make it the current vessel, note
                # ourselves as aboard, and expire stale vessels.
                m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$')
                if m:
                        dm = ['boarding']
                        pn = self._myself.name
                        vn = m.group(1)
                        v = self._vessel_lookup(vn, timestamp, dm, create=True)
                        self._lastvessel = self._vessel = vn
                        self._v = v
                        ob_x(pn, 'we boarded')
                        self.expire_garbage(timestamp)
                        return d(' '.join(dm))

                # Everything below needs a current vessel.
                if self._v is None:
                        return d('no vessel')

                m = rm('(\\w+) has come aboard\\.$')
                if m: return ob1('boarded');

                m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$')
                if m:
                        (who,what) = m.groups()
                        pa = ob_x(who,'ord '+what)
                        if what == 'Gunning':
                                pa.gunner = True
                        return d('duty order')

                m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$')
                if m: oba('stopped'); return d("end")

                # Note that speaker chatted on chan, if we know of them
                # and they are on our vessel.  Returns a word saying
                # which of those cases applied.
                def chat_core(speaker, chan):
                        try: pa = self._pl[speaker]
                        except KeyError: return 'mystery'
                        if pa.v is not self._v: return 'elsewhere'
                        pa.last_chat_time = timestamp
                        pa.last_chat_chan = chan
                        self.force_redisplay()
                        return 'here'

                def chat(chan):
                        speaker = m.group(1)
                        dm = chat_core(speaker, chan)
                        return d('chat %s %s' % (chan, dm))

                # Chat whose text may be a command of the form
                #   /a [vessel pattern:] pirate...
                #   /d [vessel pattern:] pirate...
                # where /a records the pirates as aboard the vessel
                # and /d records them as having left it.  Anything else
                # is treated as ordinary chat.
                def chat_metacmd(chan):
                        (cmdr, metacmd) = m.groups()
                        metacmd = regexp.sub('\\s+', ' ', metacmd).strip()
                        m2 = regexp.match(
                                '/([ad]) (?:([A-Za-z* ]+)\\s*:)?([A-Za-z ]+)$',
                                metacmd)
                        if not m2: return chat(chan)

                        (cmd, pattern, targets) = m2.groups()
                        dml = ['cmd', chan, cmd]

                        if cmd == 'a': each = self._onboard_event
                        else: each = disembark

                        if cmdr == self._myself.name:
                                dml.append('self')
                                how = 'cmd: %s' % cmd
                        else:
                                dml.append('other')
                                how = 'cmd: %s %s' % (cmd,cmdr)

                        v = self._find_matching_vessel(
                                pattern, timestamp, cmdr, dml, create=True)

                        if v is not None:
                                targets = targets.strip().split(' ')
                                dml.append(`len(targets)`)
                                for target in targets:
                                        each(v, timestamp, target.title(), how)
                                self._vessel_updated(v, timestamp)

                        dm = ' '.join(dml)
                        chat_core(cmdr, 'cmd '+chan)
                        return d(dm)

                m = rm('(\\w+) (?:issued an order|ordered everyone) "')
                if m: return ob1('general order');

                m = rm('(\\w+) says, "')
                if m: return chat('public')

                m = rm('(\\w+) tells ye, "')
                if m: return chat('private')

                # NOTE(review): here group 1, which chat_metacmd treats
                # as the commander, is the pirate we told.
                m = rm('Ye told (\\w+), "(.*)"$')
                if m: return chat_metacmd('private')

                m = rm('(\\w+) flag officer chats, "')
                if m: return chat('flag officer')

                m = rm('(\\w+) officer chats, "(.*)"$')
                if m: return chat_metacmd('officer')

                # Messages which mean that we ourselves have left.
                m = rm('Ye accepted the offer to job with ')
                if m: return disembark_me('jobbing')

                m = rm('Ye hop on the ferry and are whisked away ')
                if m: return disembark_me('ferry')

                m = rm('Whisking away to yer home on the magical winds')
                if m: return disembark_me('home')

                # If we are among the winners, each winner whose name
                # has no space in it is noted as aboard.
                m = rm('Game over\\.  Winners: ([A-Za-z, ]+)\\.$')
                if m:
                        pl = m.group(1).split(', ')
                        if not self._myself.name in pl:
                                return d('lost melee')
                        for pn in pl:
                                if ' ' in pn: continue
                                ob_x(pn,'won melee')
                        return d('won melee')

                m = rm('(\\w+) is eliminated\\!')
                if m: return ob1('eliminated in fray');

                m = rm('(\\w+) has driven \w+ from the ship\\!')
                if m: return ob1('boarder repelled');

                m = rm('\w+ has bested (\\w+), and turns'+
                        ' to the rest of the ship\\.')
                if m: return ob1('boarder unrepelled');

                m = rm('(\\w+) has left the vessel\.')
                if m:
                        pirate = m.group(1)
                        disembark(self._v, timestamp, pirate, 'disembarked')
                        return d('disembarked')

                return d('not-matched')
766
767         def _str_vessel(self, vn, v):
768                 s = ' vessel %s\n' % vn
769                 s += ' '*20 + "%-*s   %13s\n" % (
770                                 max_pirate_namelen, '#lastinfo',
771                                 v['#lastinfo'])
772                 assert v['#name'] == vn
773                 for pn in sorted(v.keys()):
774                         if pn.startswith('#'): continue
775                         pa = v[pn]
776                         assert pa.v == v
777                         assert self._pl[pn] == pa
778                         s += ' '*20 + "%s %-*s %13s %-30s %13s %s\n" % (
779                                 (' ','G')[pa.gunner],
780                                 max_pirate_namelen, pn,
781                                 pa.last_time, pa.last_event,
782                                 pa.last_chat_time, pa.last_chat_chan)
783                 return s
784
785         def __str__(self):
786                 s = '''<ChatLogTracker
787  myself %s
788  vessel %s
789 '''                     % (self._myself.name, self._vessel)
790                 assert ((self._v is None and self._vessel is None) or
791                         (self._v is self._vl[self._vessel]))
792                 if self._vessel is not None:
793                         s += self._str_vessel(self._vessel, self._v)
794                 for vn in sorted(self._vl.keys()):
795                         if vn == self._vessel: continue
796                         s += self._str_vessel(vn, self._vl[vn])
797                 for p in self._pl:
798                         pa = self._pl[p]
799                         assert pa.v[p] is pa
800                         assert pa.v in self._vl.values()
801                 s += '>\n'
802                 return s
803
804         def catchup(self, progress=None):
805                 while True:
806                         more = self._f.readline()
807                         if not more: break
808
809                         self._progress[0] += len(more)
810                         if progress: progress.progress(*self._progress)
811
812                         self._lbuf += more
813                         if self._lbuf.endswith('\n'):
814                                 self.chatline(self._lbuf.rstrip())
815                                 self._lbuf = ''
816                                 if opts.debug >= 2:
817                                         debug(self.__str__())
818                 if progress: progress.caughtup()
819
820         def changed(self):
821                 rv = self._need_redisplay
822                 self._need_redisplay = False
823                 return rv
824         def myname(self):
825                 # returns our pirate name
826                 return self._myself.name
827         def vesselname(self):
828                 # returns the vessel name we're aboard or None
829                 return self._vessel
830         def lastvesselname(self):
831                 # returns the last vessel name we were aboard or None
832                 return self._lastvessel
833         def aboard(self, vesselname=True):
834                 # returns a list of PirateAboard the vessel
835                 #  sorted by pirate name
836                 #  you can pass this None and you'll get []
837                 #  or True for the current vessel (which is the default)
838                 if vesselname is True: v = self._v
839                 else: v = self._vl.get(vesselname.title())
840                 if v is None: return []
841                 return [ v[pn]
842                          for pn in sorted(v.keys())
843                          if not pn.startswith('#') ]
844
845 #---------- implementations of actual operation modes ----------
846
847 def do_pirate(pirates, bu):
848         print '{'
849         for pirate in pirates:
850                 info = PirateInfo(pirate)
851                 print '%s: %s,' % (`pirate`, info)
852         print '}'
853
def prep_crew_of(args, bu, max_age=300):
        """Look up the crew of the single pirate named in args.

        args must hold exactly one pirate name, otherwise bu() is
        called with a usage message.  max_age is passed on to both
        the PirateInfo and the CrewInfo lookups.

        Returns a CrewInfo built from the first element of the
        pirate's crew attribute, or None when that attribute is None.
        """
        if len(args) != 1:
                bu('crew-of takes one pirate name')
        pirate = PirateInfo(args[0], max_age)
        crew = pirate.crew
        if crew is None:
                return None
        return CrewInfo(crew[0], max_age)
859
860 def do_crew_of(args, bu):
861         ci = prep_crew_of(args, bu)
862         print ci
863
864 def do_standings_crew_of(args, bu):
865         ci = prep_crew_of(args, bu, 60)
866         tab = StandingsTable()
867         tab.headings()
868         for (rank, members) in ci.crew:
869                 if not members: continue
870                 tab.literalline('%s:' % rank)
871                 for p in members:
872                         pi = PirateInfo(p, random.randint(900,1800))
873                         tab.pirate(pi)
874         print tab.results()
875
class ProgressPrintPercentage:
        """Progress reporter which prints a percentage on one line.

        Output goes to the file object f (default sys.stdout).  Each
        update ends with a carriage return rather than a newline, so
        successive updates overwrite each other on a terminal.
        """
        def __init__(self, f=sys.stdout):
                self._f = f
        def progress_string(self,done,total):
                """Return the progress text for done out of total.

                The percentage is capped at 100 and is reported as 100
                when total is not positive, so a zero total cannot
                raise ZeroDivisionError and the text never grows wider
                than what caughtup() blanks out.
                """
                if total <= 0:
                        percent = 100
                else:
                        percent = min(100, (done*100) // total)
                return "scan chat logs %3d%%\r" % percent
        def progress(self,*a):
                """Write and flush progress_string(*a)."""
                self._f.write(self.progress_string(*a))
                self._f.flush()
        def show_init(self, pirate, ocean):
                """Print the startup banner naming pirate and ocean."""
                print >>self._f, 'Starting up, %s on the %s ocean' % (
                        pirate, ocean)
        def caughtup(self):
                """Blank out the progress text once scanning is done."""
                self._f.write('                   \r')
                self._f.flush()
890
891 #----- modes which use the chat log parser are quite complex -----
892
def prep_chat_log(args, bu,
                progress=ProgressPrintPercentage(),
                max_myself_age=3600):
        """Set up chat log tracking for the modes which need it.

        args must be exactly one chat log filename whose last path
        component starts PIRATE_OCEAN_ (capitalised pirate name,
        lower-case ocean name); otherwise bu() is called with a usage
        message.  The ocean from the filename is handed to
        fetcher.default_ocean().

        progress is told about startup via show_init() and is passed
        to the initial catchup over the existing log contents.
        max_myself_age is the max age used for our own PirateInfo.

        Returns (myself, track): our PirateInfo and a ChatLogTracker
        which has caught up with the log and been told to redisplay.
        """
        if len(args) != 1: bu('this action takes only chat log filename')
        logfn = args[0]
        parsed = regexp.match('(?:.*/)?([A-Z][a-z]+)_([a-z]+)_', logfn)
        if parsed is None:
                bu('chat log filename is not in expected format')
        pirate = parsed.group(1)
        ocean = parsed.group(2)
        fetcher.default_ocean(ocean)

        progress.show_init(pirate, fetcher.ocean)
        myself = PirateInfo(pirate,max_myself_age)
        track = ChatLogTracker(myself, logfn)

        # lower the debug level by two while scanning the old log
        opts.debug -= 2
        track.catchup(progress)
        opts.debug += 2

        track.force_redisplay()
        return (myself, track)
915
916 def do_track_chat_log(args, bu):
917         (myself, track) = prep_chat_log(args, bu)
918         while True:
919                 track.catchup()
920                 if track.changed():
921                         print track
922                 sleep(1)
923
924 #----- ship management aid -----
925
class Display_dumb(ProgressPrintPercentage):
        """Ship aid display which just prints each update in turn.

        Progress reporting is inherited from ProgressPrintPercentage.
        """
        def __init__(self):
                ProgressPrintPercentage.__init__(self)

        def show(self, s):
                """Print s to stdout, preceded by two blank lines."""
                sys.stdout.write('\n\n%s\n' % (s,))

        def realstart(self):
                """Nothing to prepare before the first real display."""
                return None
933
class Display_overwrite(ProgressPrintPercentage):
        """Ship aid display which redraws in place from the top.

        Terminal control strings are looked up with curses/terminfo
        and kept in self._t: 'ho' (home cursor), 'el' (clear to end of
        line) and 'ed' (clear to end of screen).  Three levels of
        capability are handled:
          - ho (or cup), el and ed all available: home the cursor and
            overwrite line by line;
          - only 'clear' available: clear the whole screen before
            each redraw ('ho' is set to the clear string, el/ed to '');
          - not even 'clear': behave like Display_dumb.
        Progress reporting is inherited from ProgressPrintPercentage.
        """
        def __init__(self):
                ProgressPrintPercentage.__init__(self)

                # setupterm is given /dev/null as its fd; all our own
                # output is written via sys.stdout in show()/realstart()
                null = file('/dev/null','w')
                curses.setupterm(fd=null.fileno())

                self._clear = curses.tigetstr('clear')
                if not self._clear:
                        self._debug('missing clear!')
                        # No way to clear the screen at all: fall back
                        # to dumb output.  _clear must be a string so
                        # that realstart() can still write it.
                        self._clear = ''
                        self.show = self._show_dumb
                        return

                self._t = {'el':'', 'ed':''}
                if not self._init_sophisticated():
                        for k in self._t.keys(): self._t[k] = ''
                        self._t['ho'] = self._clear

        def _debug(self,m): debug('display overwrite: '+m)

        def _init_sophisticated(self):
                """Fill self._t from terminfo; return 1 if complete, else 0.

                'ho' is synthesised from 'cup' (move to row 0, column
                0) when the terminal has no 'ho' capability of its own.
                """
                for k in self._t.keys():
                        s = curses.tigetstr(k)
                        self._t[k] = s
                self._t['ho'] = curses.tigetstr('ho')
                if not self._t['ho']:
                        cup = curses.tigetstr('cup')
                        # tparm cannot take a missing capability; leave
                        # 'ho' empty so it is reported as missing below
                        if cup:
                                self._t['ho'] = curses.tparm(cup,0,0)
                missing = [k for k in self._t.keys() if not self._t[k]]
                if missing:
                        self._debug('missing '+(' '.join(missing)))
                        return 0
                return 1

        def _show_dumb(self, s):
                """Fallback show(): same output as Display_dumb.show."""
                sys.stdout.write('\n\n%s\n' % (s,))

        def show(self, s):
                """Redraw the screen with the text s, starting at the top."""
                w = sys.stdout.write
                def wti(k): w(self._t[k])

                wti('ho')
                nl = ''
                for l in s.rstrip().split('\n'):
                        w(nl)
                        w(l)
                        # wipe whatever was left of the old, longer line
                        wti('el')
                        nl = '\r\n'
                wti('ed')
                w(' ')
                sys.stdout.flush()

        def realstart(self):
                """Clear the screen before the first real display."""
                sys.stdout.write(self._clear)
                sys.stdout.flush()
986                         
987
def do_ship_aid(args, bu):
        """'ship-aid' mode: continuously display who is aboard.

        args holds the chat log filename and bu is the bad-usage
        callback; both go to prep_chat_log().  The display class is
        chosen by opts.display ('Display_' + that name), and
        opts.ship_duty defaults to True if the user did not set it.

        Once a second, forever, this catches up with the chat log and
        shows a heading (our name, vessel and the time) followed by a
        StandingsTable with one row per pirate aboard the current or
        most recent vessel.  Each row carries a gunner flag plus the
        age and description of the pirate's last event and last chat.
        """
        if opts.ship_duty is None: opts.ship_duty = True

        display_class = globals()['Display_'+opts.display]
        displayer = display_class()
        spinner = '/-\\'

        (myself, track) = prep_chat_log(args, bu, progress=displayer)

        def timeevent(now, t, e):
                # fixed-width " AGE  EVENT" column, blank if no time
                if t is None: return ' ' * 22
                return " %-4s %-16s" % (format_time_interval(now - t),e)

        displayer.realstart()

        def find_vessel():
                # returns (vessel name or None, description suffix)
                current = track.vesselname()
                if current:
                        return (current, " on board the %s" % current)
                previous = track.lastvesselname()
                if previous:
                        return (previous, " ashore from the %s" % previous)
                return (None, " not on a vessel")

        displayer.show(track.myname() + find_vessel()[1] + '...')

        while True:
                track.catchup()
                now = time.time()

                (vn, where) = find_vessel()
                text = track.myname() + where
                text += " at %s\n" % time.strftime("%Y-%m-%d %H:%M:%S")

                tbl = StandingsTable()
                tbl.headings()

                for pa in track.aboard(vn):
                        pi = pa.pirate_info()

                        if pa.gunner: flag = 'G '
                        else: flag = '  '
                        extra = (flag
                                + timeevent(now, pa.last_time,
                                        pa.last_event)
                                + timeevent(now, pa.last_chat_time,
                                        pa.last_chat_chan))

                        if pi is None:
                                # no info yet: show a spinner instead
                                tbl.pirate_dummy(pa.name, spinner[0], extra)
                        else:
                                tbl.pirate(pi, extra)

                text += tbl.results()

                displayer.show(text)
                sleep(1)
                spinner = spinner[1:3] + spinner[0]
1043
1044 #---------- main program ----------
1045
def main():
        """Parse the command line and run the selected mode.

        Sets the globals opts (parsed options plus some fixed
        parameters) and fetcher (the shared Fetcher), then calls the
        mode function do_MODE with the remaining arguments and the
        option parser's error() as the bad-usage callback.
        """
        global opts, fetcher

        pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
actions:
 yoweb-scrape [--ocean OCEAN ...] pirate PIRATE
 yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
 yoweb-scrape [options] ship-aid CHAT-LOG  (must be .../PIRATE_OCEAN_chat-log*)

display modes (for --display) apply to ship-aid:
 --display=dumb       just print new information, scrolling the screen
 --display=overwrite  use cursor motion, selective clear, etc. to redraw at top
''')
        ao = pa.add_option
        ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None,
                help='select ocean OCEAN')
        ao('--cache-dir', dest='cache_dir', metavar='DIR',
                default='~/.yoweb-scrape-cache',
                help='cache yoweb pages in DIR')
        ao('-D','--debug', action='count', dest='debug', default=0,
                help='enable debugging output')
        ao('--debug-fd', type='int', dest='debug_fd',
                help='write any debugging output to specified fd')
        ao('-q','--quiet', action='store_true', dest='quiet',
                help='suppress warning output')
        ao('--display', action='store', dest='display',
                type='choice', choices=['dumb','overwrite'],
                help='how to display ship aid')

        ao('--ship-duty', action='store_true', dest='ship_duty',
                help='show ship duty station puzzles')
        ao('--all-puzzles', action='store_false', dest='ship_duty',
                help='show all puzzles, not just ship duty stations')

        (opts,args) = pa.parse_args()
        random.seed()

        if len(args) < 1:
                pa.error('need a mode argument')

        if opts.debug_fd is not None:
                opts.debug_file = os.fdopen(opts.debug_fd, 'w')
        else:
                opts.debug_file = sys.stdout

        # Mode 'foo-bar' is implemented by do_foo_bar.  Underscores in
        # the mode are first turned into '#', which can never appear
        # in a function name, so that only the hyphenated spelling is
        # accepted.
        mode = args[0]
        mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_')
        try: mode_fn = globals()[mode_fn_name]
        except KeyError: pa.error('unknown mode "%s"' % mode)

        # fixed parameters
        opts.min_max_age = 60
        opts.expire_age = 3600
        opts.ship_reboard_clearout = 3600

        if opts.cache_dir.startswith('~/'):
                # expanduser copes with HOME being unset, where
                # os.getenv('HOME') + ... would raise TypeError
                opts.cache_dir = os.path.expanduser(opts.cache_dir)

        if opts.display is None:
                # debug output on stdout, or stdout not a terminal,
                # would not mix with cursor-motion redrawing
                if ((opts.debug > 0 and opts.debug_fd is None)
                    or not os.isatty(sys.stdout.fileno())):
                        opts.display = 'dumb'
                else:
                        opts.display = 'overwrite'

        fetcher = Fetcher(opts.ocean, opts.cache_dir)

        mode_fn(args[1:], pa.error)

main()