chiark / gitweb /
40cdad56c80dbea539ea2c4cc86c01828d35a2c1
[ypp-sc-tools.main.git] / yoweb-scrape
1 #!/usr/bin/python
2
3 #---------- setup ----------
4
5 import signal
6 signal.signal(signal.SIGINT, signal.SIG_DFL)
7
8 import os
9 import time
10 import urllib
11 import urllib2
12 import errno
13 import sys
14 import re as regexp
15 import random
16 import curses
17 from optparse import OptionParser
18
19 from BeautifulSoup import BeautifulSoup
20
21 opts = None
22
23 #---------- YPP parameters and arrays ----------
24
# Names of all puzzles whose standings are looked for on a pirate
# page, in the order in which they are tabulated by default.
puzzles = [
        'Swordfighting', 'Bilging', 'Sailing', 'Rigging', 'Navigating',
        'Battle Navigation', 'Gunning', 'Carpentry', 'Rumble',
        'Treasure Haul',
        'Drinking', 'Spades', 'Hearts', 'Treasure Drop', 'Poker',
        'Distilling',
        'Alchemistry', 'Shipwrightery', 'Blacksmithing', 'Foraging',
]

# Standing names, lowest first; a standing is generally handled as
# its index in this list.
standingvals = [
        'Able', 'Distinguished', 'Respected', 'Master',
        'Renowned', 'Grand-Master', 'Legendary', 'Ultimate',
]

# Matches the href of a link to a pirate's yoweb page.
pirate_ref_re = regexp.compile(r'^/yoweb/pirate\.wm')

max_pirate_namelen = 12
36
37
38 #---------- general utilities ----------
39
def debug(m):
        # Print diagnostic message m (followed by a newline) to
        # opts.debug_file, but only when opts.debug is positive.
        if not (opts.debug > 0):
                return
        print >>opts.debug_file, m
43
def sleep(seconds):
        # Block for `seconds' seconds.  When debugging is enabled the
        # debug stream is flushed first, so that output written so
        # far is not left sitting in a buffer while we wait.
        debugging = opts.debug > 0
        if debugging:
                opts.debug_file.flush()
        time.sleep(seconds)
48
def format_time_interval(ti):
        # Render the interval ti (in seconds) compactly, using the
        # coarsest of these forms that applies:
        #   under 2 minutes     M:SS
        #   under 2 hours       NNm   (padded to at least 2 digits wide)
        #   under 1 day         NNh
        #   otherwise           NNd
        if ti >= 86400:
                return '%dd' % (ti / 86400)
        if ti >= 7200:
                return '%dh' % (ti / 3600)
        if ti >= 120:
                return '%2dm' % (ti / 60)
        minutes = ti / 60
        seconds = ti % 60
        return '%d:%02d' % (minutes, seconds)
54
55 #---------- caching and rate-limiting data fetcher ----------
56
57 class Fetcher:
58         def __init__(self, ocean, cachedir):
59                 debug('Fetcher init %s' % cachedir)
60                 self.ocean = ocean
61                 self.cachedir = cachedir
62                 try: os.mkdir(cachedir)
63                 except (OSError,IOError), oe:
64                         if oe.errno != errno.EEXIST: raise
65                 self._cache_scan(time.time())
66
67         def default_ocean(self, ocean='ice'):
68                 if self.ocean is None:
69                         self.ocean = ocean
70
71         def _cache_scan(self, now):
72                 # returns list of ages, unsorted
73                 ages = []
74                 debug('Fetcher   scan_cache')
75                 for leaf in os.listdir(self.cachedir):
76                         if not leaf.startswith('#'): continue
77                         path = self.cachedir + '/' + leaf
78                         try: s = os.stat(path)
79                         except (OSError,IOError), oe:
80                                 if oe.errno != errno.ENOENT: raise
81                                 continue
82                         age = now - s.st_mtime
83                         if age > opts.expire_age:
84                                 debug('Fetcher    expire %d %s' % (age, path))
85                                 try: os.remove(path)
86                                 except (OSError,IOError), oe:
87                                         if oe.errno != errno.ENOENT: raise
88                                 continue
89                         ages.append(age)
90                 return ages
91
92         def need_wait(self, now, imaginary=[]):
93                 ages = self._cache_scan(now)
94                 ages += imaginary
95                 ages.sort()
96                 debug('Fetcher   ages ' + `ages`)
97                 min_age = 1
98                 need_wait = 0
99                 for age in ages:
100                         if age < min_age and age < 300:
101                                 debug('Fetcher   morewait min=%d age=%d' %
102                                         (min_age, age))
103                                 need_wait = max(need_wait, min_age - age)
104                         min_age += 3
105                         min_age *= 1.25
106                 return need_wait
107
108         def _rate_limit_cache_clean(self, now):
109                 need_wait = self.need_wait(now)
110                 if need_wait > 0:
111                         debug('Fetcher   wait %d' % need_wait)
112                         sleep(need_wait)
113
114         def fetch(self, url, max_age):
115                 debug('Fetcher fetch %s' % url)
116                 cache_corename = urllib.quote_plus(url)
117                 cache_item = "%s/#%s#" % (self.cachedir, cache_corename)
118                 try: f = file(cache_item, 'r')
119                 except (OSError,IOError), oe:
120                         if oe.errno != errno.ENOENT: raise
121                         f = None
122                 now = time.time()
123                 max_age = max(opts.min_max_age, min(max_age, opts.expire_age))
124                 if f is not None:
125                         s = os.fstat(f.fileno())
126                         age = now - s.st_mtime
127                         if age > max_age:
128                                 debug('Fetcher  stale %d < %d'% (max_age, age))
129                                 f = None
130                 if f is not None:
131                         data = f.read()
132                         f.close()
133                         debug('Fetcher  cached %d > %d' % (max_age, age))
134                         return data
135
136                 debug('Fetcher  fetch')
137                 self._rate_limit_cache_clean(now)
138
139                 stream = urllib2.urlopen(url)
140                 data = stream.read()
141                 cache_tmp = "%s/#%s~%d#" % (
142                         self.cachedir, cache_corename, os.getpid())
143                 f = file(cache_tmp, 'w')
144                 f.write(data)
145                 f.close()
146                 os.rename(cache_tmp, cache_item)
147                 debug('Fetcher  stored')
148                 return data
149
150         def yoweb(self, kind, tail, max_age):
151                 self.default_ocean()
152                 url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
153                         self.ocean, kind, tail)
154                 return self.fetch(url, max_age)
155
156 #---------- logging assistance for troubled screenscrapers ----------
157
class SoupLog:
        # Collects, in self.msgs, messages describing problems
        # encountered while scraping a page.

        def __init__(self):
                self.msgs = []

        def msg(self, m):
                # Record the message m.
                self.msgs.append(m)

        def soupm(self, obj, m):
                # Record the message m, annotated with the repr of
                # the object obj that it concerns.
                annotated = m + '; in ' + repr(obj)
                self.msg(annotated)

        def needs_msgs(self, child_souplog):
                # Take over all of child_souplog's messages: they are
                # appended to ours and child_souplog is left empty.
                self.msgs.extend(child_souplog.msgs)
                child_souplog.msgs = []
168
def soup_text(obj):
        # Returns all the text found within obj (via
        # obj.findAll(text=True)), joined together, with leading and
        # trailing whitespace removed.
        pieces = obj.findAll(text=True)
        return ''.join(pieces).strip()
172
class SomethingSoupInfo(SoupLog):
        # Base class for scrapers: fetches a yoweb page through the
        # global `fetcher' and parses it into self._soup, with HTML
        # entities converted.

        def __init__(self, kind, tail, max_age):
                # kind, tail, max_age: passed on to fetcher.yoweb().
                SoupLog.__init__(self)
                page = fetcher.yoweb(kind, tail, max_age)
                entity_mode = BeautifulSoup.HTML_ENTITIES
                self._soup = BeautifulSoup(page, convertEntities=entity_mode)
180
181 #---------- scraper for pirate pages ----------
182
class PirateInfo(SomethingSoupInfo):
        # Public data members:
        #  pi.standings = { 'Treasure Haul': 0, ... }
        #       maps puzzle name to the index of the pirate's
        #       standing in standingvals (so 0 means 'Able');
        #       puzzles whose standing could not be determined
        #       have no entry
        #  pi.name = name
        #  pi.crew = (id, name)         or None if not determined
        #  pi.flag = (id, name)         or None if not determined
        #  pi.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, pirate, max_age=300):
                # pirate: the pirate's name, used as the page target.
                # max_age: how old a cached copy of the page may be.
                SomethingSoupInfo.__init__(self,
                        'pirate.wm?target=', pirate, max_age)
                self.name = pirate
                self._find_standings()
                self.crew = self._find_crewflag('crew',
                        '^/yoweb/crew/info\\.wm')
                self.flag = self._find_crewflag('flag',
                        '^/yoweb/flag/info\\.wm')

        def _find_standings(self):
                # Fills in self.standings from the page.  Each puzzle
                # is identified by the alt text of a standing image;
                # its standing is parsed from the text of the table
                # cell following the one containing the image.
                imgs = self._soup.findAll('img',
                        src=regexp.compile('/yoweb/images/stat.*'))
                # Two alternatives, each capturing the standing name:
                #  <something>/<Standing>
                #  <something>/<something> (ocean-wide <Standing>)
                standing_re = regexp.compile(
u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
                        )
                found = { }

                for skill in puzzles:
                        found[skill] = [ ]

                # Problems are collected separately, and only passed
                # on to self.msgs if some standing ends up missing.
                skl = SoupLog()

                for img in imgs:
                        try: puzzle = img['alt']
                        except KeyError: continue

                        if not puzzle in puzzles:
                                skl.soupm(img, 'unknown puzzle: "%s"' % puzzle)
                                continue
                        key = img.findParent('td')
                        if key is None:
                                skl.soupm(img, 'puzzle at root! "%s"' % puzzle)
                                continue
                        valelem = key.findNextSibling('td')
                        if valelem is None:
                                skl.soupm(key, 'puzzle missing sibling "%s"'
                                        % puzzle)
                                continue
                        valstr = soup_text(valelem)
                        match = standing_re.match(valstr)
                        if match is None:
                                skl.soupm(key, ('puzzle "%s" unparseable'+
                                        ' standing "%s"') % (puzzle, valstr))
                                continue
                        # whichever alternative matched, its group is
                        # the last (and only) one that participated
                        standing = match.group(match.lastindex)
                        found[puzzle].append(standing)

                self.standings = { }

                for puzzle in puzzles:
                        sl = found[puzzle]
                        if len(sl) > 1:
                                skl.msg('puzzle "%s" multiple standings %s' %
                                                (puzzle, repr(sl)))
                                continue
                        if not sl:
                                skl.msg('puzzle "%s" no standing found' % puzzle)
                                continue
                        standing = sl[0]
                        try:
                                self.standings[puzzle] = (
                                        standingvals.index(standing))
                        except ValueError:
                                skl.msg('puzzle "%s" unknown standing "%s"' %
                                        (puzzle, standing))

                for puzzle in puzzles:
                        if not puzzle in self.standings:
                                # needs_msgs empties skl, so doing
                                # this once is enough
                                self.needs_msgs(skl)
                                break

        def _find_crewflag(self, cf, yoweb_re):
                # Finds the pirate's crew or flag.
                #  cf: 'crew' or 'flag'; names the id parameter
                #   (`<cf>id=') looked for in the link, and is used
                #   in messages.
                #  yoweb_re: regexp (string) matching the href of
                #   links to the relevant kind of page.
                # Returns (id, name), or None after logging a message
                # if there is not exactly one such link or no id can
                # be extracted from it.
                things = self._soup.findAll('a', href=regexp.compile(yoweb_re))
                if len(things) != 1:
                        self.msg('zero or several %s id references found' % cf)
                        return None
                thing = things[0]
                id_re = '\\b%sid\\=(\\w+)$' % cf
                id_haystack = thing['href']
                match = regexp.compile(id_re).search(id_haystack)
                if match is None:
                        self.soupm(thing, ('incomprehensible %s id ref'+
                                ' (%s in %s)') % (cf, id_re, id_haystack))
                        return None
                name = soup_text(thing)
                return (match.group(1), name)

        def __str__(self):
                return repr((self.crew, self.flag, self.standings, self.msgs))
281
282 #---------- scraper for crew pages ----------
283
class CrewInfo(SomethingSoupInfo):
        # Public data members:
        #  ci.crew = [ ('Captain',        ['Pirate', ...]),
        #              ('Senior Officer', [...]),
        #               ... ]
        #  ci.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, crewid, max_age=300):
                # crewid: crew id to look up; max_age: how old a
                # cached copy of the crew page may be.
                SomethingSoupInfo.__init__(self,
                        'crew/info.wm?crewid=', crewid, max_age)
                self._find_crew()

        def _find_crew(self):
                # Fills in self.crew.  The member table is located by
                # starting at the (single) captain image and climbing
                # through enclosing tables until one is reached which
                # contains a link to a pirate page.
                self.crew = []
                captain_imgs = self._soup.findAll('img',
                        src='/yoweb/images/crew-captain.png')
                if len(captain_imgs) != 1:
                        self.msg('crew members: no. of captain images != 1')
                        return
                table = captain_imgs[0]
                while not table.find('a', href=pirate_ref_re):
                        table = table.findParent('table')
                        if not table:
                                self.msg('crew members: cannot find table')
                                return

                rank_img_re = regexp.compile('/yoweb/images/crew')
                members = None  # member list of the rank last seen
                for row in table.contents:
                        # only the table's direct children are wanted,
                        # and of those only the tags, not bare text
                        if isinstance(row,basestring):
                                continue

                        if row.find('img', attrs={'src': rank_img_re}):
                                # a rank heading row: start a new group
                                rank = soup_text(row)
                                members = []
                                self.crew.append((rank, members))
                                continue

                        for link in row.findAll('a', href=pirate_ref_re):
                                if members is None:
                                        self.soupm(link, 'crew members: crew'
                                                ' before rank')
                                else:
                                        members.append(soup_text(link))

        def __str__(self):
                return repr((self.crew, self.msgs))
331
332 #---------- pretty-printer for tables of pirate puzzle standings ----------
333
class StandingsTable:
        # Builds up, in self.s, a text table with one line per pirate
        # and one column per puzzle (or group of puzzles).
        #
        # Typical use: headings(), then pirate() for each pirate,
        # then results() to obtain the text.

        def __init__(self, use_puzzles=None, col_width=6, gap_every=5):
                # use_puzzles: list of columns, each either a puzzle
                #  name or a list of puzzle names which share a
                #  column.  If None, a fixed list of duty puzzles is
                #  used when opts.ship_duty is set, and otherwise all
                #  of `puzzles'.
                # col_width: width of each column, including the
                #  space which separates it from the previous one.
                # gap_every: a blank line is inserted after every
                #  this many lines; None disables that.
                if use_puzzles is None:
                        if opts.ship_duty:
                                use_puzzles=[
                                        'Navigating','Battle Navigation',
                                        'Gunning',
                                        ['Sailing','Rigging'],
                                        'Bilging',
                                        'Carpentry',
                                        'Treasure Haul'
                                ]
                        else:
                                use_puzzles=puzzles
                self._puzzles = use_puzzles
                self.s = ''
                self._cw = col_width-1
                self._gap_every = gap_every
                self._linecount = 0

        def _pline(self, pirate, puzstrs, extra):
                # Appends one table line: the name `pirate', then each
                # of puzstrs padded/truncated to the column width,
                # then `extra' (if true).  Preceded by a blank line
                # if this completes a group of gap_every lines.
                if (self._linecount > 0
                    and self._gap_every is not None
                    and not (self._linecount % self._gap_every)):
                        self.s += '\n'
                self.s += ' %-*s' % (max(max_pirate_namelen, 14), pirate)
                for v in puzstrs:
                        self.s += ' %-*.*s' % (self._cw,self._cw, v)
                if extra:
                        self.s += ' ' + extra
                self.s += '\n'
                self._linecount += 1

        def _puzstr(self, pi, puzzle):
                # Returns the cell text for pirate info pi in the
                # column `puzzle' (a puzzle name, or a list of names
                # of which the best standing is shown):
                #  '?'  if any needed standing is missing from
                #       pi.standings
                #  ''   if the standing is 0
                #  otherwise the standing's number (only if the column
                #  is wide enough), then a `*' for every two levels
                #  and a `+' for an odd one.
                if not isinstance(puzzle,list): puzzle = [puzzle]
                try: standing = max([pi.standings[p] for p in puzzle])
                except KeyError: return '?'
                if not standing: return ''
                s = ''
                if self._cw > 4:
                        # NOTE(review): the code here used also to
                        # compute the initial letter of the standing's
                        # name (lower-cased below Master) without using
                        # it; confirm that the number, not the letter,
                        # is what is wanted in the table.
                        s += repr(standing)
                if self._cw > 5:
                        s += ' '
                # `//' because the repeat counts have to be integers
                s += '*' * (standing // 2)
                s += '+' * (standing % 2)
                return s

        def headings(self):
                # Appends the line of column headings and restarts
                # the line count used for placing blank lines.
                def puzn_redact(name):
                        # Abbreviates a heading: a group of puzzles is
                        # shown as its members' headings, each cut to
                        # half the column width, joined with `/'; in
                        # a name of several words the first word is
                        # cut to at most 4 characters and the space
                        # after it is dropped.
                        if isinstance(name,list):
                                return '/'.join(
                                        ["%.*s" % (self._cw//2, puzn_redact(n))
                                         for n in name])
                        spc = name.find(' ')
                        if spc < 0: return name
                        return name[0:min(4,spc)] + name[spc+1:]
                self._linecount = -2
                self._pline('', [puzn_redact(p) for p in self._puzzles], None)
                self._linecount = 0

        def literalline(self, line):
                # Appends `line' as it stands and restarts the line
                # count used for placing blank lines.
                self.s += line + '\n'
                self._linecount = 0

        def pirate_dummy(self, name, standingstring, extra=None):
                # Appends a line for `name' whose cells are taken from
                # standingstring repeated once per column.
                self._pline(name, standingstring * len(self._puzzles), extra)

        def pirate(self, pi, extra=None):
                # Appends the line for the pirate described by pi
                # (which must provide .name and .standings).
                puzstrs = [self._puzstr(pi,puz) for puz in self._puzzles]
                self._pline(pi.name, puzstrs, extra)

        def results(self):
                # Returns the text of the table as built so far.
                return self.s
406
407 #---------- chat log parser ----------
408
class PirateAboard:
        # A plain record of what is known about one pirate aboard a
        # vessel; all members are public:
        #  pa.v                 the vessel record the pirate is on
        #  pa.name              the pirate's name
        #  pa.last_time         time of the most recent event
        #  pa.last_event        description of that event
        #  pa.gunner            initially False
        #  pa.last_chat_time    initially None
        #  pa.last_chat_chan    initially None
        #  pa.pi                PirateInfo, or None until fetched

        def __init__(pa, pn, v, time, event):
                pa.name = pn
                pa.v = v
                pa.last_time, pa.last_event = time, event
                pa.last_chat_time = pa.last_chat_chan = None
                pa.gunner = False
                pa.pi = None

        def pirate_info(pa):
                # Returns the PirateInfo for this pirate, fetching or
                # refreshing it if that seems due and the fetcher
                # would not have to wait; otherwise returns whatever
                # we already have, which may be None.
                now = time.time()
                if not pa.pi:
                        imaginary = [1]
                else:
                        age = now - pa.pi_fetched
                        # refresh once older than a randomly chosen
                        # threshold of between 120 and 240 seconds
                        guide = random.randint(120,240)
                        if age <= guide:
                                return pa.pi
                        debug('PirateAboard refresh %d > %d  %s' % (
                                age, guide, pa.name))
                        imaginary = [2,6]
                wait = fetcher.need_wait(now, imaginary)
                if wait:
                        debug('PirateAboard fetcher not ready %d' % wait)
                        return pa.pi
                pa.pi = PirateInfo(pa.name, 600)
                pa.pi_fetched = now
                return pa.pi
449
450 class ChatLogTracker:
451         # This is quite complex so we make it opaque.  Use the
452         # official invokers, accessors etc.
453
454         def __init__(self, myself_pi, logfn):
455                 self._pl = {}   # self._pl['Pirate'] =
456                 self._vl = {}   #   self._vl['Vessel']['Pirate'] = PirateAboard
457                                 # self._vl['Vessel']['#lastinfo']
458                                 # self._vl['Vessel']['#name']
459                                 # self._v = self._vl[self._vessel]
460                 self._date = None
461                 self._myself = myself_pi
462                 self._f = file(logfn)
463                 self._lbuf = ''
464                 self._progress = [0, os.fstat(self._f.fileno()).st_size]
465                 self._disembark_myself()
466                 self._need_redisplay = False
467                 self._lastvessel = None
468
469         def _disembark_myself(self):
470                 self._v = None
471                 self._vessel = None
472                 self.force_redisplay()
473
474         def force_redisplay(self):
475                 self._need_redisplay = True
476
477         def _vessel_updated(self, v, timestamp):
478                 v['#lastinfo'] = timestamp
479                 self.force_redisplay()
480
481         def _onboard_event(self,v,timestamp,pirate,event):
482                 pa = self._pl.get(pirate, None)
483                 if pa is not None and pa.v is v:
484                         pa.last_time = timestamp
485                         pa.last_event = event
486                 else:
487                         if pa is not None: del pa.v[pirate]
488                         pa = PirateAboard(pirate, v, timestamp, event)
489                         self._pl[pirate] = pa
490                         v[pirate] = pa
491                 self._vessel_updated(v, timestamp)
492                 return pa
493
494         def _trash_vessel(self, v):
495                 for pn in v:
496                         if pn.startswith('#'): continue
497                         del self._pl[pn]
498                 vn = v['#name']
499                 del self._vl[vn]
500                 if v is self._v: self._disembark_myself()
501                 self.force_redisplay()
502
503         def _vessel_stale(self, v, timestamp):
504                 return timestamp - v['#lastinfo'] > opts.ship_reboard_clearout
505
506         def _vessel_check_expire(self, v, timestamp):
507                 if not self._vessel_stale(v, timestamp):
508                         return v
509                 self._debug_line_disposition(timestamp,'',
510                         'stale-reset ' + v['#name'])
511                 self._trash_vessel(v)
512                 return None
513
514         def expire_garbage(self, timestamp):
515                 for v in self._vl.values():
516                         self._vessel_check_expire(v, timestamp)
517
518         def _vessel_lookup(self, vn, timestamp, dml=[], create=False):
519                 v = self._vl.get(vn, None)
520                 if v is not None:
521                         v = self._vessel_check_expire(v, timestamp)
522                 if v is not None:
523                         dml.append('found')
524                         return v
525                 if not create:
526                         dml.append('no')
527                 dml.append('new')
528                 self._vl[vn] = v = { '#name': vn }
529                 self._vessel_updated(v, timestamp)
530                 return v
531
532         def _find_matching_vessel(self, pattern, timestamp, cmdr,
533                                         dml=[], create=False):
534                 # use when a commander pirate `cmdr' specified a vessel
535                 #  by name `pattern' (either may be None)
536                 # if create is true, will create the vessel
537                 #  record if an exact name is specified
538
539                 if (pattern is not None and
540                     not '*' in pattern
541                     and len(pattern.split(' ')) == 2):
542                         vn = pattern.title()
543                         dml.append('exact')
544                         return self._vessel_lookup(
545                                 vn, timestamp, dml=dml, create=create)
546
547                 if pattern is None:
548                         pattern_check = lambda vn: True
549                 else:
550                         re = '(?:.* )?%s$' % pattern.lower().replace('*','.+')
551                         pattern_check = regexp.compile(re, regexp.I).match
552
553                 tries = []
554
555                 cmdr_pa = self._pl.get(cmdr, None)
556                 if cmdr_pa: tries.append((cmdr_pa.v, 'cmdr'))
557
558                 tries.append((self._v, 'here'))
559                 tried_vns = []
560
561                 for (v, dm) in tries:
562                         if v is None: dml.append(dm+'?'); continue
563                         
564                         vn = v['#name']
565                         if not pattern_check(vn):
566                                 tried_vns.append(vn)
567                                 dml.append(dm+'#')
568                                 continue
569
570                         dml.append(dm+'!')
571                         return v
572
573                 if pattern is not None and '*' in pattern:
574                         search = [
575                                 (vn,v)
576                                 for (vn,v) in self._vl.iteritems()
577                                 if not self._vessel_stale(v, timestamp)
578                                 if pattern_check(vn)
579                                 ]
580                         #debug('CLT-RE /%s/ wanted (%s) searched (%s)' % (
581                         #       re,
582                         #       '/'.join(tried_vns),
583                         #       '/'.join([vn for (vn,v) in search])))
584
585                         if len(search)==1:
586                                 dml.append('one')
587                                 return search[0][1]
588                         elif search:
589                                 dml.append('many')
590                         else:
591                                 dml.append('none')
592
593         def _debug_line_disposition(self,timestamp,l,m):
594                 debug('CLT %13s %-40s %s' % (timestamp,m,l))
595
596         def chatline(self,l):
597                 rm = lambda re: regexp.match(re,l)
598                 d = lambda m: self._debug_line_disposition(timestamp,l,m)
599                 timestamp = None
600
601                 m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$')
602                 if m:
603                         self._date = [int(x) for x in m.groups()]
604                         self._previous_timestamp = None
605                         return d('date '+`self._date`)
606
607                 if self._date is None:
608                         return d('date unset')
609
610                 m = rm('\\[(\d\d):(\d\d):(\d\d)\\] ')
611                 if not m:
612                         return d('no timestamp')
613
614                 while True:
615                         time_tuple = (self._date +
616                                       [int(x) for x in m.groups()] +
617                                       [-1,-1,-1])
618                         timestamp = time.mktime(time_tuple)
619                         if timestamp >= self._previous_timestamp: break
620                         self._date[2] += 1
621                         self._debug_line_disposition(timestamp,'',
622                                 'new date '+`self._date`)
623
624                 self._previous_timestamp = timestamp
625
626                 l = l[l.find(' ')+1:]
627
628                 def ob_x(pirate,event):
629                         return self._onboard_event(
630                                         self._v, timestamp, pirate, event)
631                 def ob1(did): ob_x(m.group(1), did); return d(did)
632                 def oba(did): return ob1('%s %s' % (did, m.group(2)))
633
634                 def disembark(v, timestamp, pirate, event):
635                         self._onboard_event(
636                                         v, timestamp, pirate, 'leaving '+event)
637                         del v[pirate]
638                         del self._pl[pirate]
639
640                 def disembark_me(why):
641                         self._disembark_myself()
642                         return d('disembark-me '+why)
643
644                 m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$')
645                 if m:
646                         dm = ['boarding']
647                         pn = self._myself.name
648                         vn = m.group(1)
649                         v = self._vessel_lookup(vn, timestamp, dm, create=True)
650                         self._lastvessel = self._vessel = vn
651                         self._v = v
652                         ob_x(pn, 'we boarded')
653                         self.expire_garbage(timestamp)
654                         return d(' '.join(dm))
655
656                 if self._v is None:
657                         return d('no vessel')
658
659                 m = rm('(\\w+) has come aboard\\.$')
660                 if m: return ob1('boarded');
661
662                 m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$')
663                 if m:
664                         (who,what) = m.groups()
665                         pa = ob_x(who,'ord '+what)
666                         if what == 'Gunning':
667                                 pa.gunner = True
668                         return d('duty order')
669
670                 m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$')
671                 if m: oba('stopped'); return d("end")
672
673                 def chat_core(speaker, chan):
674                         try: pa = self._pl[speaker]
675                         except KeyError: return 'mystery'
676                         if pa.v is not self._v: return 'elsewhere'
677                         pa.last_chat_time = timestamp
678                         pa.last_chat_chan = chan
679                         self.force_redisplay()
680                         return 'here'
681
682                 def chat(chan):
683                         speaker = m.group(1)
684                         dm = chat_core(speaker, chan)
685                         return d('chat %s %s' % (chan, dm))
686
687                 def chat_metacmd(chan):
688                         (cmdr, metacmd) = m.groups()
689                         metacmd = regexp.sub('\\s+', ' ', metacmd).strip()
690                         m2 = regexp.match(
691                                 '/([ad]) (?:([A-Za-z* ]+)\\s*:)?([A-Za-z ]+)$',
692                                 metacmd)
693                         if not m2: return chat(chan)
694
695                         (cmd, pattern, targets) = m2.groups()
696                         dml = ['cmd', chan, cmd]
697
698                         if cmd == 'a': each = self._onboard_event
699                         else: each = disembark
700
701                         if cmdr == self._myself.name:
702                                 dml.append('self')
703                                 how = 'cmd: %s' % cmd
704                         else:
705                                 dml.append('other')
706                                 how = 'cmd: %s %s' % (cmd,cmdr)
707
708                         v = self._find_matching_vessel(
709                                 pattern, timestamp, cmdr, dml, create=True)
710
711                         if v is not None:
712                                 targets = targets.strip().split(' ')
713                                 dml.append(`len(targets)`)
714                                 for target in targets:
715                                         each(v, timestamp, target.title(), how)
716                                 self._vessel_updated(v, timestamp)
717
718                         dm = ' '.join(dml)
719                         chat_core(cmdr, 'cmd '+chan)
720                         return d(dm)
721
722                 m = rm('(\\w+) (?:issued an order|ordered everyone) "')
723                 if m: return ob1('general order');
724
725                 m = rm('(\\w+) says, "')
726                 if m: return chat('public')
727
728                 m = rm('(\\w+) tells ye, "')
729                 if m: return chat('private')
730
731                 m = rm('Ye told (\\w+), "(.*)"$')
732                 if m: return chat_metacmd('private')
733
734                 m = rm('(\\w+) flag officer chats, "')
735                 if m: return chat('flag officer')
736
737                 m = rm('(\\w+) officer chats, "(.*)"$')
738                 if m: return chat_metacmd('officer')
739
740                 m = rm('Ye accepted the offer to job with ')
741                 if m: return disembark_me('jobbing')
742
743                 m = rm('Ye hop on the ferry and are whisked away ')
744                 if m: return disembark_me('ferry')
745
746                 m = rm('Whisking away to yer home on the magical winds')
747                 if m: return disembark_me('home')
748
749                 m = rm('Game over\\.  Winners: ([A-Za-z, ]+)\\.$')
750                 if m:
751                         pl = m.group(1).split(', ')
752                         if not self._myself.name in pl:
753                                 return d('lost melee')
754                         for pn in pl:
755                                 if ' ' in pn: continue
756                                 ob_x(pn,'won melee')
757                         return d('won melee')
758
759                 m = rm('(\\w+) is eliminated\\!')
760                 if m: return ob1('eliminated in fray');
761
762                 m = rm('(\\w+) has driven \w+ from the ship\\!')
763                 if m: return ob1('boarder repelled');
764
765                 m = rm('\w+ has bested (\\w+), and turns'+
766                         ' to the rest of the ship\\.')
767                 if m: return ob1('boarder unrepelled');
768
769                 m = rm('(\\w+) has left the vessel\.')
770                 if m:
771                         pirate = m.group(1)
772                         disembark(self._v, timestamp, pirate, 'disembarked')
773                         return d('disembarked')
774
775                 return d('not-matched')
776
777         def _str_vessel(self, vn, v):
778                 s = ' vessel %s\n' % vn
779                 s += ' '*20 + "%-*s   %13s\n" % (
780                                 max_pirate_namelen, '#lastinfo',
781                                 v['#lastinfo'])
782                 assert v['#name'] == vn
783                 for pn in sorted(v.keys()):
784                         if pn.startswith('#'): continue
785                         pa = v[pn]
786                         assert pa.v == v
787                         assert self._pl[pn] == pa
788                         s += ' '*20 + "%s %-*s %13s %-30s %13s %s\n" % (
789                                 (' ','G')[pa.gunner],
790                                 max_pirate_namelen, pn,
791                                 pa.last_time, pa.last_event,
792                                 pa.last_chat_time, pa.last_chat_chan)
793                 return s
794
795         def __str__(self):
796                 s = '''<ChatLogTracker
797  myself %s
798  vessel %s
799 '''                     % (self._myself.name, self._vessel)
800                 assert ((self._v is None and self._vessel is None) or
801                         (self._v is self._vl[self._vessel]))
802                 if self._vessel is not None:
803                         s += self._str_vessel(self._vessel, self._v)
804                 for vn in sorted(self._vl.keys()):
805                         if vn == self._vessel: continue
806                         s += self._str_vessel(vn, self._vl[vn])
807                 for p in self._pl:
808                         pa = self._pl[p]
809                         assert pa.v[p] is pa
810                         assert pa.v in self._vl.values()
811                 s += '>\n'
812                 return s
813
814         def catchup(self, progress=None):
815                 while True:
816                         more = self._f.readline()
817                         if not more: break
818
819                         self._progress[0] += len(more)
820                         if progress: progress.progress(*self._progress)
821
822                         self._lbuf += more
823                         if self._lbuf.endswith('\n'):
824                                 self.chatline(self._lbuf.rstrip())
825                                 self._lbuf = ''
826                                 if opts.debug >= 2:
827                                         debug(self.__str__())
828                 if progress: progress.caughtup()
829
830         def changed(self):
831                 rv = self._need_redisplay
832                 self._need_redisplay = False
833                 return rv
834         def myname(self):
835                 # returns our pirate name
836                 return self._myself.name
837         def vesselname(self):
838                 # returns the vessel name we're aboard or None
839                 return self._vessel
840         def lastvesselname(self):
841                 # returns the last vessel name we were aboard or None
842                 return self._lastvessel
843         def aboard(self, vesselname=True):
844                 # returns a list of PirateAboard the vessel
845                 #  sorted by pirate name
846                 #  you can pass this None and you'll get []
847                 #  or True for the current vessel (which is the default)
848                 if vesselname is True: v = self._v
849                 else: v = self._vl.get(vesselname.title())
850                 if v is None: return []
851                 return [ v[pn]
852                          for pn in sorted(v.keys())
853                          if not pn.startswith('#') ]
854
855 #---------- implementations of actual operation modes ----------
856
def do_pirate(pirates, bu):
        """Print a dict-like listing of PirateInfo for each named pirate.

        bu (the bad-usage callback) is accepted for uniformity with the
        other do_* modes but is not used here.
        """
        print('{')
        for name in pirates:
                details = PirateInfo(name)
                print('%s: %s,' % (repr(name), details))
        print('}')
863
def prep_crew_of(args, bu, max_age=300):
        """Look up the crew of the single pirate named in args.

        bu is called with a message unless exactly one argument was given.
        Returns a CrewInfo built from the first element of the pirate's
        crew attribute, or None if that attribute is None.  max_age is
        passed on to both PirateInfo and CrewInfo.
        """
        if len(args) != 1:
                bu('crew-of takes one pirate name')
        pirate = PirateInfo(args[0], max_age)
        crew = pirate.crew
        if crew is None:
                return None
        return CrewInfo(crew[0], max_age)
869
def do_crew_of(args, bu):
        """Print the crew information for the named pirate's crew.

        Prints whatever prep_crew_of returns, including None when the
        pirate has no crew.
        """
        print(prep_crew_of(args, bu))
873
def do_standings_crew_of(args, bu):
        """Print a standings table for every member of a pirate's crew.

        args must hold exactly one pirate name; bu is the bad-usage
        callback.  Members are grouped under a heading per rank, and ranks
        with no members are skipped.
        """
        ci = prep_crew_of(args, bu, 60)
        if ci is None:
                # prep_crew_of gives None when the pirate has no crew;
                # there is nothing to tabulate, so report it rather than
                # crashing on ci.crew
                bu('pirate is not in a crew')
                return
        tab = StandingsTable()
        tab.headings()
        for (rank, members) in ci.crew:
                if not members: continue
                tab.literalline('')
                tab.literalline('%s:' % rank)
                for p in members:
                        # randomised max age, presumably so that cached
                        # entries do not all expire together -- TODO confirm
                        pi = PirateInfo(p, random.randint(900,1800))
                        tab.pirate(pi)
        print(tab.results())
886
class ProgressPrintPercentage:
        """Progress reporter which writes a one-line percentage to a file.

        The line ends in a carriage return rather than a newline, so each
        update overwrites the previous one on a terminal.
        """
        def __init__(self, f=sys.stdout):
                # f: file-like object with write() and flush()
                self._f = f
        def progress_string(self,done,total):
                # Returns the text for `done' out of `total'.  A total of
                # zero (or less) means there is nothing to scan, which we
                # report as complete instead of dividing by zero.
                if total <= 0:
                        percent = 100
                else:
                        percent = (done*100) // total
                return "scan chat logs %3d%%\r" % percent
        def progress(self,*a):
                # a is (done, total), passed straight to progress_string
                self._f.write(self.progress_string(*a))
                self._f.flush()
        def show_init(self, pirate, ocean):
                # announces startup on its own line
                self._f.write('Starting up, %s on the %s ocean\n' % (
                        pirate, ocean))
        def caughtup(self):
                # blank out the progress line
                self._f.write('                   \r')
                self._f.flush()
901
902 #----- modes which use the chat log parser are quite complex -----
903
def prep_chat_log(args, bu,
                progress=ProgressPrintPercentage(),
                max_myself_age=3600):
        """Set up a ChatLogTracker for the chat log named in args.

        args must be exactly one chat log filename whose basename starts
        PIRATE_OCEAN_; bu is called with a message otherwise.  The ocean
        from the filename is offered to the fetcher as its default.
        progress reports the initial scan of the log.

        Returns (myself, track): our own PirateInfo and the tracker,
        which has already read the existing log contents.
        """
        if len(args) != 1:
                bu('this action takes only chat log filename')
        logfn = args[0]

        found = regexp.match('(?:.*/)?([A-Z][a-z]+)_([a-z]+)_', logfn)
        if not found:
                bu('chat log filename is not in expected format')
        (pirate, ocean) = found.groups()
        fetcher.default_ocean(ocean)

        progress.show_init(pirate, fetcher.ocean)
        myself = PirateInfo(pirate, max_myself_age)
        track = ChatLogTracker(myself, logfn)

        # debugging output is turned down by two levels for the duration
        # of the initial scan
        opts.debug -= 2
        track.catchup(progress)
        opts.debug += 2

        track.force_redisplay()

        return (myself, track)
926
def do_track_chat_log(args, bu):
        """Follow a chat log forever, printing the tracker when it changes.

        Polls the log once a second and never returns.
        """
        (_myself, track) = prep_chat_log(args, bu)
        while True:
                track.catchup()
                if track.changed():
                        print(track)
                sleep(1)
934
935 #----- ship management aid -----
936
class Display_dumb(ProgressPrintPercentage):
        """Display which just prints each update, scrolling the screen."""
        def __init__(self):
                ProgressPrintPercentage.__init__(self)
        def realstart(self):
                # nothing to prepare for plain scrolling output
                pass
        def show(self, s):
                # two blank lines, then the text and a newline
                print('\n\n%s' % (s,))
944
class Display_overwrite(ProgressPrintPercentage):
        """Display which redraws in place using terminfo control strings.

        self._t maps the names 'ho', 'el' and 'ed' to the strings written
        before the text, after each line and after the last line.  If the
        terminal cannot supply all of them we clear the whole screen
        before each update instead; if it cannot even clear, we fall back
        to plain scrolling output.
        """
        def __init__(self):
                ProgressPrintPercentage.__init__(self)

                # setupterm is given /dev/null because we only want to
                # look up capability strings here
                null = open('/dev/null','w')
                curses.setupterm(fd=null.fileno())

                self._t = {'el':'', 'ed':''}
                self._dumb = False

                self._clear = curses.tigetstr('clear')
                if not self._clear:
                        self._debug('missing clear!')
                        # no usable control strings at all: show() prints
                        # plainly and realstart() writes nothing
                        self._dumb = True
                        self._clear = ''
                        return

                if not self._init_sophisticated():
                        for k in self._t.keys(): self._t[k] = ''
                        self._t['ho'] = self._clear

        def _debug(self,m): debug('display overwrite: '+m)

        def _init_sophisticated(self):
                # Fills self._t from terminfo.  Returns 1 if every string
                # was found, 0 (after a debug message) otherwise.
                for k in self._t.keys():
                        s = curses.tigetstr(k)
                        self._t[k] = s
                self._t['ho'] = curses.tigetstr('ho')
                if not self._t['ho']:
                        # synthesise `home' from cursor addressing,
                        # provided the terminal has that
                        cup = curses.tigetstr('cup')
                        if cup:
                                self._t['ho'] = curses.tparm(cup,0,0)
                missing = [k for k in self._t.keys() if not self._t[k]]
                if missing:
                        self._debug('missing '+(' '.join(missing)))
                        return 0
                return 1

        def show(self, s):
                # Redraws the screen with the text s.
                if self._dumb:
                        # same output as Display_dumb.show
                        print('\n\n%s' % (s,))
                        return

                w = sys.stdout.write
                def wti(k): w(self._t[k])

                wti('ho')
                nl = ''
                for l in s.rstrip().split('\n'):
                        w(nl)
                        w(l)
                        wti('el')
                        nl = '\r\n'
                wti('ed')
                w(' ')
                sys.stdout.flush()

        def realstart(self):
                # Clears the screen once, before the first real update.
                sys.stdout.write(self._clear)
                sys.stdout.flush()
997                         
998
def do_ship_aid(args, bu):
        """Run the ship management aid: a continuously updated table of
        the pirates aboard our vessel (or the one we were last aboard).

        args must be the chat log filename; bu is the bad-usage callback.
        The display class is chosen by opts.display.  Redraws once a
        second and never returns.
        """
        if opts.ship_duty is None:
                opts.ship_duty = True

        display_class = globals()['Display_' + opts.display]
        displayer = display_class()
        spinner = '/-\\'

        (_myself, track) = prep_chat_log(args, bu, progress=displayer)

        def describe_vessel():
                # returns (vessel name or None, text for the status line)
                current = track.vesselname()
                if current:
                        return (current, " on board the %s" % current)
                previous = track.lastvesselname()
                if previous:
                        return (previous, " ashore from the %s" % previous)
                return (None, " not on a vessel")

        def age_and_event(when, what):
                # fixed-width column: how long ago, and what happened;
                # `now' is set by the main loop below before each use
                if when is None:
                        return ' ' * 22
                return " %-4s %-16s" % (format_time_interval(now - when), what)

        displayer.realstart()
        displayer.show(track.myname() + describe_vessel()[1] + '...')

        while True:
                track.catchup()
                now = time.time()

                (vn, where) = describe_vessel()
                text = track.myname() + where
                text += " at %s\n" % time.strftime("%Y-%m-%d %H:%M:%S")

                tbl = StandingsTable()
                tbl.headings()

                for pa in track.aboard(vn):
                        pi = pa.pirate_info()

                        if pa.gunner:
                                extra = 'G '
                        else:
                                extra = '  '
                        extra += age_and_event(pa.last_time, pa.last_event)
                        extra += age_and_event(pa.last_chat_time,
                                        pa.last_chat_chan)

                        if pi is None:
                                # no info for this pirate: show a spinner
                                tbl.pirate_dummy(pa.name, spinner[0], extra)
                        else:
                                tbl.pirate(pi, extra)

                text += tbl.results()

                displayer.show(text)
                sleep(1)
                # rotate the three-character spinner by one place
                spinner = spinner[1:] + spinner[:1]
1054
1055 #---------- main program ----------
1056
def main():
        """Parse the command line and run the requested mode.

        Sets the globals opts (parsed options plus some fixed parameters)
        and fetcher, then calls do_<mode> with the remaining arguments
        and the option parser's error method as the bad-usage callback.
        """
        global opts, fetcher

        pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
actions:
 yoweb-scrape [--ocean OCEAN ...] pirate PIRATE
 yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
 yoweb-scrape [options] ship-aid CHAT-LOG  (must be .../PIRATE_OCEAN_chat-log*)

display modes (for --display) apply to ship-aid:
 --display=dumb       just print new information, scrolling the screen
 --display=overwrite  use cursor motion, selective clear, etc. to redraw at top
''')
        ao = pa.add_option
        ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None,
                help='select ocean OCEAN')
        ao('--cache-dir', dest='cache_dir', metavar='DIR',
                default='~/.yoweb-scrape-cache',
                help='cache yoweb pages in DIR')
        ao('-D','--debug', action='count', dest='debug', default=0,
                help='enable debugging output')
        ao('--debug-fd', type='int', dest='debug_fd',
                help='write any debugging output to specified fd')
        ao('-q','--quiet', action='store_true', dest='quiet',
                help='suppress warning output')
        ao('--display', action='store', dest='display',
                type='choice', choices=['dumb','overwrite'],
                help='how to display ship aid')

        ao('--ship-duty', action='store_true', dest='ship_duty',
                help='show ship duty station puzzles')
        ao('--all-puzzles', action='store_false', dest='ship_duty',
                help='show all puzzles, not just ship duty stations')

        ao('--min-cache-reuse', type='int', dest='min_max_age',
                metavar='SECONDS', default=60,
                help='always reuse cache yoweb data if no older than this')

        (opts,args) = pa.parse_args()
        random.seed()

        if len(args) < 1:
                pa.error('need a mode argument')

        if opts.debug_fd is not None:
                opts.debug_file = os.fdopen(opts.debug_fd, 'w')
        else:
                opts.debug_file = sys.stdout

        # Mode `foo-bar' is implemented by do_foo_bar.  A literal `_' in
        # the mode is first turned into `#', which cannot occur in a
        # function name, so that only the hyphenated spelling matches.
        mode = args[0]
        mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_')
        try: mode_fn = globals()[mode_fn_name]
        except KeyError: pa.error('unknown mode "%s"' % mode)

        # fixed parameters
        opts.expire_age = max(3600, opts.min_max_age)

        opts.ship_reboard_clearout = 3600

        if opts.cache_dir.startswith('~/'):
                # expanduser copes with HOME being unset, where
                # concatenating os.getenv('HOME') would raise TypeError
                opts.cache_dir = os.path.expanduser(opts.cache_dir)

        if opts.display is None:
                # debug output on stdout, or stdout not a terminal,
                # would fight with cursor-addressed redrawing
                if ((opts.debug > 0 and opts.debug_fd is None)
                    or not os.isatty(sys.stdout.fileno())):
                        opts.display = 'dumb'
                else:
                        opts.display = 'overwrite'

        fetcher = Fetcher(opts.ocean, opts.cache_dir)

        mode_fn(args[1:], pa.error)
1132
# Only run when executed as a program, so that the file can be imported
# (for example for testing) without parsing the command line.
if __name__ == '__main__':
        main()