chiark / gitweb /
Match a few more messages
[ypp-sc-tools.db-test.git] / yoweb-scrape
1 #!/usr/bin/python
2
3 #---------- setup ----------
4
5 import signal
6 signal.signal(signal.SIGINT, signal.SIG_DFL)
7
8 import os
9 import time
10 import urllib
11 import urllib2
12 import errno
13 import sys
14 import re as regexp
15 import random
16 import curses
17 from optparse import OptionParser
18
19 from BeautifulSoup import BeautifulSoup
20
21 opts = None
22
23 #---------- YPP parameters and arrays ----------
24
# All puzzle names known to this script, in display order.
puzzles = [
        'Swordfighting', 'Bilging', 'Sailing', 'Rigging', 'Navigating',
        'Battle Navigation', 'Gunning', 'Carpentry', 'Rumble',
        'Treasure Haul', 'Drinking', 'Spades', 'Hearts', 'Treasure Drop',
        'Poker', 'Distilling', 'Alchemistry', 'Shipwrightery',
        'Blacksmithing', 'Foraging',
]

# Standing names, lowest first; a standing's list index is used as
# its numeric value.
standingvals = [
        'Able', 'Distinguished', 'Respected', 'Master',
        'Renowned', 'Grand-Master', 'Legendary', 'Ultimate',
]

# Matches the start of an href that refers to a pirate page.
pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm')

# Column width used for pirate names in tabular output.
max_pirate_namelen = 12
36
37
38 #---------- general utilities ----------
39
def debug(m):
        # Print m to opts.debug_file, but only when the debug level
        # in the global options is positive.
        if opts.debug <= 0:
                return
        print >>opts.debug_file, m
43
def sleep(seconds):
        # Pause for the given number of seconds.  When debugging is
        # enabled the debug file is flushed first, so that pending
        # output is written before we block.
        debugging = opts.debug > 0
        if debugging:
                opts.debug_file.flush()
        time.sleep(seconds)
48
def format_time_interval(ti):
        # Render an interval of ti seconds compactly:
        #   under 2 minutes   'M:SS'
        #   under 2 hours     'NNm'
        #   under 1 day       'Nh'
        #   otherwise         'Nd'
        if ti < 120:
                return '%d:%02d' % (ti / 60, ti % 60)
        for (limit, fmt, unit) in ((7200, '%2dm', 60),
                                   (86400, '%dh', 3600)):
                if ti < limit:
                        return fmt % (ti / unit)
        return '%dd' % (ti / 86400)
54
55 #---------- caching and rate-limiting data fetcher ----------
56
57 class Fetcher:
58         def __init__(self, ocean, cachedir):
59                 debug('Fetcher init %s' % cachedir)
60                 self.ocean = ocean
61                 self.cachedir = cachedir
62                 try: os.mkdir(cachedir)
63                 except (OSError,IOError), oe:
64                         if oe.errno != errno.EEXIST: raise
65                 self._cache_scan(time.time())
66
67         def default_ocean(self, ocean='ice'):
68                 if self.ocean is None:
69                         self.ocean = ocean
70
71         def _cache_scan(self, now):
72                 # returns list of ages, unsorted
73                 ages = []
74                 debug('Fetcher   scan_cache')
75                 for leaf in os.listdir(self.cachedir):
76                         if not leaf.startswith('#'): continue
77                         path = self.cachedir + '/' + leaf
78                         try: s = os.stat(path)
79                         except (OSError,IOError), oe:
80                                 if oe.errno != errno.ENOENT: raise
81                                 continue
82                         age = now - s.st_mtime
83                         if age > opts.expire_age:
84                                 debug('Fetcher    expire %d %s' % (age, path))
85                                 try: os.remove(path)
86                                 except (OSError,IOError), oe:
87                                         if oe.errno != errno.ENOENT: raise
88                                 continue
89                         ages.append(age)
90                 return ages
91
92         def need_wait(self, now):
93                 ages = self._cache_scan(now)
94                 ages.sort()
95                 debug('Fetcher   ages ' + `ages`)
96                 min_age = 1
97                 need_wait = 0
98                 for age in ages:
99                         if age < min_age and age < 300:
100                                 debug('Fetcher   morewait min=%d age=%d' %
101                                         (min_age, age))
102                                 need_wait = max(need_wait, min_age - age)
103                         min_age += 3
104                         min_age *= 1.25
105                 return need_wait
106
107         def _rate_limit_cache_clean(self, now):
108                 need_wait = self.need_wait(now)
109                 if need_wait > 0:
110                         debug('Fetcher   wait %d' % need_wait)
111                         sleep(need_wait)
112
113         def fetch(self, url, max_age):
114                 debug('Fetcher fetch %s' % url)
115                 cache_corename = urllib.quote_plus(url)
116                 cache_item = "%s/#%s#" % (self.cachedir, cache_corename)
117                 try: f = file(cache_item, 'r')
118                 except (OSError,IOError), oe:
119                         if oe.errno != errno.ENOENT: raise
120                         f = None
121                 now = time.time()
122                 max_age = max(opts.min_max_age, min(max_age, opts.expire_age))
123                 if f is not None:
124                         s = os.fstat(f.fileno())
125                         age = now - s.st_mtime
126                         if age > max_age:
127                                 debug('Fetcher  stale %d < %d'% (max_age, age))
128                                 f = None
129                 if f is not None:
130                         data = f.read()
131                         f.close()
132                         debug('Fetcher  cached %d > %d' % (max_age, age))
133                         return data
134
135                 debug('Fetcher  fetch')
136                 self._rate_limit_cache_clean(now)
137
138                 stream = urllib2.urlopen(url)
139                 data = stream.read()
140                 cache_tmp = "%s/#%s~%d#" % (
141                         self.cachedir, cache_corename, os.getpid())
142                 f = file(cache_tmp, 'w')
143                 f.write(data)
144                 f.close()
145                 os.rename(cache_tmp, cache_item)
146                 debug('Fetcher  stored')
147                 return data
148
149         def yoweb(self, kind, tail, max_age):
150                 self.default_ocean()
151                 url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
152                         self.ocean, kind, tail)
153                 return self.fetch(url, max_age)
154
155 #---------- logging assistance for troubled screenscrapers ----------
156
class SoupLog:
        # Collects human-readable messages about scraping problems
        # in the list self.msgs.

        def __init__(self):
                self.msgs = [ ]

        def msg(self, m):
                # Record message m.
                self.msgs.append(m)

        def soupm(self, obj, m):
                # Record message m, annotated with the repr of the
                # object it concerns.
                annotated = m + '; in ' + repr(obj)
                self.msg(annotated)

        def needs_msgs(self, child_souplog):
                # Take over all messages from child_souplog, leaving
                # it with a fresh empty list.
                self.msgs.extend(child_souplog.msgs)
                child_souplog.msgs = [ ]
167
def soup_text(obj):
        # Concatenate every text node found under obj and strip
        # leading and trailing whitespace from the result.
        pieces = obj.findAll(text=True)
        return ''.join(pieces).strip()
171
class SomethingSoupInfo(SoupLog):
        # Base class for scrapers: fetches one yoweb page through the
        # global `fetcher' and parses it into self._soup.

        def __init__(self, kind, tail, max_age):
                SoupLog.__init__(self)
                self._soup = BeautifulSoup(
                        fetcher.yoweb(kind, tail, max_age),
                        convertEntities=BeautifulSoup.HTML_ENTITIES)
179
180 #---------- scraper for pirate pages ----------
181
class PirateInfo(SomethingSoupInfo):
        # Public data members:
        #  pi.standings = { 'Treasure Haul': 0 ... }
        #       values are indices into standingvals; a puzzle whose
        #       standing could not be determined has no entry
        #  pi.name = name
        #  pi.crew = (id, name)         or None if not found
        #  pi.flag = (id, name)         or None if not found
        #  pi.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, pirate, max_age=300):
                # Fetch and scrape the page for the named pirate.
                SomethingSoupInfo.__init__(self,
                        'pirate.wm?target=', pirate, max_age)
                self.name = pirate
                self._find_standings()
                self.crew = self._find_crewflag('crew',
                        '^/yoweb/crew/info\\.wm')
                self.flag = self._find_crewflag('flag',
                        '^/yoweb/flag/info\\.wm')

        def _find_standings(self):
                # Fills in self.standings from the standing images on
                # the page.  Problems are collected in a separate log
                # and only copied to self.msgs if at least one known
                # puzzle ended up without a standing.
                imgs = self._soup.findAll('img',
                        src=regexp.compile('/yoweb/images/stat.*'))
                # Either '<something>/<Standing>' or
                # '<something>/<something> (ocean-wide <Standing>)'.
                standing_re = regexp.compile(
u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
                        )
                standings = { }

                for skill in puzzles:
                        standings[skill] = [ ]

                skl = SoupLog()

                for img in imgs:
                        try: puzzle = img['alt']
                        except KeyError: continue

                        if not puzzle in puzzles:
                                skl.soupm(img, 'unknown puzzle: "%s"' % puzzle)
                                continue
                        key = img.findParent('td')
                        if key is None:
                                skl.soupm(img, 'puzzle at root! "%s"' % puzzle)
                                continue
                        valelem = key.findNextSibling('td')
                        if valelem is None:
                                skl.soupm(key, 'puzzle missing sibling "%s"'
                                        % puzzle)
                                continue
                        valstr = soup_text(valelem)
                        match = standing_re.match(valstr)
                        if match is None:
                                skl.soupm(key, ('puzzle "%s" unparseable'+
                                        ' standing "%s"') % (puzzle, valstr))
                                continue
                        # whichever alternative matched, its group is
                        # the last one that participated
                        standing = match.group(match.lastindex)
                        standings[puzzle].append(standing)

                self.standings = { }

                for puzzle in puzzles:
                        sl = standings[puzzle]
                        if len(sl) > 1:
                                skl.msg('puzzle "%s" multiple standings %s' %
                                                (puzzle, repr(sl)))
                                continue
                        if not sl:
                                skl.msg('puzzle "%s" no standing found' % puzzle)
                                continue
                        standing = sl[0]
                        # Look the name up among ALL standing names,
                        # including the last one ('Ultimate').
                        try:
                                self.standings[puzzle] = (
                                        standingvals.index(standing))
                        except ValueError:
                                skl.msg('puzzle "%s" unknown standing "%s"' %
                                        (puzzle, standing))

                for puzzle in puzzles:
                        if not puzzle in self.standings:
                                self.needs_msgs(skl)
                                break

        def _find_crewflag(self, cf, yoweb_re):
                # cf is 'crew' or 'flag'; yoweb_re matches the href of
                # the link to that crew's or flag's page.  Returns
                # (id, name), or None (after logging a message) unless
                # exactly one such link with a parseable id exists.
                things = self._soup.findAll('a', href=regexp.compile(yoweb_re))
                if len(things) != 1:
                        self.msg('zero or several %s id references found' % cf)
                        return None
                thing = things[0]
                id_re = '\\b%sid\\=(\\w+)$' % cf
                id_haystack = thing['href']
                match = regexp.compile(id_re).search(id_haystack)
                if match is None:
                        self.soupm(thing, ('incomprehensible %s id ref'+
                                ' (%s in %s)') % (cf, id_re, id_haystack))
                        return None
                name = soup_text(thing)
                return (match.group(1), name)

        def __str__(self):
                return repr((self.crew, self.flag, self.standings, self.msgs))
280
281 #---------- scraper for crew pages ----------
282
class CrewInfo(SomethingSoupInfo):
        # Public data members:
        #  ci.crew = [ ('Captain',        ['Pirate', ...]),
        #              ('Senior Officer', [...]),
        #               ... ]
        #  ci.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, crewid, max_age=300):
                # Fetch and scrape the info page of the crew with this id.
                SomethingSoupInfo.__init__(self,
                        'crew/info.wm?crewid=', crewid, max_age)
                self._find_crew()

        def _find_crew(self):
                # Locate the member table by starting at the (unique)
                # captain image and walking up through enclosing
                # tables until one contains a pirate link; then read
                # the ranks and members off its rows.
                self.crew = []
                captain_imgs = self._soup.findAll('img',
                        src='/yoweb/images/crew-captain.png')
                if len(captain_imgs) != 1:
                        self.msg('crew members: no. of captain images != 1')
                        return

                table = captain_imgs[0]
                while True:
                        if table.find('a', href=pirate_ref_re):
                                break
                        table = table.findParent('table')
                        if not table:
                                self.msg('crew members: cannot find table')
                                return

                members = None          # member list of the current rank
                rank_img_re = regexp.compile('/yoweb/images/crew')
                for row in table.contents:
                        # skip bare text between the rows
                        if isinstance(row,basestring):
                                continue

                        if row.find('img', attrs={'src': rank_img_re}):
                                # a row with a rank image starts a new rank
                                members = []
                                self.crew.append((soup_text(row), members))
                                continue

                        for link in row.findAll('a', href=pirate_ref_re):
                                if members is not None:
                                        members.append(soup_text(link))
                                else:
                                        self.soupm(link, 'crew members: crew'
                                                ' before rank')

        def __str__(self):
                return repr((self.crew, self.msgs))
330
331 #---------- pretty-printer for tables of pirate puzzle standings ----------
332
class StandingsTable:
        # Builds up, in self.s, a fixed-width text table with one row
        # per pirate and one column per puzzle (or group of puzzles).
        # An entry of self._puzzles that is a list denotes a group,
        # for which the best of the pirate's standings is shown.

        def __init__(self, use_puzzles=None, col_width=6):
                if use_puzzles is not None:
                        columns = use_puzzles
                elif opts.ship_duty:
                        columns = [
                                'Navigating','Battle Navigation',
                                'Gunning',
                                ['Sailing','Rigging'],
                                'Bilging',
                                'Carpentry',
                                'Treasure Haul'
                        ]
                else:
                        columns = puzzles
                self._puzzles = columns
                self.s = ''
                # one column of col_width is taken by the separator
                self._cw = col_width-1

        def _pline(self, pirate, puzstrs, extra):
                # Append one row: name column, then each of puzstrs
                # padded and truncated to the column width, then the
                # optional extra text.
                line = ' %-*s' % (max(max_pirate_namelen, 14), pirate)
                for cell in puzstrs:
                        line += ' %-*.*s' % (self._cw, self._cw, cell)
                if extra:
                        line += ' ' + extra
                self.s += line + '\n'

        def _puzstr(self, pi, puzzle):
                # Cell text for pirate pi in column `puzzle': '?' if a
                # standing is not known, empty for the lowest standing,
                # otherwise the numeric standing (wide columns only)
                # followed by one '*' per two levels and a '+' for an
                # odd level.
                if not isinstance(puzzle,list): puzzle = [puzzle]
                try: standing = max([pi.standings[p] for p in puzzle])
                except KeyError: return '?'
                if not standing: return ''
                out = ''
                if self._cw > 4:
                        # NOTE(review): c1 is computed but never used
                        # in the output; kept as in the original.
                        c1 = standingvals[standing][0]
                        if standing < 3: c1 = c1.lower() # 3 = Master
                        out += repr(standing)
                if self._cw > 5:
                        out += ' '
                out += '*' * (standing // 2)
                out += '+' * (standing % 2)
                return out

        def headings(self):
                # Append the heading row of abbreviated puzzle names.
                def puzn_redact(name):
                        if isinstance(name,list):
                                # a group: each member gets half a column
                                half = self._cw // 2
                                return '/'.join(
                                        ["%.*s" % (half, puzn_redact(n))
                                         for n in name])
                        spc = name.find(' ')
                        if spc < 0: return name
                        # two words: at most 4 chars of the first,
                        # directly followed by the rest
                        return name[:min(4,spc)] + name[spc+1:]
                titles = [puzn_redact(p) for p in self._puzzles]
                self._pline('', titles, None)

        def literalline(self, line):
                # Append `line' verbatim as a row of its own.
                self.s += line + '\n'

        def pirate_dummy(self, name, standingstring, extra=None):
                # Append a row for `name' whose cells come from
                # standingstring repeated once per column.
                self._pline(name, standingstring * len(self._puzzles), extra)

        def pirate(self, pi, extra=None):
                # Append the row for pirate info object pi.
                cells = [self._puzstr(pi,puz) for puz in self._puzzles]
                self._pline(pi.name, cells, extra)

        def results(self):
                # The table text accumulated so far.
                return self.s
395
396 #---------- chat log parser ----------
397
class PirateAboard:
        # This is essentially a transparent, dumb, data class.
        #  pa.v                 vessel record the pirate is aboard
        #  pa.name
        #  pa.last_time         timestamp of the last event
        #  pa.last_event
        #  pa.gunner
        #  pa.last_chat_time
        #  pa.last_chat_chan
        #  pa.pi                PirateInfo, once fetched

        def __init__(self, pn, v, time, event):
                self.name = pn
                self.v = v
                self.last_time = time
                self.last_event = event
                self.gunner = False
                self.last_chat_time = None
                self.last_chat_chan = None
                self.pi = None

        def pirate_info(self):
                # Returns the PirateInfo for this pirate, fetching it
                # on first use - but only if the fetcher would not
                # have to wait; otherwise returns None for now.
                if self.pi:
                        return self.pi
                if not fetcher.need_wait(time.time()):
                        self.pi = PirateInfo(self.name, 3600)
                return self.pi
423
424 class ChatLogTracker:
425         # This is quite complex so we make it opaque.  Use the
426         # official invokers, accessors etc.
427
428         def __init__(self, myself_pi, logfn):
429                 self._pl = {}   # self._pl['Pirate'] =
430                 self._vl = {}   #   self._vl['Vessel']['Pirate'] = PirateAboard
431                                 # self._vl['Vessel']['#lastinfo']
432                                 # self._vl['Vessel']['#name']
433                                 # self._v = self._vl[self._vessel]
434                 self._date = None
435                 self._myself = myself_pi
436                 self._f = file(logfn)
437                 self._lbuf = ''
438                 self._progress = [0, os.fstat(self._f.fileno()).st_size]
439                 self._disembark_myself()
440                 self._need_redisplay = False
441
442         def _disembark_myself(self):
443                 self._v = None
444                 self._vessel = None
445                 self.force_redisplay()
446
447         def force_redisplay(self):
448                 self._need_redisplay = True
449
450         def _vessel_updated(self, v, timestamp):
451                 v['#lastinfo'] = timestamp
452                 self.force_redisplay()
453
454         def _onboard_event(self,v,timestamp,pirate,event):
455                 pa = self._pl.get(pirate, None)
456                 if pa is not None and pa.v is v:
457                         pa.last_time = timestamp
458                         pa.last_event = event
459                 else:
460                         if pa is not None: del pa.v[pirate]
461                         pa = PirateAboard(pirate, v, timestamp, event)
462                         self._pl[pirate] = pa
463                         v[pirate] = pa
464                 self._vessel_updated(v, timestamp)
465                 return pa
466
467         def _trash_vessel(self, v):
468                 for pn in v:
469                         if pn.startswith('#'): continue
470                         del self._pl[pn]
471                 vn = v['#name']
472                 del self._vl[vn]
473                 if v is self._v: self._disembark_myself()
474                 self.force_redisplay()
475
476         def _vessel_stale(self, v, timestamp):
477                 return timestamp - v['#lastinfo'] > opts.ship_reboard_clearout
478
479         def _vessel_check_expire(self, v, timestamp):
480                 if not self._vessel_stale(v, timestamp):
481                         return v
482                 self._debug_line_disposition(timestamp,'',
483                         'stale-reset ' + v['#name'])
484                 self._trash_vessel(v)
485                 return None
486
487         def expire_garbage(self, timestamp):
488                 for v in self._vl.values():
489                         self._vessel_check_expire(v, timestamp)
490
491         def _vessel_lookup(self, vn, timestamp, dml=[], create=False):
492                 v = self._vl.get(vn, None)
493                 if v is not None:
494                         v = self._vessel_check_expire(v, timestamp)
495                 if v is not None:
496                         dml.append('found')
497                         return v
498                 if not create:
499                         dml.append('no')
500                 dml.append('new')
501                 self._vl[vn] = v = { '#name': vn }
502                 self._vessel_updated(v, timestamp)
503                 return v
504
505         def _find_matching_vessel(self, pattern, timestamp, cmdr,
506                                         dml=[], create=False):
507                 # use when a commander pirate `cmdr' specified a vessel
508                 #  by name `pattern' (either may be None)
509                 # if create is true, will create the vessel
510                 #  record if an exact name is specified
511
512                 if (pattern is not None and
513                     not '*' in pattern
514                     and len(pattern.split(' ')) == 2):
515                         vn = pattern.title()
516                         dml.append('exact')
517                         return self._vessel_lookup(
518                                 vn, timestamp, dml=dml, create=create)
519
520                 if pattern is None:
521                         pattern_check = lambda vn: True
522                 else:
523                         re = '(?:.* )?%s$' % pattern.lower().replace('*','.*')
524                         pattern_check = regexp.compile(re, regexp.I).match
525
526                 tries = []
527
528                 cmdr_pa = self._pl.get(cmdr, None)
529                 if cmdr_pa: tries.append((cmdr_pa.v, 'cmdr'))
530
531                 tries.append((self._v, 'here'))
532                 tried_vns = []
533
534                 for (v, dm) in tries:
535                         if v is None: dml.append(dm+'?'); continue
536                         
537                         vn = v['#name']
538                         if not pattern_check(vn):
539                                 tried_vns.append(vn)
540                                 dml.append(dm+'#')
541                                 continue
542
543                         dml.append(dm+'!')
544                         return v
545
546                 if pattern is not None and '*' in pattern:
547                         search = [
548                                 (vn,v)
549                                 for (vn,v) in self._vl.iteritems()
550                                 if not self._vessel_stale(v, timestamp)
551                                 if pattern_check(vn)
552                                 ]
553                         #debug('CLT-RE /%s/ wanted (%s) searched (%s)' % (
554                         #       re,
555                         #       '/'.join(tried_vns),
556                         #       '/'.join([vn for (vn,v) in search])))
557
558                         if len(search)==1:
559                                 dml.append('one')
560                                 return search[0][1]
561                         elif search:
562                                 dml.append('many')
563                         else:
564                                 dml.append('none')
565
566         def _debug_line_disposition(self,timestamp,l,m):
567                 debug('CLT %13s %-40s %s' % (timestamp,m,l))
568
        def chatline(self,l):
                # Process one line l of the chat log, updating our idea
                # of who is aboard which vessel.  Every path ends by
                # reporting what was done with the line via d(), whose
                # result is returned.
                #
                # rm matches a regexp against the start of l, and d
                # debug-logs a disposition.  Both lambdas look up l
                # and timestamp when called, so they see the values
                # assigned to those variables further down.
                rm = lambda re: regexp.match(re,l)
                d = lambda m: self._debug_line_disposition(timestamp,l,m)
                timestamp = None

                # A date header line sets the current date.
                m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$')
                if m:
                        self._date = [int(x) for x in m.groups()]
                        self._previous_timestamp = None
                        return d('date '+`self._date`)

                if self._date is None:
                        return d('date unset')

                # Every other interesting line starts [hh:mm:ss].
                m = rm('\\[(\d\d):(\d\d):(\d\d)\\] ')
                if not m:
                        return d('no timestamp')

                # Combine the date and the time of day.  If that would
                # take us back in time compared with the previous line,
                # advance the third date field (the day, going by the
                # field order time.mktime expects) until it does not.
                while True:
                        time_tuple = (self._date +
                                      [int(x) for x in m.groups()] +
                                      [-1,-1,-1])
                        timestamp = time.mktime(time_tuple)
                        if timestamp >= self._previous_timestamp: break
                        self._date[2] += 1
                        self._debug_line_disposition(timestamp,'',
                                'new date '+`self._date`)

                self._previous_timestamp = timestamp

                # Strip the timestamp; from here on rm matches against
                # the rest of the line.
                l = l[l.find(' ')+1:]

                # Helpers for recording events on the vessel we are on.
                # ob1 and oba use the groups of the most recent match m.
                def ob_x(pirate,event):
                        return self._onboard_event(
                                        self._v, timestamp, pirate, event)
                def ob1(did): ob_x(m.group(1), did); return d(did)
                def oba(did): return ob1('%s %s' % (did, m.group(2)))

                # Record the departure as an event (which updates the
                # vessel's last-info time), then forget the pirate.
                def disembark(v, timestamp, pirate, event):
                        self._onboard_event(
                                        v, timestamp, pirate, 'leaving '+event)
                        del v[pirate]
                        del self._pl[pirate]

                def disembark_me(why):
                        self._disembark_myself()
                        return d('disembark-me '+why)

                m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$')
                if m:
                        dm = ['boarding']
                        pn = self._myself.name
                        vn = m.group(1)
                        v = self._vessel_lookup(vn, timestamp, dm, create=True)
                        self._vessel = vn
                        self._v = v
                        ob_x(pn, 'we boarded')
                        self.expire_garbage(timestamp)
                        return d(' '.join(dm))

                # Everything below is only meaningful while we are
                # aboard a vessel.
                if self._v is None:
                        return d('no vessel')

                m = rm('(\\w+) has come aboard\\.$')
                if m: return ob1('boarded');

                m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$')
                if m:
                        (who,what) = m.groups()
                        pa = ob_x(who,'ord '+what)
                        if what == 'Gunning':
                                pa.gunner = True
                        return d('duty order')

                m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$')
                if m: oba('stopped'); return d("end")

                # Note a chat by `speaker' on channel `chan'.  Returns
                # a word saying where the speaker is: 'mystery' if not
                # known to us, 'elsewhere' if on another vessel, or
                # 'here' (the only case in which anything is recorded).
                def chat_core(speaker, chan):
                        try: pa = self._pl[speaker]
                        except KeyError: return 'mystery'
                        if pa.v is not self._v: return 'elsewhere'
                        pa.last_chat_time = timestamp
                        pa.last_chat_chan = chan
                        self.force_redisplay()
                        return 'here'

                def chat(chan):
                        speaker = m.group(1)
                        dm = chat_core(speaker, chan)
                        return d('chat %s %s' % (chan, dm))

                # Chat which may contain a command of the form
                #   /a [<vessel pattern>:] <pirate> ...
                #   /d [<vessel pattern>:] <pirate> ...
                # With /a each named pirate is recorded as aboard the
                # vessel; with any other command letter (ie /d) each
                # is disembarked from it.  Anything else is treated as
                # ordinary chat.
                def chat_metacmd(chan):
                        (cmdr, metacmd) = m.groups()
                        metacmd = regexp.sub('\\s+', ' ', metacmd).strip()
                        m2 = regexp.match(
                                '/([ad]) (?:([A-Za-z* ]+)\\s*:)?([A-Za-z ]+)$',
                                metacmd)
                        if not m2: return chat(chan)

                        (cmd, pattern, targets) = m2.groups()
                        dml = ['cmd', chan, cmd]

                        if cmd == 'a': each = self._onboard_event
                        else: each = disembark

                        if cmdr == self._myself.name:
                                dml.append('self')
                                how = 'cmd: %s' % cmd
                        else:
                                dml.append('other')
                                how = 'cmd: %s %s' % (cmd,cmdr)

                        v = self._find_matching_vessel(
                                pattern, timestamp, cmdr, dml, create=True)

                        if v is not None:
                                targets = targets.strip().split(' ')
                                dml.append(`len(targets)`)
                                for target in targets:
                                        each(v, timestamp, target.title(), how)
                                self._vessel_updated(v, timestamp)

                        dm = ' '.join(dml)
                        chat_core(cmdr, 'cmd '+chan)
                        return d(dm)

                m = rm('(\\w+) (?:issued an order|ordered everyone) "')
                if m: return ob1('general order');

                m = rm('(\\w+) says, "')
                if m: return chat('public')

                m = rm('(\\w+) tells ye, "')
                if m: return chat('private')

                m = rm('Ye told (\\w+), "(.*)"$')
                if m: return chat_metacmd('private')

                # 'flag officer chats' has to be tried before the
                # plain 'officer chats' pattern.
                m = rm('(\\w+) flag officer chats, "')
                if m: return chat('flag officer')

                m = rm('(\\w+) officer chats, "(.*)"$')
                if m: return chat_metacmd('officer')

                m = rm('Ye accepted the offer to job with ')
                if m: return disembark_me('jobbing')

                m = rm('Ye hop on the ferry and are whisked away ')
                if m: return disembark_me('ferry')

                m = rm('Whisking away to yer home on the magical winds')
                if m: return disembark_me('home')

                # The winners are only recorded as aboard if we are
                # among them ourselves; names containing a space are
                # skipped.
                m = rm('Game over\\.  Winners: ([A-Za-z, ]+)\\.$')
                if m:
                        pl = m.group(1).split(', ')
                        if not self._myself.name in pl:
                                return d('lost melee')
                        for pn in pl:
                                if ' ' in pn: continue
                                ob_x(pn,'won melee')
                        return d('won melee')

                m = rm('(\\w+) is eliminated\\!')
                if m: return ob1('eliminated in fray');

                m = rm('(\\w+) has driven \w+ from the ship\\!')
                if m: return ob1('boarder repelled');

                m = rm('\w+ has bested (\\w+), and turns'+
                        ' to the rest of the ship\\.')
                if m: return ob1('boarder unrepelled');

                m = rm('(\\w+) has left the vessel\.')
                if m:
                        pirate = m.group(1)
                        disembark(self._v, timestamp, pirate, 'disembarked')
                        return d('disembarked')

                return d('not-matched')
749
750         def _str_vessel(self, vn, v):
751                 s = ' vessel %s\n' % vn
752                 s += ' '*20 + "%-*s   %13s\n" % (
753                                 max_pirate_namelen, '#lastinfo',
754                                 v['#lastinfo'])
755                 assert v['#name'] == vn
756                 for pn in sorted(v.keys()):
757                         if pn.startswith('#'): continue
758                         pa = v[pn]
759                         assert pa.v == v
760                         assert self._pl[pn] == pa
761                         s += ' '*20 + "%s %-*s %13s %-30s %13s %s\n" % (
762                                 (' ','G')[pa.gunner],
763                                 max_pirate_namelen, pn,
764                                 pa.last_time, pa.last_event,
765                                 pa.last_chat_time, pa.last_chat_chan)
766                 return s
767
768         def __str__(self):
769                 s = '''<ChatLogTracker
770  myself %s
771  vessel %s
772 '''                     % (self._myself.name, self._vessel)
773                 assert ((self._v is None and self._vessel is None) or
774                         (self._v is self._vl[self._vessel]))
775                 if self._vessel is not None:
776                         s += self._str_vessel(self._vessel, self._v)
777                 for vn in sorted(self._vl.keys()):
778                         if vn == self._vessel: continue
779                         s += self._str_vessel(vn, self._vl[vn])
780                 for p in self._pl:
781                         pa = self._pl[p]
782                         assert pa.v[p] is pa
783                         assert pa.v in self._vl.values()
784                 s += '>\n'
785                 return s
786
787         def catchup(self, progress=None):
788                 while True:
789                         more = self._f.readline()
790                         if not more: break
791
792                         self._progress[0] += len(more)
793                         if progress: progress.progress(*self._progress)
794
795                         self._lbuf += more
796                         if self._lbuf.endswith('\n'):
797                                 self.chatline(self._lbuf.rstrip())
798                                 self._lbuf = ''
799                                 if opts.debug >= 2:
800                                         debug(self.__str__())
801                 if progress: progress.caughtup()
802
803         def changed(self):
804                 rv = self._need_redisplay
805                 self._need_redisplay = False
806                 return rv
807         def myname(self):
808                 # returns our pirate name
809                 return self._myself.name
810         def vessel(self):
811                 # returns the vessel we're aboard or None
812                 return self._vessel
813         def aboard(self):
814                 # returns a list of PirateAboard sorted by name
815                 if self._v is None: return []
816                 return [ self._v[pn]
817                          for pn in sorted(self._v.keys())
818                          if not pn.startswith('#') ]
819
820 #---------- implementations of actual operation modes ----------
821
def do_pirate(pirates, bu):
        # 'pirate' mode: prints a dict-like listing with one
        # "repr(name): info," line per requested pirate.  bu (the usage
        # error callback) is not needed here.
        print('{')
        for pirate in pirates:
                entry = '%s: %s,' % (repr(pirate), PirateInfo(pirate))
                print(entry)
        print('}')
828
def prep_crew_of(args, bu, max_age=300):
        # Looks up the crew of the single pirate named in args.
        # bu is called with a message if args is not exactly one name.
        # Returns a CrewInfo built from the first element of the
        # pirate's crew entry, or None if the pirate has no crew entry.
        # max_age is passed on to both lookups.
        if len(args) != 1:
                bu('crew-of takes one pirate name')
        pirate = PirateInfo(args[0], max_age)
        if pirate.crew is None:
                return None
        crew_ref = pirate.crew[0]
        return CrewInfo(crew_ref, max_age)
834
def do_crew_of(args, bu):
        # 'crew-of' mode: prints whatever prep_crew_of found for the
        # named pirate (which is None if the pirate has no crew)
        print(prep_crew_of(args, bu))
838
def do_standings_crew_of(args, bu):
        # 'standings-crew-of' mode: prints a standings table covering
        # every member of the named pirate's crew, grouped by rank.
        # bu is the usage/error callback; it is also used to report a
        # pirate who has no crew.
        ci = prep_crew_of(args, bu, 60)
        if ci is None:
                # prep_crew_of returns None for a crewless pirate; going
                # on would only fail with an AttributeError on ci.crew
                bu('pirate %s is not in a crew' % args[0])
                return
        tab = StandingsTable()
        tab.headings()
        for (rank, members) in ci.crew:
                if not members: continue
                tab.literalline('%s:' % rank)
                for p in members:
                        # a different random maximum age for each pirate
                        pi = PirateInfo(p, random.randint(900,1800))
                        tab.pirate(pi)
        print(tab.results())
850
class ProgressPrintPercentage:
        # Progress reporter which writes a percentage status line to a
        # file object (sys.stdout unless told otherwise).  The status
        # line ends in a carriage return rather than a newline, so each
        # update overwrites the previous one.

        def __init__(self, f=sys.stdout):
                self._f = f

        def progress_string(self,done,total):
                # text of the status line for done out of total
                percent = (done*100) / total
                return "scan chat logs %3d%%\r" % percent

        def progress(self,*a):
                # writes the status line for the given (done, total)
                out = self._f
                out.write(self.progress_string(*a))
                out.flush()

        def show_init(self, pirate, ocean):
                # startup banner
                banner = 'Starting up, %s on the %s ocean' % (
                        pirate, ocean)
                print >>self._f, banner

        def caughtup(self):
                # blanks out the status line
                out = self._f
                out.write('                   \r')
                out.flush()
865
866 #----- modes which use the chat log parser are quite complex -----
867
def prep_chat_log(args, bu,
                progress=ProgressPrintPercentage(),
                max_myself_age=3600):
        # Common setup for the modes which follow a chat log.
        #
        # args must be exactly one chat log filename whose last path
        # component starts with PIRATE_OCEAN_ ; otherwise bu is called
        # with a message.  The ocean from the filename is given to the
        # fetcher as its default, our own pirate is looked up (with
        # max_myself_age) and the existing log contents are replayed
        # through a new ChatLogTracker, reporting to progress.
        #
        # Returns (myself, track).
        if len(args) != 1: bu('this action takes only chat log filename')
        logfn = args[0]
        logfn_re = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_'
        match = regexp.match(logfn_re, logfn)
        if not match: bu('chat log filename is not in expected format')
        (pirate, ocean) = match.groups()
        fetcher.default_ocean(ocean)

        myself = PirateInfo(pirate,max_myself_age)
        progress.show_init(pirate, fetcher.ocean)
        track = ChatLogTracker(myself, logfn)

        # Replay the backlog with the debug level lowered by two, and
        # always put the level back, even if the replay fails, so that
        # the global setting is never left wrong.
        opts.debug -= 2
        try:
                track.catchup(progress)
        finally:
                opts.debug += 2

        track.force_redisplay()

        return (myself, track)
890
def do_track_chat_log(args, bu):
        # 'track-chat-log' mode: follows the chat log forever, checking
        # for new data once a second and printing the tracker state
        # whenever it reports a change
        (_myself, tracker) = prep_chat_log(args, bu)
        while True:
                tracker.catchup()
                state_changed = tracker.changed()
                if state_changed:
                        print(tracker)
                sleep(1)
898
899 #----- ship management aid -----
900
901 class Display_dumb(ProgressPrintPercentage):
902         def __init__(self):
903                 ProgressPrintPercentage.__init__(self)
904         def show(self, s):
905                 print '\n\n', s;
906         def realstart(self):
907                 pass
908
class Display_overwrite(ProgressPrintPercentage):
        # Ship-aid display which redraws each report at the top of the
        # screen using terminfo capabilities.
        #
        #  self._clear   the 'clear' capability string, or None/empty
        #                if the terminal cannot be driven at all; show
        #                then behaves like Display_dumb.show
        #  self._t       capability strings used by show:
        #                'ho' (go home), 'el' and 'ed' (selective
        #                clears).  If any of them is unavailable, 'ho'
        #                is the clear string and the others are empty,
        #                so that every redraw clears the whole screen.

        def __init__(self):
                ProgressPrintPercentage.__init__(self)

                self._clear = None
                self._t = {'el':'', 'ed':''}

                # setupterm is given /dev/null rather than our real
                # output; close it explicitly once the capability
                # lookups are done instead of relying on garbage
                # collection
                null = open('/dev/null','w')
                try:
                        try:
                                curses.setupterm(fd=null.fileno())
                        except curses.error:
                                # unusable terminal type: dumb fallback
                                self._debug('setupterm failed!')
                                return

                        self._clear = curses.tigetstr('clear')
                        if not self._clear:
                                self._debug('missing clear!')
                                return

                        if not self._init_sophisticated():
                                for k in self._t.keys(): self._t[k] = ''
                                self._t['ho'] = self._clear
                finally:
                        null.close()

        def _debug(self,m): debug('display overwrite: '+m)

        def _init_sophisticated(self):
                # Fills self._t from terminfo.  Returns 1 if every
                # capability we want is available, 0 otherwise.
                for k in self._t.keys():
                        self._t[k] = curses.tigetstr(k)
                home = curses.tigetstr('ho')
                if not home:
                        # no 'ho': address the cursor to 0,0 instead,
                        # if the terminal has 'cup' at all
                        cup = curses.tigetstr('cup')
                        if cup:
                                home = curses.tparm(cup,0,0)
                self._t['ho'] = home
                missing = [k for k in self._t.keys() if not self._t[k]]
                if missing:
                        self._debug('missing '+(' '.join(missing)))
                        return 0
                return 1

        def show(self, s):
                # Displays the report s, overwriting the previous one.
                w = sys.stdout.write

                if not self._clear:
                        # cannot drive the terminal: produce the same
                        # output as Display_dumb.show
                        w('\n\n%s\n' % (s,))
                        return

                def wti(k): w(self._t[k])

                wti('ho')
                nl = ''
                for l in s.rstrip().split('\n'):
                        w(nl)
                        w(l)
                        wti('el')
                        nl = '\r\n'
                wti('ed')
                w(' ')
                sys.stdout.flush()

        def realstart(self):
                # Clears the screen once before the first report, if
                # the terminal is able to.
                if not self._clear:
                        return
                sys.stdout.write(self._clear)
                sys.stdout.flush()
961                         
962
def do_ship_aid(args, bu):
        # 'ship-aid' mode: follows the chat log forever and, once a
        # second, redisplays a standings table of everyone aboard our
        # vessel together with the age and kind of each pirate's last
        # event and last chat.
        if opts.ship_duty is None:
                opts.ship_duty = True

        # the display class is chosen by name from --display
        display_class = globals()['Display_'+opts.display]
        displayer = display_class()
        rotate_nya = '/-\\'

        (myself, track) = prep_chat_log(args, bu, progress=displayer)

        def timeevent(now, t, e):
                # fixed-width "how long ago, what" column; blank if
                # nothing has been recorded
                if t is None:
                        return ' ' * 22
                age = format_time_interval(now - t)
                return " %-4s %-16s" % (age, e)

        displayer.realstart()

        while True:
                track.catchup()
                now = time.time()

                me = track.myname()
                vn = track.vessel()
                if vn is None:
                        where = " not on a vessel"
                else:
                        where = " on board the %s" % vn
                stamp = time.strftime("%Y-%m-%d %H:%M:%S")
                s = "%s%s at %s\n" % (me, where, stamp)

                tbl = StandingsTable()
                tbl.headings()

                for pa in track.aboard():
                        pi = pa.pirate_info()

                        xs = 'G ' if pa.gunner else '  '
                        xs += timeevent(now, pa.last_time, pa.last_event)
                        xs += timeevent(now, pa.last_chat_time,
                                        pa.last_chat_chan)

                        if pi is None:
                                # no info for this pirate: show a
                                # placeholder row carrying the current
                                # spinner character
                                tbl.pirate_dummy(pa.name,
                                                 rotate_nya[0], xs)
                        else:
                                tbl.pirate(pi, xs)

                s += tbl.results()

                displayer.show(s)
                sleep(1)
                # advance the spinner by one position
                rotate_nya = rotate_nya[1:3] + rotate_nya[:1]
1010
1011 #---------- main program ----------
1012
def main():
        # Program entry point: parses the command line into the global
        # opts, fills in derived and fixed settings, creates the global
        # fetcher, and runs the do_<mode> function named by the first
        # non-option argument, passing it the remaining arguments and
        # the parser's error function.
        global opts, fetcher

        pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
actions:
 yoweb-scrape [--ocean OCEAN ...] pirate PIRATE
 yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
 yoweb-scrape [options] ship-aid CHAT-LOG  (must be .../PIRATE_OCEAN_chat-log*)

display modes (for --display) apply to ship-aid:
 --display=dumb       just print new information, scrolling the screen
 --display=overwrite  use cursor motion, selective clear, etc. to redraw at top
''')
        ao = pa.add_option
        ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None,
                help='select ocean OCEAN')
        ao('--cache-dir', dest='cache_dir', metavar='DIR',
                default='~/.yoweb-scrape-cache',
                help='cache yoweb pages in DIR')
        ao('-D','--debug', action='count', dest='debug', default=0,
                help='enable debugging output')
        ao('--debug-fd', type='int', dest='debug_fd',
                help='write any debugging output to specified fd')
        ao('-q','--quiet', action='store_true', dest='quiet',
                help='suppress warning output')
        ao('--display', action='store', dest='display',
                type='choice', choices=['dumb','overwrite'],
                help='how to display ship aid')

        ao('--ship-duty', action='store_true', dest='ship_duty',
                help='show ship duty station puzzles')
        ao('--all-puzzles', action='store_false', dest='ship_duty',
                help='show all puzzles, not just ship duty stations')

        (opts,args) = pa.parse_args()
        random.seed()

        if len(args) < 1:
                pa.error('need a mode argument')

        if opts.debug_fd is not None:
                opts.debug_file = os.fdopen(opts.debug_fd, 'w')
        else:
                opts.debug_file = sys.stdout

        # Map the mode to its function name: '-' becomes '_', and a
        # literal '_' becomes '#' so that it can never match.
        mode = args[0]
        mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_')
        try: mode_fn = globals()[mode_fn_name]
        except KeyError: pa.error('unknown mode "%s"' % mode)

        # fixed parameters
        opts.min_max_age = 60
        opts.expire_age = 3600
        opts.ship_reboard_clearout = 3600

        if opts.cache_dir.startswith('~/'):
                # expanduser copes with HOME being unset, where
                # concatenating os.getenv('HOME') would raise TypeError
                opts.cache_dir = os.path.expanduser(opts.cache_dir)

        if opts.display is None:
                # overwrite mode only makes sense on a terminal which
                # is not also receiving debugging output
                if ((opts.debug > 0 and opts.debug_fd is None)
                    or not os.isatty(sys.stdout.fileno())):
                        opts.display = 'dumb'
                else:
                        opts.display = 'overwrite'

        fetcher = Fetcher(opts.ocean, opts.cache_dir)

        mode_fn(args[1:], pa.error)
1084
# Run the program only when executed as a script, so that the file can
# also be imported without side effects.
if __name__ == '__main__':
        main()