chiark / gitweb /
flush debugging output before sleeping
[ypp-sc-tools.web-live.git] / yoweb-scrape
1 #!/usr/bin/python
2
3 #---------- setup ----------
4
5 import signal
6 signal.signal(signal.SIGINT, signal.SIG_DFL)
7
8 import os
9 import time
10 import urllib
11 import urllib2
12 import errno
13 import sys
14 import re as regexp
15 import random
16 import curses
17 from optparse import OptionParser
18
19 from BeautifulSoup import BeautifulSoup
20
21 opts = None
22
23 #---------- YPP parameters and arrays ----------
24
# Every puzzle name that may appear as the alt text of a standing
# image on a pirate page.
puzzles = [
        'Swordfighting', 'Bilging', 'Sailing', 'Rigging', 'Navigating',
        'Battle Navigation', 'Gunning', 'Carpentry', 'Rumble',
        'Treasure Haul',
        'Drinking', 'Spades', 'Hearts', 'Treasure Drop', 'Poker',
        'Distilling',
        'Alchemistry', 'Shipwrightery', 'Blacksmithing', 'Foraging',
]

# Standing names; a standing is stored elsewhere as an index into
# this list.
standingvals = [
        'Able', 'Distinguished', 'Respected', 'Master',
        'Renowned', 'Grand-Master', 'Legendary', 'Ultimate',
]

# Matches the href of a link to a pirate page.
pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm')

# Column width used when printing pirate names.
max_pirate_namelen = 12
36
37
38 #---------- general utilities ----------
39
def debug(m):
        # Write the message m, followed by a newline, to opts.debug_file,
        # but only when the debug level in opts.debug is positive.
        if not (opts.debug > 0):
                return
        print >>opts.debug_file, m
43
def sleep(seconds):
        # Pause for the given number of seconds.  When debugging is on,
        # flush the debug stream first so that everything logged so far
        # is visible while we are asleep.
        debugging = opts.debug > 0
        if debugging:
                opts.debug_file.flush()
        time.sleep(seconds)
48
def format_time_interval(ti):
        # Render a duration ti (in seconds) as a short string, choosing
        # the unit by magnitude:
        #   a day or more      -> 'Nd'
        #   two hours or more  -> 'Nh'
        #   two minutes or more-> 'NNm'  (minutes padded to width 2)
        #   otherwise          -> 'M:SS'
        if ti >= 86400:
                return '%dd' % (ti / 86400)
        if ti >= 7200:
                return '%dh' % (ti / 3600)
        if ti >= 120:
                return '%2dm' % (ti / 60)
        return '%d:%02d' % (ti / 60, ti % 60)
54
55 #---------- caching and rate-limiting data fetcher ----------
56
57 class Fetcher:
58         def __init__(self, ocean, cachedir):
59                 debug('Fetcher init %s' % cachedir)
60                 self.ocean = ocean
61                 self.cachedir = cachedir
62                 try: os.mkdir(cachedir)
63                 except (OSError,IOError), oe:
64                         if oe.errno != errno.EEXIST: raise
65                 self._cache_scan(time.time())
66
67         def default_ocean(self, ocean='ice'):
68                 if self.ocean is None:
69                         self.ocean = ocean
70
71         def _cache_scan(self, now):
72                 # returns list of ages, unsorted
73                 ages = []
74                 debug('Fetcher   scan_cache')
75                 for leaf in os.listdir(self.cachedir):
76                         if not leaf.startswith('#'): continue
77                         path = self.cachedir + '/' + leaf
78                         try: s = os.stat(path)
79                         except (OSError,IOError), oe:
80                                 if oe.errno != errno.ENOENT: raise
81                                 continue
82                         age = now - s.st_mtime
83                         if age > opts.expire_age:
84                                 debug('Fetcher    expire %d %s' % (age, path))
85                                 try: os.remove(path)
86                                 except (OSError,IOError), oe:
87                                         if oe.errno != errno.ENOENT: raise
88                                 continue
89                         ages.append(age)
90                 return ages
91
92         def need_wait(self, now):
93                 ages = self._cache_scan(now)
94                 ages.sort()
95                 debug('Fetcher   ages ' + `ages`)
96                 min_age = 1
97                 need_wait = 0
98                 for age in ages:
99                         if age < min_age and age < 300:
100                                 debug('Fetcher   morewait min=%d age=%d' %
101                                         (min_age, age))
102                                 need_wait = max(need_wait, min_age - age)
103                         min_age += 3
104                         min_age *= 1.25
105                 return need_wait
106
107         def _rate_limit_cache_clean(self, now):
108                 need_wait = self.need_wait(now)
109                 if need_wait > 0:
110                         debug('Fetcher   wait %d' % need_wait)
111                         sleep(need_wait)
112
113         def fetch(self, url, max_age):
114                 debug('Fetcher fetch %s' % url)
115                 cache_corename = urllib.quote_plus(url)
116                 cache_item = "%s/#%s#" % (self.cachedir, cache_corename)
117                 try: f = file(cache_item, 'r')
118                 except (OSError,IOError), oe:
119                         if oe.errno != errno.ENOENT: raise
120                         f = None
121                 now = time.time()
122                 max_age = max(opts.min_max_age, min(max_age, opts.expire_age))
123                 if f is not None:
124                         s = os.fstat(f.fileno())
125                         age = now - s.st_mtime
126                         if age > max_age:
127                                 debug('Fetcher  stale %d < %d'% (max_age, age))
128                                 f = None
129                 if f is not None:
130                         data = f.read()
131                         f.close()
132                         debug('Fetcher  cached %d > %d' % (max_age, age))
133                         return data
134
135                 debug('Fetcher  fetch')
136                 self._rate_limit_cache_clean(now)
137
138                 stream = urllib2.urlopen(url)
139                 data = stream.read()
140                 cache_tmp = "%s/#%s~%d#" % (
141                         self.cachedir, cache_corename, os.getpid())
142                 f = file(cache_tmp, 'w')
143                 f.write(data)
144                 f.close()
145                 os.rename(cache_tmp, cache_item)
146                 debug('Fetcher  stored')
147                 return data
148
149         def yoweb(self, kind, tail, max_age):
150                 self.default_ocean()
151                 url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % (
152                         self.ocean, kind, tail)
153                 return self.fetch(url, max_age)
154
155 #---------- logging assistance for troubled screenscrapers ----------
156
class SoupLog:
        # Accumulates, in self.msgs, human-readable messages about
        # problems encountered while scraping.

        def __init__(self):
                self.msgs = []

        def msg(self, m):
                # Record the message m.
                self.msgs.append(m)

        def soupm(self, obj, m):
                # Record m together with the repr of the object (normally
                # a piece of parsed page) in which the problem was seen.
                self.msg('%s; in %s' % (m, repr(obj)))

        def needs_msgs(self, child_souplog):
                # Take over all messages of child_souplog, leaving it
                # with a fresh empty list.
                self.msgs.extend(child_souplog.msgs)
                child_souplog.msgs = []
167
def soup_text(obj):
        # Return all the text found anywhere inside obj, concatenated in
        # order, with leading and trailing whitespace removed.
        pieces = obj.findAll(text=True)
        return ''.join(pieces).strip()
171
class SomethingSoupInfo(SoupLog):
        # Common base for the page scrapers: fetches one yoweb page via
        # the global fetcher and keeps the parsed result in self._soup.

        def __init__(self, kind, tail, max_age):
                SoupLog.__init__(self)
                page = fetcher.yoweb(kind, tail, max_age)
                entity_mode = BeautifulSoup.HTML_ENTITIES
                self._soup = BeautifulSoup(page, convertEntities=entity_mode)
179
180 #---------- scraper for pirate pages ----------
181
class PirateInfo(SomethingSoupInfo):
        # Public data members:
        #  pi.standings = { 'Treasure Haul': 0 ... }
        #                   (values are indices into standingvals;
        #                    puzzles which could not be scraped are absent)
        #  pi.name = name
        #  pi.crew = (id, name)   or None if not found
        #  pi.flag = (id, name)   or None if not found
        #  pi.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, pirate, max_age=300):
                # Fetch (possibly from cache, at most max_age seconds
                # old) and scrape the page for the named pirate.
                SomethingSoupInfo.__init__(self,
                        'pirate.wm?target=', pirate, max_age)
                self.name = pirate
                self._find_standings()
                self.crew = self._find_crewflag('crew',
                        '^/yoweb/crew/info\\.wm')
                self.flag = self._find_crewflag('flag',
                        '^/yoweb/flag/info\\.wm')

        def _find_standings(self):
                # Fills in self.standings from the standing images on
                # the page.  Problems are collected in a private log and
                # only copied to self.msgs if at least one puzzle ends up
                # without a standing.
                imgs = self._soup.findAll('img',
                        src=regexp.compile('/yoweb/images/stat.*'))
                # Matches either '<something>/<Standing>' or
                # '<something>/<something> (ocean-wide <Standing>)'; the
                # standing is whichever group matched (match.lastindex).
                re = regexp.compile(
                        u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
                        )
                standings = { }

                for skill in puzzles:
                        standings[skill] = [ ]

                skl = SoupLog()

                for img in imgs:
                        try: puzzle = img['alt']
                        except KeyError: continue

                        if not puzzle in puzzles:
                                skl.soupm(img, 'unknown puzzle: "%s"' % puzzle)
                                continue
                        key = img.findParent('td')
                        if key is None:
                                skl.soupm(img, 'puzzle at root! "%s"' % puzzle)
                                continue
                        valelem = key.findNextSibling('td')
                        if valelem is None:
                                skl.soupm(key, 'puzzle missing sibling "%s"'
                                        % puzzle)
                                continue
                        valstr = soup_text(valelem)
                        match = re.match(valstr)
                        if match is None:
                                skl.soupm(key, ('puzzle "%s" unparseable'+
                                        ' standing "%s"') % (puzzle, valstr))
                                continue
                        standing = match.group(match.lastindex)
                        standings[puzzle].append(standing)

                self.standings = { }

                for puzzle in puzzles:
                        sl = standings[puzzle]
                        if len(sl) > 1:
                                skl.msg('puzzle "%s" multiple standings %s' %
                                                (puzzle, repr(sl)))
                                continue
                        if not sl:
                                skl.msg('puzzle "%s" no standing found' % puzzle)
                                continue
                        standing = sl[0]
                        # Look the name up in the whole of standingvals,
                        # including its last entry ('Ultimate').
                        try:
                                self.standings[puzzle] = (
                                        standingvals.index(standing))
                        except ValueError:
                                skl.msg('puzzle "%s" unknown standing "%s"' %
                                        (puzzle, standing))

                # Publish the collected messages only if something is
                # actually missing.
                for puzzle in puzzles:
                        if not puzzle in self.standings:
                                self.needs_msgs(skl)
                                break

        def _find_crewflag(self, cf, yoweb_re):
                # cf is 'crew' or 'flag'; yoweb_re matches the href of
                # the corresponding info link.  Returns (id, name) taken
                # from the single matching link, or None (with a message
                # recorded) if there is not exactly one such link or its
                # href does not end in '<cf>id=<id>'.
                things = self._soup.findAll('a', href=regexp.compile(yoweb_re))
                if len(things) != 1:
                        self.msg('zero or several %s id references found' % cf)
                        return None
                thing = things[0]
                id_re = '\\b%sid\\=(\\w+)$' % cf
                id_haystack = thing['href']
                match = regexp.compile(id_re).search(id_haystack)
                if match is None:
                        self.soupm(thing, ('incomprehensible %s id ref'+
                                ' (%s in %s)') % (cf, id_re, id_haystack))
                        return None
                name = soup_text(thing)
                return (match.group(1), name)

        def __str__(self):
                return repr((self.crew, self.flag, self.standings, self.msgs))
280
281 #---------- scraper for crew pages ----------
282
class CrewInfo(SomethingSoupInfo):
        # Public data members:
        #  ci.crew = [ ('Captain',        ['Pirate', ...]),
        #              ('Senior Officer', [...]),
        #               ... ]
        #  ci.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, crewid, max_age=300):
                # Fetch (possibly from cache) and scrape the info page
                # of the crew with the given id.
                SomethingSoupInfo.__init__(self,
                        'crew/info.wm?crewid=', crewid, max_age)
                self._find_crew()

        def _find_crew(self):
                # Fills in self.crew.  On failure a message is recorded
                # and self.crew is left with whatever was found so far.
                self.crew = []
                captain_imgs = self._soup.findAll('img',
                        src='/yoweb/images/crew-captain.png')
                if len(captain_imgs) != 1:
                        self.msg('crew members: no. of captain images != 1')
                        return

                # Starting from the captain image, climb through the
                # enclosing tables until one contains a pirate link.
                table = captain_imgs[0]
                while True:
                        if table.find('a', href=pirate_ref_re):
                                break
                        table = table.findParent('table')
                        if not table:
                                self.msg('crew members: cannot find table')
                                return

                rank_img_re = regexp.compile('/yoweb/images/crew')
                members = None
                for row in table.contents:
                        # Only look at element children, not bare text.
                        if isinstance(row,basestring):
                                continue

                        # A row with a rank image starts a new rank.
                        if row.find('img', attrs={'src': rank_img_re}):
                                rank_name = soup_text(row)
                                members = []
                                self.crew.append((rank_name, members))
                                continue

                        # Any other row contributes its pirate links to
                        # the rank most recently started.
                        for link in row.findAll('a', href=pirate_ref_re):
                                if members is not None:
                                        members.append(soup_text(link))
                                else:
                                        self.soupm(link,
                                                'crew members: crew before rank')

        def __str__(self):
                return repr((self.crew, self.msgs))
330
331 #---------- pretty-printer for tables of pirate puzzle standings ----------
332
class StandingsTable:
        # Builds up, in self.s, a plain-text table with one row per
        # pirate and one column per puzzle (or group of puzzles).

        def __init__(self, use_puzzles=None, col_width=6):
                # use_puzzles is a list whose entries are either a puzzle
                # name or a list of puzzle names sharing one column.  If
                # omitted, a fixed ship-duty selection is used when
                # opts.ship_duty is set, and all puzzles otherwise.
                # col_width is the width of a column including the
                # separating space.
                if use_puzzles is None:
                        if opts.ship_duty:
                                use_puzzles=[
                                        'Navigating','Battle Navigation',
                                        'Gunning',
                                        ['Sailing','Rigging'],
                                        'Bilging',
                                        'Carpentry',
                                        'Treasure Haul'
                                ]
                        else:
                                use_puzzles=puzzles
                self._puzzles = use_puzzles
                self.s = ''
                self._cw = col_width-1

        def _pline(self, pirate, puzstrs, extra):
                # Append one row: the name column, then each entry of
                # puzstrs padded/truncated to the column width, then the
                # optional free-form extra text.
                self.s += ' %-*s' % (max(max_pirate_namelen, 14), pirate)
                for v in puzstrs:
                        self.s += ' %-*.*s' % (self._cw,self._cw, v)
                if extra:
                        self.s += ' ' + extra
                self.s += '\n'

        def _puzstr(self, pi, puzzle):
                # Returns the cell text for pirate pi in the given column.
                # For a group of puzzles the best standing is shown.
                #   '?'  if any puzzle of the column has no known standing
                #   ''   if the standing is 0
                # otherwise the standing number (only if the column is
                # wide enough), followed by one '*' per two levels and a
                # '+' for an odd level.
                if not isinstance(puzzle,list): puzzle = [puzzle]
                try: standing = max([pi.standings[p] for p in puzzle])
                except KeyError: return '?'
                if not standing: return ''
                s = ''
                if self._cw > 4:
                        # NOTE(review): the original also computed the
                        # initial letter of the standing name here but
                        # never displayed it; the digit is what has
                        # always been shown, so that is preserved.
                        s += repr(standing)
                if self._cw > 5:
                        s += ' '
                s += '*' * (standing // 2)
                s += '+' * (standing % 2)
                return s

        def headings(self):
                # Append the heading row.  Two-word puzzle names are
                # shortened to (up to) the first four letters of the
                # first word followed by the rest; grouped puzzles are
                # each cut to half a column and joined with '/'.
                def puzn_redact(name):
                        if isinstance(name,list):
                                return '/'.join(
                                        ["%.*s" % (self._cw//2, puzn_redact(n))
                                         for n in name])
                        spc = name.find(' ')
                        if spc < 0: return name
                        return name[0:min(4,spc)] + name[spc+1:]
                self._pline('', [puzn_redact(p) for p in self._puzzles], None)

        def literalline(self, line):
                # Append an arbitrary line of text to the table.
                self.s += line + '\n'

        def pirate_dummy(self, name, standingstring, extra=None):
                # Append a row for name in which the cells are given by
                # standingstring repeated once per column.
                self._pline(name, standingstring * len(self._puzzles), extra)

        def pirate(self, pi, extra=None):
                # Append the row for the pirate described by pi.
                puzstrs = [self._puzstr(pi,puz) for puz in self._puzzles]
                self._pline(pi.name, puzstrs, extra)

        def results(self):
                # Returns everything appended so far.
                return self.s
395
396 #---------- chat log parser ----------
397
class PirateAboard:
        # This is essentially a transparent, dumb, data class.
        #  pa.v               vessel dictionary this pirate is listed in
        #  pa.name
        #  pa.last_time       time and description of the most recent
        #  pa.last_event        event seen for this pirate
        #  pa.gunner
        #  pa.last_chat_time  time and channel of the most recent chat,
        #  pa.last_chat_chan    or None
        #  pa.pi              PirateInfo, or None if not fetched yet

        def __init__(self, pn, v, time, event):
                self.name = pn
                self.v = v
                self.last_time, self.last_event = time, event
                self.last_chat_time = self.last_chat_chan = None
                self.gunner = False
                self.pi = None

        def pirate_info(self):
                # Returns the PirateInfo for this pirate, fetching it on
                # first use -- but only if the fetcher could do so
                # without a rate-limiting wait.  Otherwise returns
                # whatever we have (possibly None).
                if self.pi:
                        return self.pi
                if not fetcher.need_wait(time.time()):
                        self.pi = PirateInfo(self.name, 3600)
                return self.pi
423
class ChatLogTracker:
        # This is quite complex so we make it opaque.  Use the
        # official invokers, accessors etc.
        #
        # Reads a chat log file line by line and maintains a model of
        # which pirates are aboard which vessel, when each was last
        # heard of, and which vessel we ourselves are currently on.

        def __init__(self, myself_pi, logfn):
                # myself_pi is an object whose .name is our own pirate
                # name; logfn is the path of the chat log to follow.
                self._pl = {}   # self._pl['Pirate'] =
                self._vl = {}   #   self._vl['Vessel']['Pirate'] = PirateAboard
                                # self._vl['Vessel']['#lastinfo']
                self._v = None          # self._v =
                self._vessel = None     #       self._vl[self._vessel]
                self._date = None
                self._myself = myself_pi
                self._need_redisplay = False
                self._f = file(logfn)
                # Holds an incomplete last line until its newline arrives.
                self._lbuf = ''
                # [bytes consumed so far, size of the log when opened]
                self._progress = [0, os.fstat(self._f.fileno()).st_size]

        def force_redisplay(self):
                # Make the next call to changed() return True.
                self._need_redisplay = True

        def _onboard_event(self,v,timestamp,pirate,event):
                # Record that pirate was seen on vessel v at timestamp.
                # If we already have them on v, just update the record;
                # otherwise remove them from any previous vessel and
                # create a fresh PirateAboard on v.  Returns the record.
                pa = self._pl.get(pirate, None)
                if pa is not None and pa.v is v:
                        pa.last_time = timestamp
                        pa.last_event = event
                else:
                        if pa is not None: del pa.v[pirate]
                        pa = PirateAboard(pirate, v, timestamp, event)
                        self._pl[pirate] = pa
                        v[pirate] = pa
                v['#lastinfo'] = timestamp
                self.force_redisplay()
                return pa

        def _trash_vessel(self, v):
                # Forget every pirate listed on v.  (v itself is left
                # for the caller to discard or replace.)
                for pn in v:
                        if pn.startswith('#'): continue
                        del self._pl[pn]
                self.force_redisplay()

        def expire_garbage(self, timestamp):
                # Drop every vessel about which nothing has been heard
                # for more than opts.ship_reboard_clearout.
                # Iterate over a copy since entries are deleted en route.
                for (vn,v) in list(self._vl.iteritems()):
                        la = v['#lastinfo']
                        if timestamp - la > opts.ship_reboard_clearout:
                                self._debug_line_disposition(timestamp,'',
                                        'stale reset '+vn)
                                self._trash_vessel(v)
                                del self._vl[vn]

        def _create_vessel(self, vn, timestamp):
                # Install and return a new, empty vessel record for vn.
                self._vl[vn] = v = { '#lastinfo': timestamp }
                return v

        def _update_vessel_lookup(self, vn, timestamp, dml):
                # Returns the vessel record for vn, creating it if it is
                # unknown and replacing it by an empty one if it is
                # stale.  Appends 'new', 'stale' or 'current' to the
                # debug-message word list dml accordingly.
                v = self._vl.get(vn, None)
                if v is None:
                        dml.append('new')
                        v = self._create_vessel(vn, timestamp)
                elif timestamp - v['#lastinfo'] > opts.ship_reboard_clearout:
                        dml.append('stale')
                        self._trash_vessel(v)
                        v = self._create_vessel(vn, timestamp)
                else:
                        dml.append('current')
                return v

        def _debug_line_disposition(self,timestamp,l,m):
                # Log what was decided (m) about log line l.
                debug('CLT %13s %-30s %s' % (timestamp,m,l))

        def chatline(self,l):
                # Process one complete line l of the chat log (without
                # its trailing newline).
                #
                # The helpers below are closures: rm matches a regexp
                # against the *current* value of l, and d logs using the
                # *current* value of timestamp, so both pick up the
                # reassignments made further down.
                rm = lambda re: regexp.match(re,l)
                d = lambda m: self._debug_line_disposition(timestamp,l,m)
                timestamp = None

                # Date header line: remember the date as a list of the
                # three numbers, in the order in which they appear.
                m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$')
                if m:
                        self._date = [int(x) for x in m.groups()]
                        self._previous_timestamp = None
                        return d('date '+`self._date`)

                if self._date is None:
                        return d('date unset')

                m = rm('\\[(\d\d):(\d\d):(\d\d)\\] ')
                if not m:
                        return d('no timestamp')

                # Combine date and time of day into a timestamp.  If
                # that would be earlier than the previous line's, bump
                # the third date field (presumably the day of the month,
                # i.e. the clock passed midnight) and try again.
                # The first comparison after a date line is against
                # None, which in Python 2 compares below any number.
                while True:
                        time_tuple = (self._date +
                                      [int(x) for x in m.groups()] +
                                      [-1,-1,-1])
                        timestamp = time.mktime(time_tuple)
                        if timestamp >= self._previous_timestamp: break
                        self._date[2] += 1
                        self._debug_line_disposition(timestamp,'',
                                'new date '+`self._date`)

                self._previous_timestamp = timestamp

                # Strip the '[hh:mm:ss] ' prefix; from here on rm()
                # matches against the remainder of the line.
                l = l[l.find(' ')+1:]

                # Shorthands for recording an event on the current
                # vessel.  ob1 and oba use the most recent match m:
                # group 1 is the pirate, group 2 (oba) extra detail.
                def ob_x(pirate,event):
                        return self._onboard_event(
                                        self._v, timestamp, pirate, event)
                def ob1(did): ob_x(m.group(1), did); return d(did)
                def oba(did): return ob1('%s %s' % (did, m.group(2)))

                # Record the departure as an event (which updates the
                # vessel's '#lastinfo'), then remove the pirate.
                def disembark(v, timestamp, pirate, event):
                        self._onboard_event(
                                        v, timestamp, pirate, 'leaving '+event)
                        del v[pirate]
                        del self._pl[pirate]

                # We ourselves board a vessel: it becomes the current one.
                m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$')
                if m:
                        dm = ['boarding']
                        pn = self._myself.name
                        self._vessel = vn = m.group(1)
                        self._v = self._update_vessel_lookup(vn, timestamp, dm)

                        ob_x(pn, 'we boarded')
                        self.expire_garbage(timestamp)
                        return d(' '.join(dm))

                # Everything below needs a current vessel.
                if self._v is None:
                        return d('no vessel')

                m = rm('(\\w+) has come aboard\\.$')
                if m: return ob1('boarded');

                m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$')
                if m:
                        (who,what) = m.groups()
                        pa = ob_x(who,'ord '+what)
                        if what == 'Gunning':
                                pa.gunner = True
                        return d('duty order')

                m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$')
                if m: oba('stopped'); return d("end")

                # Note that speaker said something on channel chan.
                # Returns a word for the debug log: 'mystery' if the
                # speaker is unknown, 'elsewhere' if known but on
                # another vessel, 'here' if aboard the current vessel
                # (only then is the chat actually recorded).
                def chat_core(speaker, chan):
                        try: pa = self._pl[speaker]
                        except KeyError: return 'mystery'
                        if pa.v is not self._v: return 'elsewhere'
                        pa.last_chat_time = timestamp
                        pa.last_chat_chan = chan
                        self.force_redisplay()
                        return 'here'

                def chat(chan):
                        speaker = m.group(1)
                        dm = chat_core(speaker, chan)
                        return d('chat %s %s' % (chan, dm))

                # Chat which may carry a manual correction command:
                #   /a [vessel:] name...   the named pirates are aboard
                #   /d [vessel:] name...   the named pirates have left
                # Anything else is treated as ordinary chat.
                # For 'Ye told X' lines group 1 is the recipient, so the
                # 'self' case below is a tell addressed to ourselves.
                def chat_metacmd(chan):
                        (cmdr, metacmd) = m.groups()
                        metacmd = regexp.sub('\\s+', ' ', metacmd).strip()
                        m2 = regexp.match(
                                '/([ad]) (?:([A-Za-z ]+)\\s*:)?([A-Za-z ]+)$',
                                metacmd)
                        if not m2: return chat(chan)

                        (cmd, vn, targets) = m2.groups()
                        dml = ['cmd', chan, cmd]

                        if cmd == 'a': each = self._onboard_event
                        else: each = disembark

                        if cmdr == self._myself.name:
                                dml.append('self')
                                how = 'manual: /%s' % cmd
                        else:
                                dml.append('other')
                                how = '/%s %s' % (cmd,cmdr)

                        # Work out which vessel is meant: a two-word
                        # name is taken as a full vessel name; no name
                        # means the current vessel; any other name must
                        # match the end of the current vessel's name.
                        v = None
                        if vn is not None and len(vn.split(' ')) == 2:
                                v = self._update_vessel_lookup(
                                        vn.title(), timestamp, dml)
                        elif self._v is None:
                                dml.append('no-current')
                        elif vn is None:
                                dml.append('current')
                                v = self._v
                        elif regexp.match('(?:.* )?%s$' % vn.title(),
                                        self._vessel):
                                dml.append('match')
                                v = self._v
                        else:
                                dml.append('unk-abbrev')

                        if v is not None:
                                targets = targets.strip().split(' ')
                                dml.append(`len(targets)`)
                                for target in targets:
                                        each(v, timestamp, target.title(), how)

                        dm = ' '.join(dml)
                        chat_core(cmdr, 'cmd '+chan)
                        return d(dm)

                m = rm('(\\w+) (?:issued an order|ordered everyone) "')
                if m: return ob1('general order');

                m = rm('(\\w+) says, "')
                if m: return chat('public')

                m = rm('(\\w+) tells ye, "')
                if m: return chat('private')

                m = rm('Ye told (\\w+), "(.*)"$')
                if m: return chat_metacmd('private')

                m = rm('(\\w+) flag officer chats, "')
                if m: return chat('flag officer')

                m = rm('(\\w+) officer chats, "(.*)"$')
                if m: return chat_metacmd('officer')

                # If we are among the winners, every winner listed with
                # a single-word name is taken to be aboard with us.
                m = rm('Game over\\.  Winners: ([A-Za-z, ]+)\\.$')
                if m:
                        pl = m.group(1).split(', ')
                        if not self._myself.name in pl:
                                return d('lost boarding battle')
                        for pn in pl:
                                if ' ' in pn: continue
                                ob_x(pn,'won boarding battle')
                        return d('won boarding battle')

                m = rm('(\\w+) is eliminated\\!')
                if m: return ob1('eliminated in fray');

                m = rm('(\\w+) has left the vessel\.')
                if m:
                        pirate = m.group(1)
                        disembark(self._v, timestamp, pirate, 'disembarked')
                        return d('disembarked')

                return d('not matched')

        def _str_vessel(self, vn, v):
                # Debug dump of one vessel and the pirates aboard it;
                # also asserts that v and self._pl agree.
                s = ' vessel %s\n' % vn
                s += ' '*20 + "%-*s   %13s\n" % (
                                max_pirate_namelen, '#lastinfo',
                                v['#lastinfo'])
                for pn in sorted(v.keys()):
                        if pn.startswith('#'): continue
                        pa = v[pn]
                        assert pa.v == v
                        assert self._pl[pn] == pa
                        s += ' '*20 + "%s %-*s %13s %-30s %13s %s\n" % (
                                (' ','G')[pa.gunner],
                                max_pirate_namelen, pn,
                                pa.last_time, pa.last_event,
                                pa.last_chat_time, pa.last_chat_chan)
                return s

        def __str__(self):
                # Debug dump of the whole tracker state (current vessel
                # first), with consistency checks on the data structure.
                s = '''<ChatLogTracker
 myself %s
 vessel %s
'''                     % (self._myself.name, self._vessel)
                assert ((self._v is None and self._vessel is None) or
                        (self._v is self._vl[self._vessel]))
                if self._vessel is not None:
                        s += self._str_vessel(self._vessel, self._v)
                for vn in sorted(self._vl.keys()):
                        if vn == self._vessel: continue
                        s += self._str_vessel(vn, self._vl[vn])
                for p in self._pl:
                        pa = self._pl[p]
                        assert pa.v[p] is pa
                        assert pa.v in self._vl.values()
                s += '>\n'
                return s

        def catchup(self, progress=None):
                # Read and process everything currently available in
                # the log file.  If given, progress.progress(done, total)
                # is called after each read and progress.caughtup() at
                # the end.
                while True:
                        more = self._f.readline()
                        if not more: break

                        self._progress[0] += len(more)
                        if progress: progress.progress(*self._progress)

                        # A line is only processed once its newline has
                        # arrived; until then it is kept in self._lbuf.
                        self._lbuf += more
                        if self._lbuf.endswith('\n'):
                                self.chatline(self._lbuf.rstrip())
                                self._lbuf = ''
                                if opts.debug >= 2:
                                        debug(self.__str__())
                if progress: progress.caughtup()

        def changed(self):
                # Returns whether anything changed since the previous
                # call, and resets the flag.
                rv = self._need_redisplay
                self._need_redisplay = False
                return rv
        def myname(self):
                # returns our pirate name
                return self._myself.name
        def vessel(self):
                # returns the vessel we're aboard or None
                return self._vessel
        def aboard(self):
                # returns a list of PirateAboard sorted by name
                if self._v is None: return []
                return [ self._v[pn]
                         for pn in sorted(self._v.keys())
                         if not pn.startswith('#') ]
733
734 #---------- implementations of actual operation modes ----------
735
736 def do_pirate(pirates, bu):
737         print '{'
738         for pirate in pirates:
739                 info = PirateInfo(pirate)
740                 print '%s: %s,' % (`pirate`, info)
741         print '}'
742
def prep_crew_of(args, bu, max_age=300):
        # Look up the crew of the single pirate named in args.
        #  args     mode arguments; exactly one pirate name is expected
        #  bu       usage-error callback, called if args is the wrong length
        #  max_age  passed to both the PirateInfo and the CrewInfo lookup
        # Returns a CrewInfo, or None if the pirate's crew attribute is None.
        if len(args) != 1: bu('crew-of takes one pirate name')
        crew = PirateInfo(args[0], max_age).crew
        if crew is None: return None
        return CrewInfo(crew[0], max_age)
748
749 def do_crew_of(args, bu):
750         ci = prep_crew_of(args, bu)
751         print ci
752
753 def do_standings_crew_of(args, bu):
754         ci = prep_crew_of(args, bu, 60)
755         tab = StandingsTable()
756         tab.headings()
757         for (rank, members) in ci.crew:
758                 if not members: continue
759                 tab.literalline('%s:' % rank)
760                 for p in members:
761                         pi = PirateInfo(p, random.randint(900,1800))
762                         tab.pirate(pi)
763         print tab.results()
764
class ProgressPrintPercentage:
        """Progress reporter which writes a percentage line to a file.

        The progress line ends in a carriage return rather than a newline,
        so successive updates overwrite each other on a terminal.
        """
        def __init__(self, f=sys.stdout):
                # f is the file object all output is written to
                self._f = f
        def progress_string(self,done,total):
                """Return the progress line text for done out of total."""
                percent = (done*100) / total
                return "scan chat logs %3d%%\r" % percent
        def progress(self,*a):
                """Write and flush the progress line; a is (done, total)."""
                out = self._f
                out.write(self.progress_string(*a))
                out.flush()
        def show_init(self, pirate, ocean):
                """Print the startup banner naming the pirate and ocean."""
                banner = 'Starting up, %s on the %s ocean' % (pirate, ocean)
                print >>self._f, banner
        def caughtup(self):
                """Blank out the progress line once scanning has finished."""
                out = self._f
                out.write('                   \r')
                out.flush()
779
780 #----- modes which use the chat log parser are quite complex -----
781
def prep_chat_log(args, bu,
                progress=ProgressPrintPercentage(),
                max_myself_age=3600):
        # Common setup for the modes which follow a chat log.
        #  args            mode arguments; exactly one chat log filename
        #  bu              usage-error callback
        #  progress        reporter given the startup banner and the
        #                   progress of the initial scan of the log
        #  max_myself_age  max age passed to the PirateInfo lookup of the
        #                   log's owner
        # The pirate and ocean names are taken from the log's filename,
        # which must look like [DIR/]Pirate_ocean_...
        # Returns (myself, track): the owner's PirateInfo and a
        # ChatLogTracker which has already read the existing log contents.
        if len(args) != 1: bu('this action takes only chat log filename')
        logfn = args[0]
        logfn_re = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_'
        match = regexp.match(logfn_re, logfn)
        if not match: bu('chat log filename is not in expected format')
        (pirate, ocean) = match.groups()
        fetcher.default_ocean(ocean)

        myself = PirateInfo(pirate,max_myself_age)
        progress.show_init(pirate, fetcher.ocean)
        track = ChatLogTracker(myself, logfn)

        # Run the initial scan at a debug level two lower than requested,
        # and always put the level back, even if the scan raises.
        opts.debug -= 2
        try:
                track.catchup(progress)
        finally:
                opts.debug += 2

        track.force_redisplay()

        return (myself, track)
804
805 def do_track_chat_log(args, bu):
806         (myself, track) = prep_chat_log(args, bu)
807         while True:
808                 track.catchup()
809                 if track.changed():
810                         print track
811                 sleep(1)
812
813 #----- ship management aid -----
814
815 class Display_dumb(ProgressPrintPercentage):
816         def __init__(self):
817                 ProgressPrintPercentage.__init__(self)
818         def show(self, s):
819                 print '\n\n', s;
820         def realstart(self):
821                 pass
822
class Display_overwrite(ProgressPrintPercentage):
        """Displayer which redraws each report from the top of the screen.

        Terminal control strings are looked up via terminfo.  Three levels
        of capability are handled:
         * home ('ho', or 'cup' to 0,0) plus 'el' and 'ed' available:
           redraw in place, clearing to end of line / end of display;
         * only 'clear' available: clear the screen before each report;
         * not even 'clear': behave like Display_dumb.
        """
        def __init__(self):
                ProgressPrintPercentage.__init__(self)

                # setupterm wants an fd; /dev/null is supplied and stdout
                # is written directly in show() and realstart().
                null = open('/dev/null','w')
                curses.setupterm(fd=null.fileno())

                self._clear = curses.tigetstr('clear')
                if not self._clear:
                        self._debug('missing clear!')
                        # Borrow a bound show() from a real Display_dumb;
                        # the unbound Display_dumb.show cannot be called
                        # with only the report string as its argument.
                        self.show = Display_dumb().show
                        return

                self._t = {'el':'', 'ed':''}
                if not self._init_sophisticated():
                        # fall back to clearing the whole screen each time
                        for k in self._t.keys(): self._t[k] = ''
                        self._t['ho'] = self._clear

        def _debug(self,m): debug('display overwrite: '+m)

        def _init_sophisticated(self):
                """Fill self._t with control strings; return 1 if all found.

                Returns 0 (after a debug message) if any of them is missing.
                """
                for k in self._t.keys():
                        s = curses.tigetstr(k)
                        self._t[k] = s
                self._t['ho'] = curses.tigetstr('ho')
                if not self._t['ho']:
                        cup = curses.tigetstr('cup')
                        # tparm cannot take a missing capability; leave
                        # 'ho' unset so it is reported as missing below
                        if cup:
                                self._t['ho'] = curses.tparm(cup,0,0)
                missing = [k for k in self._t.keys() if not self._t[k]]
                if missing:
                        self._debug('missing '+(' '.join(missing)))
                        return 0
                return 1

        def show(self, s):
                """Redraw the report s starting at the top of the screen."""
                w = sys.stdout.write
                def wti(k): w(self._t[k])

                wti('ho')
                nl = ''
                for l in s.rstrip().split('\n'):
                        w(nl)
                        w(l)
                        wti('el')
                        nl = '\r\n'
                wti('ed')
                w(' ')
                sys.stdout.flush()

        def realstart(self):
                """Clear the screen once, before the first report."""
                # with no 'clear' capability there is nothing to write
                if self._clear:
                        sys.stdout.write(self._clear)
                sys.stdout.flush()
875                         
876
def do_ship_aid(args, bu):
        # Mode 'ship-aid': once a second, rebuild and show a report of
        # who is aboard our vessel, with a standings table row per pirate
        # plus gunner flag, last event and last chat columns.
        #  args  mode arguments; exactly one chat log filename
        #  bu    usage-error callback
        # Never returns.
        if opts.ship_duty is None: opts.ship_duty = True

        displayer_class = globals()['Display_'+opts.display]
        displayer = displayer_class()
        spinner = '/-\\'

        (myself, track) = prep_chat_log(args, bu, progress=displayer)

        def ago_column(now, when, what):
                # one fixed-width column: how long ago, and what happened
                if when is None: return ' ' * 22
                elapsed = format_time_interval(now - when)
                return " %-4s %-16s" % (elapsed, what)

        displayer.realstart()

        while True:
                track.catchup()
                now = time.time()

                report = "%s" % track.myname()

                vn = track.vessel()
                if vn is None: report += " not on a vessel?!"
                else: report += " on board the %s" % vn
                report += " at %s\n" % time.strftime("%Y-%m-%d %H:%M:%S")

                tbl = StandingsTable()
                tbl.headings()

                for pa in track.aboard():
                        pi = pa.pirate_info()

                        if pa.gunner: flag = 'G '
                        else: flag = '  '
                        xs = (flag
                              + ago_column(now, pa.last_time, pa.last_event)
                              + ago_column(now, pa.last_chat_time,
                                           pa.last_chat_chan))

                        # no info for this pirate: show a placeholder row
                        # marked with the current spinner character
                        if pi is None:
                                tbl.pirate_dummy(pa.name, spinner[0], xs)
                        else:
                                tbl.pirate(pi, xs)

                report += tbl.results()

                displayer.show(report)
                sleep(1)
                spinner = spinner[1:] + spinner[0]
924
925 #---------- main program ----------
926
def main():
        # Parse the command line, fill in the global opts and fetcher, and
        # dispatch to the do_<mode> function named by the first argument
        # (with '-' in the mode name mapped to '_').
        global opts, fetcher

        pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
actions:
 yoweb-scrape [--ocean OCEAN ...] pirate PIRATE
 yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
 yoweb-scrape [options] ship-aid CHAT-LOG  (must be .../PIRATE_OCEAN_chat-log*)

display modes (for --display) apply to ship-aid:
 --display=dumb       just print new information, scrolling the screen
 --display=overwrite  use cursor motion, selective clear, etc. to redraw at top
''')
        ao = pa.add_option
        ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None,
                help='select ocean OCEAN')
        ao('--cache-dir', dest='cache_dir', metavar='DIR',
                default='~/.yoweb-scrape-cache',
                help='cache yoweb pages in DIR')
        ao('-D','--debug', action='count', dest='debug', default=0,
                help='enable debugging output')
        ao('--debug-fd', type='int', dest='debug_fd',
                help='write any debugging output to specified fd')
        ao('-q','--quiet', action='store_true', dest='quiet',
                help='suppress warning output')
        ao('--display', action='store', dest='display',
                type='choice', choices=['dumb','overwrite'],
                help='how to display ship aid')

        ao('--ship-duty', action='store_true', dest='ship_duty',
                help='show ship duty station puzzles')
        ao('--all-puzzles', action='store_false', dest='ship_duty',
                help='show all puzzles, not just ship duty stations')

        (opts,args) = pa.parse_args()
        random.seed()

        if len(args) < 1:
                pa.error('need a mode argument')

        if opts.debug_fd is not None:
                opts.debug_file = os.fdopen(opts.debug_fd, 'w')
        else:
                opts.debug_file = sys.stdout

        mode = args[0]
        # '_' is mapped to '#' first, so that only the hyphenated spelling
        # of a mode name can match a do_* function
        mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_')
        try: mode_fn = globals()[mode_fn_name]
        except KeyError: pa.error('unknown mode "%s"' % mode)

        # fixed parameters
        opts.min_max_age = 60
        opts.expire_age = 3600
        opts.ship_reboard_clearout = 3600

        if opts.cache_dir.startswith('~/'):
                # expanduser uses $HOME when set and otherwise falls back
                # to the password database, rather than failing on an
                # unset HOME
                opts.cache_dir = os.path.expanduser(opts.cache_dir)

        if opts.display is None:
                # overwriting the screen is only sensible on a terminal
                # which is not also receiving the debugging output
                if ((opts.debug > 0 and opts.debug_fd is None)
                    or not os.isatty(sys.stdout.fileno())):
                        opts.display = 'dumb'
                else:
                        opts.display = 'overwrite'

        fetcher = Fetcher(opts.ocean, opts.cache_dir)

        mode_fn(args[1:], pa.error)
998
# Script entry point: main() is run as soon as the file has been loaded.
main()