chiark / gitweb /
yoweb-scrape: add a markup massager to cope with broken html from yoweb
[ypp-sc-tools.db-test.git] / yoweb-scrape
1 #!/usr/bin/python
2 # This is part of ypp-sc-tools, a set of third-party tools for assisting
3 # players of Yohoho Puzzle Pirates.
4 #
5 # Copyright (C) 2009 Ian Jackson <ijackson@chiark.greenend.org.uk>
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 #
20 # Yohoho and Puzzle Pirates are probably trademarks of Three Rings and
21 # are used without permission.  This program is not endorsed or
22 # sponsored by Three Rings.
23
# Short copyright / no-warranty notice for this program, kept as a
# module-level string.  The full licence terms are in the comment
# block at the top of this file.
copyright_info = '''
yoweb-scrape is part of ypp-sc-tools  Copyright (C) 2009 Ian Jackson
This program comes with ABSOLUTELY NO WARRANTY; this is free software,
and you are welcome to redistribute it under certain conditions.
For details, read the top of the yoweb-scrape file.
'''
30
31 #---------- setup ----------
32
33 import signal
34 signal.signal(signal.SIGINT, signal.SIG_DFL)
35
36 import os
37 import time
38 import urllib
39 import urllib2
40 import errno
41 import sys
42 import re as regexp
43 import random
44 import curses
45 import termios
46 import random
47 import subprocess
48 import copy
49 from optparse import OptionParser
50 from StringIO import StringIO
51
52 from BeautifulSoup import BeautifulSoup
53
54 opts = None
55
56 #---------- YPP parameters and arrays ----------
57
# Names of all the puzzles we know about, exactly as they appear in
# the alt text of the standing images on a pirate's yoweb page.
puzzles = [
        'Swordfighting', 'Bilging', 'Sailing', 'Rigging', 'Navigating',
        'Battle Navigation', 'Gunning', 'Carpentry', 'Rumble',
        'Treasure Haul', 'Drinking', 'Spades', 'Hearts', 'Treasure Drop',
        'Poker', 'Distilling', 'Alchemistry', 'Shipwrightery',
        'Blacksmithing', 'Foraging',
        ]

# An entry which is itself a list is a group of puzzles shown as a
# single column (StandingsTable._puzstr takes the best of the group).
core_duty_puzzles = ['Gunning', ['Sailing', 'Rigging'], 'Bilging', 'Carpentry']

duty_puzzles = (['Navigating', 'Battle Navigation']
                + core_duty_puzzles
                + ['Treasure Haul'])

# Standing names in ascending order; a standing is represented
# elsewhere in this file by its index into this list.
standingvals = [
        'Able', 'Proficient', 'Distinguished', 'Respected', 'Master',
        'Renowned', 'Grand-Master', 'Legendary', 'Ultimate',
        ]
standing_limit = len(standingvals)

# Matches the href of a link to a pirate's yoweb page.
pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm')

max_pirate_namelen = 12

81
82
83 #---------- general utilities ----------
84
def debug(m):
        """Print m as one line to opts.debug_file if opts.debug is positive."""
        if not (opts.debug > 0):
                return
        print >>opts.debug_file, m
88
def debug_flush():
        """Flush opts.debug_file if debugging is enabled (opts.debug > 0)."""
        if not (opts.debug > 0):
                return
        opts.debug_file.flush()
92
def sleep(seconds):
        """Flush pending debug output, then sleep for the given seconds."""
        # Flush first so debug messages are not left sitting in a
        # buffer for the duration of the sleep.
        debug_flush()
        time.sleep(seconds)
96
def format_time_interval(ti):
        """Render a duration ti (in seconds) as a short string.

        Under two minutes gives 'M:SS'; under two hours gives whole
        minutes ('NNm'); under a day gives whole hours ('Nh');
        anything longer gives whole days ('Nd').
        """
        if ti < 120:
                minutes, seconds = ti / 60, ti % 60
                return '%d:%02d' % (minutes, seconds)
        # (exclusive upper bound, seconds per unit, output format)
        coarser = (
                (7200, 60, '%2dm'),
                (86400, 3600, '%dh'),
                )
        for limit, unit, fmt in coarser:
                if ti < limit:
                        return fmt % (ti / unit)
        return '%dd' % (ti / 86400)
102
def yppsc_dir():
        """Return the ypp-sc-tools source base directory.

        If the environment variable YPPSC_YARRG_SRCBASE is set its value
        is returned unchanged.  Otherwise the directory part of
        sys.argv[0] is used, and is also stored into
        YPPSC_YARRG_SRCBASE in this process's environment.
        """
        lib = os.getenv("YPPSC_YARRG_SRCBASE")
        if lib is not None: return lib
        script = sys.argv[0]
        if '/' in script:
                lib = regexp.sub('/[^/]+$', '', script)
        else:
                # No directory component at all (e.g. run as
                # "python yoweb-scrape"): the script is in the current
                # directory.  Without this case the script's own name
                # would be returned as though it were a directory.
                lib = '.'
        os.environ["YPPSC_YARRG_SRCBASE"] = lib
        return lib
110
# Markup fixups applied before parsing: BeautifulSoup's standard ones
# plus one of our own.  Ours rewrites a <td ...> tag in which a quoted
# attribute value is followed by a bare "center" (with no attribute
# name), turning that into align="center".
soup_massage = copy.copy(BeautifulSoup.MARKUP_MASSAGE) + [
        (regexp.compile('(\<td.*") ("center")'),
         lambda found: found.group(1) + ' align=' + found.group(2)),
        ]
116
def make_soup(*args, **kwargs):
        """Construct a BeautifulSoup with this program's standard options.

        Positional and keyword arguments are passed straight through;
        HTML entity conversion and our soup_massage fixups are always
        requested in addition.
        """
        soup = BeautifulSoup(
                *args,
                convertEntities=BeautifulSoup.HTML_ENTITIES,
                markupMassage=soup_massage,
                **kwargs)
        return soup
122
123 #---------- caching and rate-limiting data fetcher ----------
124
class Fetcher:
        """Caching and rate-limiting URL fetcher.

        Each fetched page is stored in cachedir in a file named
        '#<url-quoted url>#'.  The modification times of those files
        are also the record of how recently fetches were made, which
        need_wait() uses to decide how long to pause before the next
        real fetch.
        """

        def __init__(self, cachedir):
                """Create cachedir if it does not exist and expire old entries."""
                debug('Fetcher init %s' % cachedir)
                self.cachedir = cachedir
                try: os.mkdir(cachedir)
                except (OSError,IOError) as oe:
                        if oe.errno != errno.EEXIST: raise
                self._cache_scan(time.time())

        def _cache_scan(self, now):
                """Expire old cache files; return ages of the remaining ones.

                Files older than opts.expire_age seconds (relative to
                now) are deleted.  Returns an unsorted list of the ages,
                in seconds, of the cache files that were kept.
                """
                ages = []
                debug('Fetcher   scan_cache')
                for leaf in os.listdir(self.cachedir):
                        # Only files we made ourselves.
                        if not leaf.startswith('#'): continue
                        path = self.cachedir + '/' + leaf
                        try: s = os.stat(path)
                        except (OSError,IOError) as oe:
                                # Vanished since listdir: not an error.
                                if oe.errno != errno.ENOENT: raise
                                continue
                        age = now - s.st_mtime
                        if age > opts.expire_age:
                                debug('Fetcher    expire %d %s' % (age, path))
                                try: os.remove(path)
                                except (OSError,IOError) as oe:
                                        if oe.errno != errno.ENOENT: raise
                                continue
                        ages.append(age)
                return ages

        def need_wait(self, now, imaginary=()):
                """Return how many seconds to wait before fetching again.

                now is the current time; imaginary is an optional
                sequence of additional ages (in seconds) to be treated
                as if they were ages of cache entries.  The result is 0
                if no wait is needed; otherwise it is the required wait
                plus a random adjustment of up to half a second either
                way.
                """
                ages = self._cache_scan(now)
                ages += imaginary
                ages.sort()
                debug('Fetcher   ages ' + repr(ages))
                # The Nth most recent entry must be at least min_age
                # old, where min_age grows with N.  Entries more than
                # 5 seconds old never cause a wait.
                min_age = 1
                need_wait = 0
                for age in ages:
                        if age < min_age and age <= 5:
                                debug('Fetcher   morewait min=%d age=%d' %
                                        (min_age, age))
                                need_wait = max(need_wait, min_age - age)
                        min_age += 3
                        min_age *= 1.25
                if need_wait > 0:
                        need_wait += random.random() - 0.5
                return need_wait

        def _rate_limit_cache_clean(self, now):
                """Expire old cache entries and sleep if need_wait() says so."""
                need_wait = self.need_wait(now)
                if need_wait > 0:
                        debug('Fetcher   wait %d' % need_wait)
                        sleep(need_wait)

        def fetch(self, url, max_age):
                """Return the contents of url, from the cache if fresh enough.

                max_age is the greatest acceptable age, in seconds, of a
                cached copy; it is clamped to lie between
                opts.min_max_age and opts.expire_age.  If there is no
                sufficiently recent cached copy the URL is fetched
                (after any rate-limiting delay) and the cache updated.
                """
                debug('Fetcher fetch %s' % url)
                cache_corename = urllib.quote_plus(url)
                cache_item = "%s/#%s#" % (self.cachedir, cache_corename)
                try: f = file(cache_item, 'r')
                except (OSError,IOError) as oe:
                        if oe.errno != errno.ENOENT: raise
                        f = None
                now = time.time()
                max_age = max(opts.min_max_age, min(max_age, opts.expire_age))
                if f is not None:
                        # The file is closed whether the entry turns
                        # out to be usable or stale.
                        try:
                                age = now - os.fstat(f.fileno()).st_mtime
                                if age <= max_age:
                                        data = f.read()
                                        debug('Fetcher  cached %d > %d' %
                                                (max_age, age))
                                        return data
                                debug('Fetcher  stale %d < %d'% (max_age, age))
                        finally:
                                f.close()

                debug('Fetcher  fetch')
                self._rate_limit_cache_clean(now)

                stream = urllib2.urlopen(url)
                try: data = stream.read()
                finally: stream.close()

                # Write to a per-process temporary name and rename into
                # place, so a partially written file never appears
                # under the real cache name.
                cache_tmp = "%s/#%s~%d#" % (
                        self.cachedir, cache_corename, os.getpid())
                f = file(cache_tmp, 'w')
                try: f.write(data)
                finally: f.close()
                os.rename(cache_tmp, cache_item)
                debug('Fetcher  stored')
                return data
214
class Yoweb(Fetcher):
        """Fetcher for the yoweb pages of one ocean.

        self.ocean may be None until default_ocean() supplies a value.
        """

        def __init__(self, ocean, cachedir):
                debug('Yoweb init %s' % cachedir)
                self.ocean = ocean
                Fetcher.__init__(self, cachedir)

        def default_ocean(self, ocean='ice'):
                """Set self.ocean to the given ocean if none is set yet."""
                if self.ocean is not None:
                        return
                self.ocean = ocean

        def yoweb(self, kind, tail, max_age):
                """Fetch the yoweb page kind+tail for our ocean.

                Returns what Fetcher.fetch returns for the resulting URL.
                """
                self.default_ocean()
                assert(self.ocean)
                parts = (self.ocean, kind, tail)
                url = 'http://%s.puzzlepirates.com/yoweb/%s%s' % parts
                return self.fetch(url, max_age)
231
class Yppedia(Fetcher):
        """Fetcher for YPPedia pages; call the instance to get a page.

        If opts.localhtml was set when the instance was created, pages
        are read from files in that directory instead of being fetched
        over the network.
        """

        def __init__(self, cachedir):
                debug('Yppedia init %s' % cachedir)
                self.base = 'http://yppedia.puzzlepirates.com/'
                self.localhtml = opts.localhtml
                Fetcher.__init__(self, cachedir)

        def __call__(self, rhs):
                """Return the contents of the YPPedia page named rhs.

                rhs is appended to the base URL (or to the local html
                directory).  In both modes the result is the page data
                as a string.
                """
                if self.localhtml is None:
                        url = self.base + rhs
                        debug('Yppedia retrieving YPP '+url)
                        return self.fetch(url, 3000)
                # Read and close, rather than handing back an open
                # file: that matches what the network path returns and
                # does not leak the file handle.
                f = file(self.localhtml + '/' + rhs, 'r')
                try: return f.read()
                finally: f.close()
246
247 #---------- logging assistance for troubled screenscrapers ----------
248
class SoupLog:
        """Accumulates messages describing problems found while scraping.

        The messages are kept, oldest first, in the list self.msgs.
        """

        def __init__(self):
                self.msgs = [ ]

        def msg(self, m):
                """Record the message m."""
                self.msgs.append(m)

        def soupm(self, obj, m):
                """Record m, followed by the repr of the object it concerns."""
                described = m + '; in ' + repr(obj)
                self.msg(described)

        def needs_msgs(self, child_souplog):
                """Move all of child_souplog's messages onto the end of ours."""
                self.msgs.extend(child_souplog.msgs)
                child_souplog.msgs = [ ]
259
def soup_text(obj):
        """Return all the text found within obj, joined up and stripped.

        obj must provide findAll(text=True) returning the text pieces.
        """
        pieces = obj.findAll(text=True)
        return ''.join(pieces).strip()
263
class SomethingSoupInfo(SoupLog):
        """Base for scrapers of a single yoweb page.

        The constructor fetches the page kind+tail via the global
        fetcher and leaves the parsed result in self._soup.
        """

        def __init__(self, kind, tail, max_age):
                SoupLog.__init__(self)
                page = fetcher.yoweb(kind, tail, max_age)
                self._soup = make_soup(page)
269
270 #---------- scraper for pirate pages ----------
271
class PirateInfo(SomethingSoupInfo):
        # Scrapes one pirate's yoweb page.
        #
        # Public data members:
        #  pi.standings = { 'Treasure Haul': 0, ... }
        #       value is an index into standingvals; a puzzle whose
        #       standing could not be determined has no entry
        #  pi.name = name
        #  pi.crew = (id, name)         or None if not determined
        #  pi.flag = (id, name)         or None if not determined
        #  pi.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, pirate, max_age=300):
                SomethingSoupInfo.__init__(self,
                        'pirate.wm?target=', pirate, max_age)
                self.name = pirate
                self._find_standings()
                self.crew = self._find_crewflag('crew',
                        '^/yoweb/crew/info\\.wm')
                self.flag = self._find_crewflag('flag',
                        '^/yoweb/flag/info\\.wm')

        def _find_standings(self):
                # Sets self.standings from the standing images on the
                # page.  Problems are collected in a separate SoupLog
                # and only copied into self.msgs if, in the end, some
                # puzzle has no usable standing.
                imgs = self._soup.findAll('img',
                        src=regexp.compile('/yoweb/images/stat.*'))
                # Either ".../Standing" or ".../... (ocean-wide Standing)";
                # whichever group matched holds the standing name.
                re = regexp.compile(
u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
                        )

                # puzzle name -> list of standing names seen for it
                standings = { }
                for skill in puzzles:
                        standings[skill] = [ ]

                skl = SoupLog()

                for img in imgs:
                        try: puzzle = img['alt']
                        except KeyError: continue

                        if not puzzle in puzzles:
                                skl.soupm(img, 'unknown puzzle: "%s"' % puzzle)
                                continue
                        key = img.findParent('td')
                        if key is None:
                                skl.soupm(img, 'puzzle at root! "%s"' % puzzle)
                                continue
                        # The standing text is in the next cell along.
                        valelem = key.findNextSibling('td')
                        if valelem is None:
                                skl.soupm(key, 'puzzle missing sibling "%s"'
                                        % puzzle)
                                continue
                        valstr = soup_text(valelem)
                        match = re.match(valstr)
                        if match is None:
                                skl.soupm(key, ('puzzle "%s" unparseable'+
                                        ' standing "%s"') % (puzzle, valstr))
                                continue
                        standing = match.group(match.lastindex)
                        standings[puzzle].append(standing)

                self.standings = { }

                for puzzle in puzzles:
                        sl = standings[puzzle]
                        if len(sl) > 1:
                                skl.msg('puzzle "%s" multiple standings %s' %
                                                (puzzle, repr(sl)))
                                continue
                        if not sl:
                                skl.msg('puzzle "%s" no standing found' % puzzle)
                                continue
                        standing = sl[0]
                        try:
                                self.standings[puzzle] = (
                                        standingvals.index(standing))
                        except ValueError:
                                skl.msg('puzzle "%s" unknown standing "%s"' %
                                        (puzzle, standing))

                # One call suffices: needs_msgs empties skl.
                if any(puzzle not in self.standings for puzzle in puzzles):
                        self.needs_msgs(skl)

        def _find_crewflag(self, cf, yoweb_re):
                # cf is 'crew' or 'flag'; yoweb_re matches the href of
                # the link to that crew's or flag's info page.  Returns
                # (id, name), or None (after recording a message) if
                # there is not exactly one such link or its href does
                # not end with the expected id parameter.
                things = self._soup.findAll('a', href=regexp.compile(yoweb_re))
                if len(things) != 1:
                        self.msg('zero or several %s id references found' % cf)
                        return None
                thing = things[0]
                id_re = '\\b%sid\\=(\\w+)$' % cf
                id_haystack = thing['href']
                match = regexp.compile(id_re).search(id_haystack)
                if match is None:
                        self.soupm(thing, ('incomprehensible %s id ref'+
                                ' (%s in %s)') % (cf, id_re, id_haystack))
                        return None
                name = soup_text(thing)
                return (match.group(1), name)

        def __str__(self):
                return repr((self.crew, self.flag, self.standings, self.msgs))
370
371 #---------- scraper for crew pages ----------
372
class CrewInfo(SomethingSoupInfo):
        # Scrapes one crew's yoweb info page.
        #
        # Public data members:
        #  ci.crewid
        #  ci.crew = [ ('Captain',        ['Pirate', ...]),
        #              ('Senior Officer', [...]),
        #               ... ]
        #  ci.msgs = [ 'message describing problem with scrape' ]

        def __init__(self, crewid, max_age=300):
                self.crewid = crewid
                SomethingSoupInfo.__init__(self,
                        'crew/info.wm?crewid=', crewid, max_age)
                self._find_crew()

        def _find_crew(self):
                # Fills in self.crew.  On failure a message is recorded
                # and self.crew is left as far as it had got (possibly
                # empty).
                self.crew = []
                capts = self._soup.findAll('img',
                        src='/yoweb/images/crew-captain.png')
                if len(capts) != 1:
                        self.msg('crew members: no. of captain images != 1')
                        return
                # Starting from the captain image, go up through the
                # enclosing tables until we reach something which
                # contains a link to a pirate.
                tbl = capts[0]
                while not tbl.find('a', href=pirate_ref_re):
                        tbl = tbl.findParent('table')
                        if not tbl:
                                self.msg('crew members: cannot find table')
                                return
                # List of names for the rank most recently seen; this is
                # the same list object as in the latest self.crew entry.
                current_rank_crew = None
                crew_rank_re = regexp.compile('/yoweb/images/crew')
                for row in tbl.contents:
                        # findAll(recurse=False)
                        # Skip bare strings between the child elements.
                        if isinstance(row,basestring):
                                continue

                        # A row containing a crew rank image starts a
                        # new rank, named by the row's text.
                        is_rank = row.find('img', attrs={'src': crew_rank_re})
                        if is_rank:
                                rank = soup_text(row)
                                current_rank_crew = []
                                self.crew.append((rank, current_rank_crew))
                                continue
                        # Otherwise every pirate link in the row is a
                        # member of the current rank.
                        for cell in row.findAll('a', href=pirate_ref_re):
                                if current_rank_crew is None:
                                        self.soupm(cell, 'crew members: crew'
                                                ' before rank')
                                        continue
                                current_rank_crew.append(soup_text(cell))

        def __str__(self):
                return `(self.crew, self.msgs)`
422
class FlagRelation():
        # Describes the relationship between one flag ("this") and
        # another ("other").
        #
        # Public data members (put there by hand by the creator):
        #       other_flagname
        #       other_flagid
        #       yoweb_heading
        #       this_declaring
        #       other_declaring_min
        #       other_declaring_max
        # where {this,other}_declaring{,_min,_max} are:
        #       -1      {this,other} is declaring war
        #        0      {this,other} is not doing either
        #       +1      {this,other} is allying
        def __repr__(self):
                fields = (
                        self.yoweb_heading,
                        self.this_declaring,
                        self.other_declaring_min,
                        self.other_declaring_max,
                        self.other_flagname,
                        self.other_flagid,
                        )
                return '<FlagRelation %s %d/%d..%d %s %s>' % fields
440
class FlagInfo(SomethingSoupInfo):
        # Scrapes one flag's yoweb info page.
        #
        # Public data members (after init):
        #
        #   flagid
        #   name        #               string
        #
        #   relations[n] = FlagRelation
        #   relation_byname[otherflagname] = relations[some_n]
        #   relation_byid[otherflagid] = relations[some_n]
        #
        #   islands[n] = (islandid, islandname)
        #
        def __init__(self, flagid, max_age=600):
                self.flagid = flagid
                SomethingSoupInfo.__init__(self,
                        'flag/info.wm?flagid=', flagid, max_age)
                self._find_flag()

        def _find_flag(self):
                # Fills in name, relations, relation_byname,
                # relation_byid and islands.  Anything unexpected in
                # the war/alliance section is reported as a warning on
                # stderr and skipped.
                font2 = self._soup.find('font',{'size':'+2'})
                self.name = font2.find('b').contents[0]

                self.relations = [ ]
                self.relation_byname = { }
                self.relation_byid = { }
                self.islands = [ ]

                # Locate the war/alliance section by its position
                # relative to the MAGNATE reputation image.
                magnate = self._soup.find('img',{'src':
                        '/yoweb/images/repute-MAGNATE.png'})
                warinfo = (magnate.findParent('table').findParent('tr').
                        findNextSibling('tr').findNext('td',{'align':'left'}))

                def warn(m):
                        print >>sys.stderr, 'WARNING: '+m

                # The wi_* functions each handle one link found under a
                # heading.  They are called as f(head, waritem, *extra)
                # and return None on success or an error string.

                def wi_warn(head, waritem):
                        warn('unknown warmap item: %s: %s' %
                                (repr(head), repr(repr(waritem))))

                def wihelp_item(waritem, thing):
                        # Returns (error, id, name) for a link whose
                        # href ends "?<thing>id=<digits>"; exactly one
                        # of error and (id, name) is meaningful.
                        url = waritem.get('href', None)
                        if url is None:
                                return ('no url for '+thing,None,None)
                        m = regexp.search('\?'+thing+'id=(\d+)$', url)
                        if not m: return ('no '+thing+'id',None,None)
                        tid = m.group(1)
                        tname = waritem.string
                        if tname is None:
                                return (thing+' name not just string',None,None)
                        return (None,tid,tname)

                def wi_alwar(head, waritem, thisdecl, othermin, othermax):
                        (err,flagid,flagname) = wihelp_item(waritem,'flag')
                        if err: return err
                        if flagid in self.relation_byid:
                                return 'flag id twice!'
                        if flagname in self.relation_byname:
                                return 'flag name twice!'
                        rel = FlagRelation()
                        rel.other_flagname = flagname
                        rel.other_flagid = flagid
                        rel.yoweb_heading = head
                        rel.this_declaring = thisdecl
                        rel.other_declaring_min = othermin
                        rel.other_declaring_max = othermax
                        self.relations.append(rel)
                        self.relation_byid[flagid] = rel
                        # Keyed by name, so that lookups by name and
                        # the duplicate-name check above actually work.
                        self.relation_byname[flagname] = rel

                def wi_isle(head, waritem):
                        (err,isleid,islename) = wihelp_item(waritem,'island')
                        if err: return err
                        self.islands.append((isleid,islename))

                # heading text -> (handler, extra handler arguments...)
                warmap = {
                        'Allied with':                  (wi_alwar,+1,+1,+1),
                        'Declaring war against':        (wi_alwar,-1, 0,+1),
                        'At war with':                  (wi_alwar,-1,-1,-1),
                        'Trying to form an alliance with': (wi_alwar,+1,-1,0),
                        'Islands controlled by this flag': (wi_isle,),
                        }

                # how is (heading, handler, extra args...) for the most
                # recent heading.  Before any heading has been seen,
                # links are just warned about.
                how = (None, wi_warn)

                for waritem in warinfo.findAll(['font','a']):
                        if waritem is None: break
                        if waritem.name == 'font':
                                # Headings are <font> elements in one
                                # particular colour.
                                colour = waritem.get('color',None)
                                if (colour is None or
                                    colour.lstrip('#') != '958A5F'):
                                        warn('strange colour %s in %s' %
                                                (colour,repr(repr(waritem))))
                                        continue
                                head = waritem.string
                                if head is None:
                                        warn('no head string in '+
                                                repr(repr(waritem)))
                                        continue
                                head = regexp.sub('\\s+', ' ', head).strip()
                                head = head.rstrip(':')
                                how = (head,) + warmap.get(head, (wi_warn,))
                                continue
                        assert(waritem.name == 'a')

                        debug('WARHOW %s(%s, waritem, *%s)' %
                                (how[1], repr(how[0]), repr(how[2:])))
                        bad = how[1](how[0], waritem, *how[2:])
                        if bad:
                                warn('bad waritem %s: %s: %s' % (repr(how[0]),
                                        bad, repr(repr(waritem))))

        def __str__(self):
                return repr((self.name, self.islands, self.relations))
552
553 #---------- scraper for ocean info incl. embargoes etc. ----------
554
class IslandBasicInfo():
        # Minimal information about one island.
        #
        # Public data attributes:
        #  ocean
        #  name
        # Public data attributes maybe set by caller:
        #  arch
        def __init__(self, ocean, islename):
                self.name = islename
                self.ocean = ocean

        def yppedia(self):
                # Returns whatever the global yppedia fetcher gives for
                # this island's page, which is named
                # "<island name>_(<ocean>)" with spaces turned into
                # underscores and the rest URL-quoted.
                quoted = [urllib.quote(part.replace(' ','_'))
                          for part in (self.name, self.ocean)]
                url_rhs = quoted[0] + '_(' + quoted[1] + ')'
                return yppedia(url_rhs)

        def __str__(self):
                return repr((self.ocean, self.name))
570
class IslandExtendedInfo(IslandBasicInfo):
        # Island information including what can be found out from the
        # island's YPPedia page and its yoweb page.
        #
        # Public data attributes (inherited):
        #  ocean
        #  name
        # Public data attributes (additional):
        #  islandid     (None if no yoweb link was found on YPPedia)
        #  yoweb_url    (likewise)
        #  flagid       (None if no flag link was found on the yoweb page)
        def __init__(self, ocean, islename):
                IslandBasicInfo.__init__(self, ocean, islename)
                self.islandid = None
                self.yoweb_url = None
                self._collect_yoweb()
                self._collect_flagid()

        def _collect_yoweb(self):
                # Sets yoweb_url and islandid from the first link on the
                # island's YPPedia page which points at a yoweb island
                # info page; leaves both None if there is no such link.
                debug('IEI COLLECT YOWEB '+repr(self.name))
                self.islandid = None
                self.yoweb_url = None

                soup = make_soup(self.yppedia())
                yoweb_re = regexp.compile('^http://\w+\.puzzlepirates\.com/'+
                        'yoweb/island/info\.wm\?islandid=(\d+)$')
                a = soup.find('a', attrs = { 'href': yoweb_re })
                if a is None:
                        debug('IEI COLLECT YOWEB '+repr(self.name)+' NONE')
                        return

                debug('IEI COLLECT YOWEB '+repr(self.name)+' GOT '+
                        repr(repr(a)))
                self.yoweb_url = a['href']
                m = yoweb_re.search(self.yoweb_url)
                self.islandid = m.group(1)

        def _collect_flagid(self):
                # Sets flagid from the first flag info link on the
                # island's yoweb page; leaves it None if the yoweb URL
                # is unknown or the page has no such link.
                self.flagid = None

                yo = self.yoweb_url
                debug('IEI COLLECT FLAGID '+repr(self.name)+' URL '+repr(yo))
                if yo is None: return
                dataf = fetcher.fetch(yo, 1800)
                soup = make_soup(dataf)
                ruler_re = regexp.compile(
                        '/yoweb/flag/info\.wm\?flagid=(\d+)$')
                ruler = soup.find('a', attrs = { 'href': ruler_re })
                if not ruler:
                        debug('IEI COLLECT FLAGID '+repr(self.name)+' NONE')
                        return
                debug('IEI COLLECT FLAGID '+repr(self.name)+' GOT '+
                        repr(repr(ruler)))
                m = ruler_re.search(ruler['href'])
                self.flagid = m.group(1)

        def __str__(self):
                return repr((self.ocean, self.islandid, self.name,
                        self.yoweb_url, self.flagid))
626
class IslandFlagInfo(IslandExtendedInfo):
        # Island information which also includes the scraped details of
        # the flag identified by flagid.
        #
        # Public data attributes (inherited):
        #  ocean
        #  name
        #  islandid
        #  yoweb_url
        #  flagid
        # Public data attributes (additional):
        #  flag         (a FlagInfo, or None if flagid is None)
        def __init__(self, ocean, islename):
                IslandExtendedInfo.__init__(self, ocean, islename)
                self.flag = None
                self._collect_flag()

        def _collect_flag(self):
                if self.flagid is not None:
                        self.flag = FlagInfo(self.flagid, 1800)

        def __str__(self):
                island_part = IslandExtendedInfo.__str__(self)
                flag_part = str(self.flag)
                return island_part + '; ' + flag_part
647
class NullProgressReporter():
        # Progress reporter which reports nothing.

        def doing(self, msg):
                return None

        def stop(self):
                return None
651
class TypewriterProgressReporter():
        # Progress reporter which keeps overwriting a single line on
        # stdout.  self._l is the length of the text currently shown.

        def __init__(self):
                self._l = 0

        def doing(self,m):
                # Show m followed by an ellipsis.
                self._doing(m + '...')

        def _doing(self,m):
                # Return to the start of the line and write m; if the
                # previous text was longer, blank out the leftover
                # characters and back up over the blanks again.
                shown = len(m)
                leftover = self._l - shown
                self._write('\r')
                self._write(m)
                if leftover > 0:
                        for filler in (' ', '\b'):
                                self._write(filler * leftover)
                self._l = shown
                sys.stdout.flush()

        def stop(self):
                # Erase whatever is currently shown.
                self._doing('')
                self._l = 0

        def _write(self,t):
                sys.stdout.write(t)
671
class OceanInfo():
        # Information about all the islands of the ocean which the
        # global fetcher is set to, obtained by running the
        # yppedia-ocean-scraper program.
        #
        # Public data attributes:
        #   oi.islands[islename] = isleclass(...)
        #   oi.arches[archname][islename] = isleclass(...)
        def __init__(self, isleclass=IslandBasicInfo):
                # isleclass is called as isleclass(ocean, islename) to
                # make the object for each island.
                self.isleclass = isleclass
                self.ocean = fetcher.ocean.lower().capitalize()

                progressreporter.doing('fetching ocean info')

                cmdl = ['./yppedia-ocean-scraper']
                if opts.localhtml is not None:
                        cmdl += ['--local-html-dir',opts.localhtml]
                cmdl += [self.ocean]
                debug('OceanInfo collect running ' + repr(cmdl))
                oscraper = subprocess.Popen(
                        cmdl,
                        stdout = subprocess.PIPE,
                        cwd = yppsc_dir()+'/yarrg',
                        shell=False, stderr=None,
                        )
                # Output is a header line starting "ocean ", then
                # archipelago names indented by one space, each followed
                # by its islands indented by two spaces.
                h = oscraper.stdout.readline()
                debug('OceanInfo collect h '+repr(h))
                assert(regexp.match('^ocean ', h))
                arch_re = regexp.compile('^ (\S.*)')
                island_re = regexp.compile('^  (\S.*)')

                # Read everything before waiting: waiting first can
                # deadlock if the scraper blocks writing to a full pipe.
                lines = oscraper.stdout.readlines()
                oscraper.wait()
                assert(oscraper.returncode == 0)

                self.islands = { }
                self.arches = { }
                archname = None

                isles = [ ]
                progressreporter.doing('parsing ocean info')

                for l in lines:
                        debug('OceanInfo collect l '+repr(l))
                        l = l.rstrip('\n')
                        # Try islands first; arch_re cannot match a line
                        # with two leading spaces anyway.
                        m = island_re.match(l)
                        if m:
                                assert(archname is not None)
                                islename = m.group(1)
                                isles.append((archname, islename))
                                continue
                        m = arch_re.match(l)
                        if m:
                                archname = m.group(1)
                                assert(archname not in self.arches)
                                self.arches[archname] = { }
                                continue
                        assert(False)

                # Every island, including the last one.
                for i, (archname, islename) in enumerate(isles):
                        progressreporter.doing(
                                'fetching isle info %2d/%d (%s: %s)'
                                % (i, len(isles), archname, islename))
                        isle = self.isleclass(self.ocean, islename)
                        isle.arch = archname
                        self.islands[islename] = isle
                        self.arches[archname][islename] = isle

        def __str__(self):
                return repr((self.islands, self.arches))
738
739 #---------- pretty-printer for tables of pirate puzzle standings ----------
740
class StandingsTable:
        # Writes a fixed-width table of puzzle standings, one pirate per
        # line, to a file-like object.
        #
        #  f            object with a write() method; receives all output
        #  use_puzzles  list of columns; each entry is a puzzle name or a
        #                list of names (the best standing of the group is
        #                shown).  None selects duty_puzzles or puzzles
        #                according to opts.ship_duty.
        #  col_width    width of a column, including its leading space
        #  gap_every    a blank line is written before every gap_every'th
        #                row since the headings; None disables this

        def __init__(self, f, use_puzzles=None, col_width=6, gap_every=5):
                if use_puzzles is None:
                        if opts.ship_duty:
                                use_puzzles=duty_puzzles
                        else:
                                use_puzzles=puzzles
                self._puzzles = use_puzzles
                self.f = f
                self._cw = col_width-1
                self._gap_every = gap_every
                self._linecount = 0
                self._o = f.write

        def _nl(self): self._o('\n')

        def _pline(self, lhs, puzstrs, extra):
                # Writes one row: left-hand label, one cell per entry of
                # puzstrs (padded and truncated to the column width),
                # then the optional trailing text `extra'.
                if (self._linecount > 0
                    and self._gap_every is not None
                    and not (self._linecount % self._gap_every)):
                        self._nl()
                self._o('%-*s' % (max(max_pirate_namelen+1, 15), lhs))
                for v in puzstrs:
                        self._o(' %-*.*s' % (self._cw,self._cw, v))
                if extra:
                        self._o(' ' + extra)
                self._nl()
                self._linecount += 1

        def _puzstr(self, pi, puzzle):
                # Returns the cell text for pirate `pi' in column `puzzle':
                #  '?' if a standing is missing, '' for a zero standing,
                #  otherwise (width permitting) the standing number,
                #  followed by one '*' per two levels and a '+' for an
                #  odd level.
                if not isinstance(puzzle,list): puzzle = [puzzle]
                try: standing = max([pi.standings[p] for p in puzzle])
                except KeyError: return '?'
                if not standing: return ''
                s = ''
                if self._cw > 4:
                        # (a standing-name initial used to be computed
                        # here but was never output; that dead lookup
                        # has been dropped)
                        s += repr(standing)
                if self._cw > 5:
                        s += ' '
                s += '*' * (standing // 2)
                s += '+' * (standing % 2)
                return s

        def headings(self, lhs='', rhs=None):
                # Writes the heading row and restarts the gap counter.
                def puzn_redact(name):
                        # Abbreviates a column name: a group becomes its
                        # members' abbreviations, each cut to half a
                        # column and joined with '/'; a two-word name
                        # keeps at most 4 characters of the first word
                        # followed by the rest.
                        if isinstance(name,list):
                                return '/'.join(
                                        ["%.*s" % (self._cw//2, puzn_redact(n))
                                         for n in name])
                        spc = name.find(' ')
                        if spc < 0: return name
                        return name[0:min(4,spc)] + name[spc+1:]
                # a non-positive count keeps _pline from emitting a gap
                self._linecount = -2
                self._pline(lhs, [puzn_redact(p) for p in self._puzzles], rhs)
                self._linecount = 0

        def literalline(self, line):
                # Writes `line' verbatim and restarts the gap counter.
                self._o(line)
                self._nl()
                self._linecount = 0

        def pirate_dummy(self, name, standingstring, extra=None):
                # Writes a row for `name' with the same placeholder text
                # in every puzzle column.  This must be a list with one
                # entry per column: multiplying the string itself made
                # _pline iterate over its characters, which is only
                # right for one-character placeholders.
                standings = [standingstring] * len(self._puzzles)
                self._pline(' '+name, standings, extra)

        def pirate(self, pi, extra=None):
                # Writes the row for pirate info object `pi'.
                puzstrs = [self._puzstr(pi,puz) for puz in self._puzzles]
                self._pline(' '+pi.name, puzstrs, extra)
808
809
810 #---------- chat log parser ----------
811
class PirateAboard:
        # A plain record with public attributes; no hidden behaviour
        # apart from pirate_info().
        #  pa.v                 vessel record; may be None
        #  pa.name
        #  pa.last_time
        #  pa.last_event
        #  pa.gunner
        #  pa.last_chat_time
        #  pa.last_chat_chan
        #  pa.pi
        #
        # The same record also describes jobbing applicants:
        #               happens when                    expires (to "-")
        #   -            disembark, leaves crew          no
        #   aboard       evidence of them being aboard   no
        #   applied      "has applied for the job"       120s, configurable
        #   ashore       "has taken a job"               30min, configurable
        #   declined     "declined the job offer"        30s, configurable
        #   invited      "has been invited to job"       120s, configurable
        #
        #  pa.jobber    None, 'ashore', 'applied', 'invited', 'declined'
        #  pa.expires   expiry time

        def __init__(pa, pn, v, time, event):
                # who and where
                pa.name, pa.v = pn, v
                # most recent sighting
                pa.last_time, pa.last_event = time, event
                pa.last_chat_time = pa.last_chat_chan = None
                pa.gunner = False
                # no fetched pirate info, not a jobber
                pa.pi = None
                pa.jobber = pa.expires = None

        def pirate_info(pa):
                # Returns the pirate info for this pirate, (re)fetching
                # it if we have none or it is older than a randomly
                # chosen limit, provided the fetcher says no wait is
                # needed; otherwise returns what we have (maybe None).
                now = time.time()
                if not pa.pi:
                        imaginary = [1]
                else:
                        age = now - pa.pi_fetched
                        guide = random.randint(120,240)
                        if age <= guide:
                                return pa.pi
                        debug('PirateAboard refresh %d > %d  %s' % (
                                age, guide, pa.name))
                        imaginary = [2,4]
                wait = fetcher.need_wait(now, imaginary)
                if not wait:
                        pa.pi = PirateInfo(pa.name, 600)
                        pa.pi_fetched = now
                else:
                        debug('PirateAboard fetcher not ready %d' % wait)
                return pa.pi
866
867 class ChatLogTracker:
868         # This is quite complex so we make it opaque.  Use the
869         # official invokers, accessors etc.
870
871         def __init__(self, myself_pi, logfn):
872                 self._pl = {}   # self._pl['Pirate'] =
873                 self._vl = {}   #   self._vl['Vessel']['Pirate'] = PirateAboard
874                                 # self._vl['Vessel']['#lastinfo']
875                                 # self._vl['Vessel']['#name']
876                                 # self._v = self._vl[self._vessel]
877                 self._date = None
878                 self._myself = myself_pi
879                 self._lbuf = ''
880                 self._f = file(logfn)
881                 flen = os.fstat(self._f.fileno()).st_size
882                 max_backlog = 500000
883                 if flen > max_backlog:
884                         startpos = flen - max_backlog
885                         self._f.seek(startpos)
886                         self._f.readline()
887                 self._progress = [0, flen - self._f.tell()]
888                 self._disembark_myself()
889                 self._need_redisplay = False
890                 self._lastvessel = None
891
892         def _disembark_myself(self):
893                 self._v = None
894                 self._vessel = None
895                 self.force_redisplay()
896
897         def force_redisplay(self):
898                 self._need_redisplay = True
899
900         def _vessel_updated(self, v, timestamp):
901                 if v is None: return
902                 v['#lastinfo'] = timestamp
903                 self.force_redisplay()
904
905         def _onboard_event(self,v,timestamp,pirate,event,jobber=None):
906                 pa = self._pl.get(pirate, None)
907                 if pa is not None and pa.v is v:
908                         pa.last_time = timestamp
909                         pa.last_event = event
910                 else:
911                         if pa is not None and pa.v is not None:
912                                 del pa.v[pirate]
913                         pa = PirateAboard(pirate, v, timestamp, event)
914                         self._pl[pirate] = pa
915                         if v is not None: v[pirate] = pa
916                 pa.jobber = jobber
917
918                 if jobber is None: timeout = None
919                 else: timeout = getattr(opts, 'timeout_'+jobber)
920                 if timeout is None: pa.expires = None
921                 else: pa.expires = timestamp + timeout
922                 self._vessel_updated(v, timestamp)
923                 return pa
924
925         def _expire_jobbers(self, now):
926                 for pa in self._pl.values():
927                         if pa.expires is None: continue
928                         if pa.expires >= now: continue
929                         v = pa.v
930                         del self._pl[pa.name]
931                         if v is not None: del v[pa.name]
932                         self.force_redisplay()
933
934         def _trash_vessel(self, v):
935                 for pn in v:
936                         if pn.startswith('#'): continue
937                         del self._pl[pn]
938                 vn = v['#name']
939                 del self._vl[vn]
940                 if v is self._v: self._disembark_myself()
941                 self.force_redisplay()
942
943         def _vessel_stale(self, v, timestamp):
944                 return timestamp - v['#lastinfo'] > opts.ship_reboard_clearout
945
946         def _vessel_check_expire(self, v, timestamp):
947                 if not self._vessel_stale(v, timestamp):
948                         return v
949                 self._debug_line_disposition(timestamp,'',
950                         'stale-reset ' + v['#name'])
951                 self._trash_vessel(v)
952                 return None
953
954         def expire_garbage(self, timestamp):
955                 for v in self._vl.values():
956                         self._vessel_check_expire(v, timestamp)
957
958         def _vessel_lookup(self, vn, timestamp, dml=[], create=False):
959                 v = self._vl.get(vn, None)
960                 if v is not None:
961                         v = self._vessel_check_expire(v, timestamp)
962                 if v is not None:
963                         dml.append('found')
964                         return v
965                 if not create:
966                         dml.append('no')
967                 dml.append('new')
968                 self._vl[vn] = v = { '#name': vn }
969                 self._vessel_updated(v, timestamp)
970                 return v
971
972         def _find_matching_vessel(self, pattern, timestamp, cmdr,
973                                         dml=[], create=False):
974                 # use when a commander pirate `cmdr' specified a vessel
975                 #  by name `pattern' (either may be None)
976                 # if create is true, will create the vessel
977                 #  record if an exact name is specified
978
979                 if (pattern is not None and
980                     not '*' in pattern
981                     and len(pattern.split(' ')) == 2):
982                         vn = pattern.title()
983                         dml.append('exact')
984                         return self._vessel_lookup(
985                                 vn, timestamp, dml=dml, create=create)
986
987                 if pattern is None:
988                         pattern_check = lambda vn: True
989                 else:
990                         re = '(?:.* )?%s$' % pattern.lower().replace('*','.+')
991                         pattern_check = regexp.compile(re, regexp.I).match
992
993                 tries = []
994
995                 cmdr_pa = self._pl.get(cmdr, None)
996                 if cmdr_pa: tries.append((cmdr_pa.v, 'cmdr'))
997
998                 tries.append((self._v, 'here'))
999                 tried_vns = []
1000
1001                 for (v, dm) in tries:
1002                         if v is None: dml.append(dm+'?'); continue
1003                         
1004                         vn = v['#name']
1005                         if not pattern_check(vn):
1006                                 tried_vns.append(vn)
1007                                 dml.append(dm+'#')
1008                                 continue
1009
1010                         dml.append(dm+'!')
1011                         return v
1012
1013                 if pattern is not None and '*' in pattern:
1014                         search = [
1015                                 (vn,v)
1016                                 for (vn,v) in self._vl.iteritems()
1017                                 if not self._vessel_stale(v, timestamp)
1018                                 if pattern_check(vn)
1019                                 ]
1020                         #debug('CLT-RE /%s/ wanted (%s) searched (%s)' % (
1021                         #       re,
1022                         #       '/'.join(tried_vns),
1023                         #       '/'.join([vn for (vn,v) in search])))
1024
1025                         if len(search)==1:
1026                                 dml.append('one')
1027                                 return search[0][1]
1028                         elif search:
1029                                 dml.append('many')
1030                         else:
1031                                 dml.append('none')
1032
1033         def _debug_line_disposition(self,timestamp,l,m):
1034                 debug('CLT %13s %-40s %s' % (timestamp,m,l))
1035
1036         def _rm_crew_l(self,re,l):
1037                 m = regexp.match(re,l)
1038                 if m and m.group(2) == self._myself.crew[1]:
1039                         return m.group(1)
1040                 else:
1041                         return None
1042
1043         def local_command(self, metacmd):
1044                 # returns None if all went well, or problem message
1045                 return self._command(self._myself.name, metacmd,
1046                         "local", time.time(), 
1047                         (lambda m: debug('CMD %s' % metacmd)))
1048
1049         def _command(self, cmdr, metacmd, chan, timestamp, d):
1050                 # returns None if all went well, or problem message
1051                 metacmd = regexp.sub('\\s+', ' ', metacmd).strip()
1052                 m2 = regexp.match(
1053                     '/([adj]) (?:([A-Za-z* ]+)\\s*:)?([A-Za-z ]+)$',
1054                     metacmd)
1055                 if not m2: return "unknown syntax or command"
1056
1057                 (cmd, pattern, targets) = m2.groups()
1058                 dml = ['cmd', chan, cmd]
1059
1060                 if cmd == 'a': each = self._onboard_event
1061                 elif cmd == 'd': each = disembark
1062                 else: each = lambda *l: self._onboard_event(*l,
1063                                 **{'jobber':'applied'})
1064
1065                 if cmdr == self._myself.name:
1066                         dml.append('self')
1067                         how = 'cmd: %s' % cmd
1068                 else:
1069                         dml.append('other')
1070                         how = 'cmd: %s %s' % (cmd,cmdr)
1071
1072                 if cmd == 'j':
1073                         if pattern is not None:
1074                                 return "/j command does not take a vessel"
1075                         v = None
1076                 else:
1077                         v = self._find_matching_vessel(
1078                                 pattern, timestamp, cmdr,
1079                                 dml, create=True)
1080
1081                 if cmd == 'j' or v is not None:
1082                         targets = targets.strip().split(' ')
1083                         dml.append(`len(targets)`)
1084                         for target in targets:
1085                                 each(v, timestamp, target.title(), how)
1086                         self._vessel_updated(v, timestamp)
1087
1088                 dm = ' '.join(dml)
1089                 return d(dm)
1090
1091                 return None
1092
        def chatline(self,l):
                # Parses one chat log line `l' and updates the tracker
                # accordingly.  Most paths finish by returning d(...),
                # which traces the line's disposition for debugging.
                #
                # rm, d and rm_crew are closures over `l' and
                # `timestamp', both of which are rebound further down
                # (the timestamp is computed; the timestamp prefix is
                # stripped off l), so they always see the current values.
                rm = lambda re: regexp.match(re,l)
                d = lambda m: self._debug_line_disposition(timestamp,l,m)
                rm_crew = lambda re: self._rm_crew_l(re,l)
                timestamp = None

                # Date banner: remember the three numbers as the
                # current date and restart the timestamp sequence.
                m = rm('=+ (\\d+)/(\\d+)/(\\d+) =+$')
                if m:
                        self._date = [int(x) for x in m.groups()]
                        self._previous_timestamp = None
                        return d('date '+`self._date`)

                if self._date is None:
                        return d('date unset')

                # Everything else must start with [hh:mm:ss].
                m = rm('\\[(\d\d):(\d\d):(\d\d)\\] ')
                if not m:
                        return d('no timestamp')

                # Combine the current date with the time of day.  If
                # that would be earlier than the previous line, bump
                # the third date component (the day, given the order
                # in which mktime takes its fields) until it is not.
                while True:
                        time_tuple = (self._date +
                                      [int(x) for x in m.groups()] +
                                      [-1,-1,-1])
                        timestamp = time.mktime(time_tuple)
                        if timestamp >= self._previous_timestamp: break
                        self._date[2] += 1
                        self._debug_line_disposition(timestamp,'',
                                'new date '+`self._date`)

                self._previous_timestamp = timestamp

                # strip the timestamp prefix (up to the first space)
                l = l[l.find(' ')+1:]

                # Helpers.  ob_x records an event for a pirate on our
                # current vessel; ob1 and oba do so for the pirate in
                # group 1 of the latest match `m' and trace it.
                def ob_x(pirate,event):
                        return self._onboard_event(
                                        self._v, timestamp, pirate, event)
                def ob1(did): ob_x(m.group(1), did); return d(did)
                def oba(did): return ob1('%s %s' % (did, m.group(2)))

                # jb records a jobber state change; the pirate is
                # recorded as not on any vessel.
                def jb(pirate,jobber):
                        return self._onboard_event(
                                None, timestamp, pirate,
                                ("jobber %s" % jobber),
                                jobber=jobber
                                )

                # disembark records the departure event and then
                # removes the pirate from the vessel and the pirate index.
                def disembark(v, timestamp, pirate, event):
                        self._onboard_event(
                                        v, timestamp, pirate, 'leaving '+event)
                        del v[pirate]
                        del self._pl[pirate]

                def disembark_me(why):
                        self._disembark_myself()
                        return d('disembark-me '+why)

                # We board a vessel: find or create its record, make it
                # current, record ourselves aboard, then clear out any
                # stale vessels.
                m = rm('Going aboard the (\\S.*\\S)\\.\\.\\.$')
                if m:
                        dm = ['boarding']
                        pn = self._myself.name
                        vn = m.group(1)
                        v = self._vessel_lookup(vn, timestamp, dm, create=True)
                        self._lastvessel = self._vessel = vn
                        self._v = v
                        ob_x(pn, 'we boarded')
                        self.expire_garbage(timestamp)
                        return d(' '.join(dm))

                # Nothing below is interpreted unless we are aboard.
                if self._v is None:
                        return d('no vessel')

                m = rm('(\\w+) has come aboard\\.$')
                if m: return ob1('boarded');

                m = rm('You have ordered (\\w+) to do some (\\S.*\\S)\\.$')
                if m:
                        (who,what) = m.groups()
                        pa = ob_x(who,'ord '+what)
                        if what == 'Gunning':
                                pa.gunner = True
                        return d('duty order')

                m = rm('(\\w+) abandoned a (\\S.*\\S) station\\.$')
                if m: oba('stopped'); return d("end")

                # Chat handling.  chat_core notes that `speaker' spoke
                # on `chan', provided we know of them and they are not
                # recorded on some other vessel; it returns a word
                # saying which case applied.
                def chat_core(speaker, chan):
                        try: pa = self._pl[speaker]
                        except KeyError: return 'mystery'
                        if pa.v is not None and pa.v is not self._v:
                                return 'elsewhere'
                        pa.last_chat_time = timestamp
                        pa.last_chat_chan = chan
                        self.force_redisplay()
                        return 'here'

                def chat(chan):
                        speaker = m.group(1)
                        dm = chat_core(speaker, chan)
                        return d('chat %s %s' % (chan, dm))

                # chat_metacmd first tries the chat text (group 2) as a
                # metacommand; if _command reports a problem the line
                # is treated as ordinary chat.
                def chat_metacmd(chan):
                        (cmdr, metacmd) = m.groups()
                        whynot = self._command(
                                cmdr, metacmd, chan, timestamp, d)
                        if whynot is not None:
                                return chat(chan)
                        else:
                                chat_core(cmdr, 'cmd '+chan)

                m = rm('(\\w+) (?:issued an order|ordered everyone) "')
                if m: return ob1('general order');

                m = rm('(\\w+) says, "')
                if m: return chat('public')

                m = rm('(\\w+) tells ye, "')
                if m: return chat('private')

                m = rm('Ye told (\\w+), "(.*)"$')
                if m: return chat_metacmd('private')

                m = rm('(\\w+) flag officer chats, "')
                if m: return chat('flag officer')

                m = rm('(\\w+) officer chats, "(.*)"$')
                if m: return chat_metacmd('officer')

                # Lines that mean we ourselves have left the vessel.
                m = rm('Ye accepted the offer to job with ')
                if m: return disembark_me('jobbing')

                m = rm('Ye hop on the ferry and are whisked away ')
                if m: return disembark_me('ferry')

                m = rm('Whisking away to yer home on the magical winds')
                if m: return disembark_me('home')

                # End of a melee: if we are among the winners, every
                # single-word winner name is recorded as aboard.
                m = rm('Game over\\.  Winners: ([A-Za-z, ]+)\\.$')
                if m:
                        pl = m.group(1).split(', ')
                        if not self._myself.name in pl:
                                return d('lost melee')
                        for pn in pl:
                                if ' ' in pn: continue
                                ob_x(pn,'won melee')
                        return d('won melee')

                m = rm('(\\w+) is eliminated\\!')
                if m: return ob1('eliminated in fray');

                m = rm('(\\w+) has driven \w+ from the ship\\!')
                if m: return ob1('boarder repelled');

                m = rm('\w+ has bested (\\w+), and turns'+
                        ' to the rest of the ship\\.')
                if m: return ob1('boarder unrepelled');

                # Crew and jobbing notices.  The rm_crew ones only
                # count when the quoted crew name is our own crew's.
                pirate = rm_crew("(\\w+) has taken a job with '(.*)'\\.")
                if pirate: return jb(pirate, 'ashore')

                pirate = rm_crew("(\\w+) has left '(.*)'\\.")
                if pirate:
                        disembark(self._v, timestamp, pirate, 'left crew')
                        return d('left crew')

                m = rm('(\w+) has applied for the posted job\.')
                if m: return jb(m.group(1), 'applied')

                pirate= rm_crew("(\\w+) has been invited to job for '(.*)'\\.")
                if pirate: return jb(pirate, 'invited')

                pirate = rm_crew("(\\w+) declined the job offer for '(.*)'\\.")
                if pirate: return jb(pirate, 'declined')

                m = rm('(\\w+) has left the vessel\.')
                if m:
                        pirate = m.group(1)
                        disembark(self._v, timestamp, pirate, 'disembarked')
                        return d('disembarked')

                return d('not-matched')
1273
1274         def _str_pa(self, pn, pa):
1275                 assert self._pl[pn] == pa
1276                 s = ' '*20 + "%s %-*s %13s %-30s %13s %-20s %13s" % (
1277                         (' ','G')[pa.gunner],
1278                         max_pirate_namelen, pn,
1279                         pa.last_time, pa.last_event,
1280                         pa.last_chat_time, pa.last_chat_chan,
1281                         pa.jobber)
1282                 if pa.expires is not None:
1283                         s += " %-5d" % (pa.expires - pa.last_time)
1284                 s += "\n"
1285                 return s
1286
1287         def _str_vessel(self, vn, v):
1288                 s = ' vessel %s\n' % vn
1289                 s += ' '*20 + "%-*s   %13s\n" % (
1290                                 max_pirate_namelen, '#lastinfo',
1291                                 v['#lastinfo'])
1292                 assert v['#name'] == vn
1293                 for pn in sorted(v.keys()):
1294                         if pn.startswith('#'): continue
1295                         pa = v[pn]
1296                         assert pa.v == v
1297                         s += self._str_pa(pn,pa)
1298                 return s
1299
1300         def __str__(self):
1301                 s = '''<ChatLogTracker
1302  myself %s
1303  vessel %s
1304 '''                     % (self._myself.name, self._vessel)
1305                 assert ((self._v is None and self._vessel is None) or
1306                         (self._v is self._vl[self._vessel]))
1307                 if self._vessel is not None:
1308                         s += self._str_vessel(self._vessel, self._v)
1309                 for vn in sorted(self._vl.keys()):
1310                         if vn == self._vessel: continue
1311                         s += self._str_vessel(vn, self._vl[vn])
1312                 s += " elsewhere\n"
1313                 for p in self._pl:
1314                         pa = self._pl[p]
1315                         if pa.v is not None:
1316                                 assert pa.v[p] is pa
1317                                 assert pa.v in self._vl.values()
1318                         else:
1319                                 s += self._str_pa(pa.name, pa)
1320                 s += '>\n'
1321                 return s
1322
1323         def catchup(self, progress=None):
1324                 while True:
1325                         more = self._f.readline()
1326                         if not more: break
1327
1328                         self._progress[0] += len(more)
1329                         if progress: progress.progress(*self._progress)
1330
1331                         self._lbuf += more
1332                         if self._lbuf.endswith('\n'):
1333                                 self.chatline(self._lbuf.rstrip())
1334                                 self._lbuf = ''
1335                                 if opts.debug >= 2:
1336                                         debug(self.__str__())
1337                 self._expire_jobbers(time.time())
1338
1339                 if progress: progress.caughtup()
1340
1341         def changed(self):
1342                 rv = self._need_redisplay
1343                 self._need_redisplay = False
1344                 return rv
1345         def myname(self):
1346                 # returns our pirate name
1347                 return self._myself.name
1348         def vesselname(self):
1349                 # returns the vessel name we're aboard or None
1350                 return self._vessel
1351         def lastvesselname(self):
1352                 # returns the last vessel name we were aboard or None
1353                 return self._lastvessel
1354         def aboard(self, vesselname=True):
1355                 # returns a list of PirateAboard the vessel
1356                 #  sorted by pirate name
1357                 #  you can pass this None and you'll get []
1358                 #  or True for the current vessel (which is the default)
1359                 #  the returned value is a fresh list of persistent
1360                 #  PirateAboard objects
1361                 if vesselname is True: v = self._v
1362                 else: v = self._vl.get(vesselname.title())
1363                 if v is None: return []
1364                 return [ v[pn]
1365                          for pn in sorted(v.keys())
1366                          if not pn.startswith('#') ]
1367         def jobbers(self):
1368                 # returns a the jobbers' PirateAboards,
1369                 # sorted by jobber class and reverse of expiry time
1370                 l = [ pa
1371                       for pa in self._pl.values()
1372                       if pa.jobber is not None
1373                     ]
1374                 def compar_key(pa):
1375                         return (pa.jobber, -pa.expires)
1376                 l.sort(key = compar_key)
1377                 return l
1378
1379 #---------- implementations of actual operation modes ----------
1380
def do_pirate(pirates, bu):
        # Prints the info for each named pirate, laid out like a
        # dictionary literal keyed by pirate name.  `bu' is unused.
        print('{')
        for pirate in pirates:
                info = PirateInfo(pirate)
                entry = '%s: %s,' % (repr(pirate), info)
                print(entry)
        print('}')
1387
def prep_crewflag_of(args, bu, max_age, selector, constructor):
        # Shared code for the crew-of and flag-of operations.
        #  args         command line arguments; exactly one pirate name
        #  bu           called with a message if that is not the case
        #  max_age      passed to PirateInfo and to `constructor'
        #  selector     picks the crew or flag entry out of a PirateInfo
        #  constructor  builds the result from the entry's first element
        # Returns the constructed object, or None if the selector
        # found nothing.
        if len(args) != 1:
                bu('crew-of etc. take one pirate name')
        pirate = PirateInfo(args[0], max_age)
        chosen = selector(pirate)
        if chosen is not None:
                return constructor(chosen[0], max_age)
        return None
1394
def prep_crew_of(args, bu, max_age=300):
        # Returns the CrewInfo for the crew of the pirate named in
        # args, or None if prep_crewflag_of finds no crew.
        def crew_of(pi):
                return pi.crew
        return prep_crewflag_of(args, bu, max_age, crew_of, CrewInfo)
1398
def prep_flag_of(args, bu, max_age=300):
        # Returns the FlagInfo for the flag of the pirate named in
        # args, or None if prep_crewflag_of finds no flag.
        def flag_of(pi):
                return pi.flag
        return prep_crewflag_of(args, bu, max_age, flag_of, FlagInfo)
1402
def do_crew_of(args, bu):
        # Prints the crew info for the crew of the named pirate.
        print(prep_crew_of(args, bu))
1406
def do_flag_of(args, bu):
        # Prints the flag info for the flag of the named pirate.
        print(prep_flag_of(args, bu))
1410
def do_standings_crew_of(args, bu):
        # Prints a standings table covering every member of the crew
        # of the single pirate named in args, one section per rank.
        #  args   command line arguments: one pirate name
        #  bu     bad-usage callback, passed on to prep_crew_of
        ci = prep_crew_of(args, bu, 60)
        if ci is None:
                # prep_crew_of gives None for a pirate without a crew;
                # say so rather than failing on ci.crew below
                print('Pirate is not in a crew.')
                return
        tab = StandingsTable(sys.stdout)
        tab.headings()
        for (rank, members) in ci.crew:
                if not members: continue
                tab.literalline('')
                tab.literalline('%s:' % rank)
                for p in members:
                        # randomised maximum age for each member's info
                        pi = PirateInfo(p, random.randint(900,1800))
                        tab.pirate(pi)
1422
def do_ocean(args, bu):
        # Prints the ocean info for the default ocean, followed by
        # each island in name order.  Takes no arguments.
        if len(args) != 0:
                bu('ocean takes no further arguments')
        fetcher.default_ocean()
        ocean = OceanInfo(IslandFlagInfo)
        print(ocean)
        for islename in sorted(ocean.islands):
                print(ocean.islands[islename])
1431
def do_embargoes(args, bu):
        # Prints, archipelago by archipelago, every island of the
        # default ocean with its owning flag and, under the heading
        # `Embargoed flags', the other flag of each of the owner's
        # relations whose this_declaring is negative.
        #  args   remaining command line arguments; must be empty
        #  bu     bad-usage callback, called with a message otherwise
        # (the message used to name the `ocean' command by mistake)
        if (len(args)): bu('embargoes takes no further arguments')
        fetcher.default_ocean()
        oi = OceanInfo(IslandFlagInfo)
        wr = sys.stdout.write
        print ('EMBARGOES:  Island    | Owning flag'+
                '                    | Embargoed flags')

        def getflname(isle):
                # text for the `Owning flag' column
                if isle.islandid is None: return 'uncolonisable'
                if isle.flag is None: return 'uncolonised'
                return isle.flag.name

        progressreporter.stop()

        for archname in sorted(oi.arches.keys()):
                print('ARCHIPELAGO:  %s' % archname)
                for islename in sorted(oi.arches[archname].keys()):
                        isle = oi.islands[islename]
                        wr(' %-20s | ' % isle.name)
                        flname = getflname(isle)
                        wr('%-30s | ' % flname)
                        flag = isle.flag
                        if flag is None:
                                # no owner, so nothing to list
                                print('')
                                continue
                        delim = ''
                        for rel in flag.relations:
                                if rel.this_declaring >= 0: continue
                                wr(delim)
                                wr(rel.other_flagname)
                                delim = '; '
                        print('')
1463
1464 def do_embargoes_flag_of(args, bu):
1465         progressreporter.doing('fetching flag info')
1466         fi = prep_flag_of(args, bu)
1467         if fi is None:
1468                 progressreporter.stop()
1469                 print 'Pirate is not in a flag.'
1470                 return
1471
1472         oi = OceanInfo(IslandFlagInfo)
1473
1474         progressreporter.stop()
1475         print ''
1476
1477         any = False
1478         for islename in sorted(oi.islands.keys()):
1479                 isle = oi.islands[islename]
1480                 flag = isle.flag
1481                 if flag is None: continue
1482                 for rel in flag.relations:
1483                         if rel.this_declaring >= 0: continue
1484                         if rel.other_flagid != fi.flagid: continue
1485                         if not any: print 'EMBARGOED:'
1486                         any = True
1487                         print "  %-30s (%s)" % (islename, flag.name)
1488         if not any:
1489                 print 'No embargoes.'
1490         print ''
1491
1492         war_flag(fi)
1493         print ''
1494
1495 def do_war_flag_of(args, bu):
1496         fi = prep_flag_of(args, bu)
1497         war_flag(fi)
1498
1499 def war_flag(fi):
1500         any = False
1501         for certain in [True, False]:
1502                 anythis = False
1503                 for rel in fi.relations:
1504                         if rel.this_declaring >= 0: continue
1505                         if (rel.other_declaring_max < 0) != certain: continue
1506                         if not anythis:
1507                                 if certain: m = 'SINKING PvP'
1508                                 else: m = 'RISK OF SINKING PvP'
1509                                 print '%s (%s):' % (m, rel.yoweb_heading)
1510                         anythis = True
1511                         any = True
1512                         print " ", rel.other_flagname
1513         if not any:
1514                 print 'No sinking PvP.'
1515
1516 #----- modes which use the chat log parser are quite complex -----
1517
# Progress display for chat log scanning which writes a percentage,
# terminated by a carriage return, to a file object (sys.stdout unless
# another is given).
class ProgressPrintPercentage:
        def __init__(self, f=sys.stdout):
                self._f = f

        # The text for `done out of total'.
        def progress_string(self,done,total):
                percent = (done*100) / total
                return "scan chat logs %3d%%\r" % percent

        # Write and flush progress_string for the given arguments.
        def progress(self,*a):
                text = self.progress_string(*a)
                out = self._f
                out.write(text)
                out.flush()

        # Announce which pirate and ocean we are starting up for.
        def show_init(self, pirate, ocean):
                print >>self._f, 'Starting up, %s on the %s ocean' % (
                        pirate, ocean)

        # Blank out the progress text once scanning has caught up.
        def caughtup(self):
                out = self._f
                out.write('                   \r')
                out.flush()
1532
# Common setup for the modes which follow a chat log.
#
# args must be exactly the chat log filename, whose last path
# component has to begin PIRATE_OCEAN_ (capitalised pirate name,
# lower-case ocean name); problems are reported through bu.
# The ocean from the filename is made the fetcher's default, a
# PirateInfo for the pirate is fetched (maximum age max_myself_age)
# and a ChatLogTracker is created and caught up, reporting to
# `progress'.  Returns (myself, track).
def prep_chat_log(args, bu,
                progress=ProgressPrintPercentage(),
                max_myself_age=3600):
        if len(args) != 1:
                bu('this action takes only chat log filename')
        logfn = args[0]

        pattern = '(?:.*/)?([A-Z][a-z]+)_([a-z]+)_'
        m = regexp.match(pattern, logfn)
        if not m:
                bu('chat log filename is not in expected format')
        pirate = m.group(1)
        ocean = m.group(2)
        fetcher.default_ocean(ocean)

        progress.show_init(pirate, fetcher.ocean)
        myself = PirateInfo(pirate,max_myself_age)
        track = ChatLogTracker(myself, logfn)

        # debug level is lowered by two for the initial catchup only
        opts.debug = opts.debug - 2
        track.catchup(progress)
        opts.debug = opts.debug + 2

        track.force_redisplay()

        return (myself, track)
1555
1556 def do_track_chat_log(args, bu):
1557         (myself, track) = prep_chat_log(args, bu)
1558         while True:
1559                 track.catchup()
1560                 if track.changed():
1561                         print track
1562                 sleep(0.5 + 0.5 * random.random())
1563
1564 #----- ship management aid -----
1565
1566 class Display_dumb(ProgressPrintPercentage):
1567         def __init__(self):
1568                 ProgressPrintPercentage.__init__(self)
1569         def show(self, s):
1570                 print '\n\n', s;
1571         def realstart(self):
1572                 pass
1573
1574 class Display_overwrite(ProgressPrintPercentage):
1575         def __init__(self):
1576                 ProgressPrintPercentage.__init__(self)
1577
1578                 null = file('/dev/null','w')
1579                 curses.setupterm(fd=null.fileno())
1580
1581                 self._clear = curses.tigetstr('clear')
1582                 if not self._clear:
1583                         self._debug('missing clear!')
1584                         self.show = Display_dumb.show
1585                         return
1586
1587                 self._t = {'el':'', 'ed':''}
1588                 if not self._init_sophisticated():
1589                         for k in self._t.keys(): self._t[k] = ''
1590                         self._t['ho'] = self._clear
1591
1592         def _debug(self,m): debug('display overwrite: '+m)
1593
1594         def _init_sophisticated(self):
1595                 for k in self._t.keys():
1596                         s = curses.tigetstr(k)
1597                         self._t[k] = s
1598                 self._t['ho'] = curses.tigetstr('ho')
1599                 if not self._t['ho']:
1600                         cup = curses.tigetstr('cup')
1601                         self._t['ho'] = curses.tparm(cup,0,0)
1602                 missing = [k for k in self._t.keys() if not self._t[k]]
1603                 if missing:
1604                         self.debug('missing '+(' '.join(missing)))
1605                         return 0
1606                 return 1
1607
1608         def show(self, s):
1609                 w = sys.stdout.write
1610                 def wti(k): w(self._t[k])
1611
1612                 wti('ho')
1613                 nl = ''
1614                 for l in s.rstrip().split('\n'):
1615                         w(nl)
1616                         w(l)
1617                         wti('el')
1618                         nl = '\r\n'
1619                 wti('ed')
1620                 w(' ')
1621                 sys.stdout.flush()
1622
1623         def realstart(self):
1624                 sys.stdout.write(self._clear)
1625                 sys.stdout.flush()
1626                         
1627
1628 def do_ship_aid(args, bu):
1629         if opts.ship_duty is None: opts.ship_duty = True
1630
1631         displayer = globals()['Display_'+opts.display]()
1632
1633         (myself, track) = prep_chat_log(args, bu, progress=displayer)
1634
1635         displayer.realstart()
1636
1637         if os.isatty(0): kr_create = KeystrokeReader
1638         else: kr_create = DummyKeystrokeReader
1639
1640         try:
1641                 kreader = kr_create(0, 10)
1642                 ship_aid_core(myself, track, displayer, kreader)
1643         finally:
1644                 kreader.stop()
1645                 print '\n'
1646
# Base for sorters which order `pa' objects by a key computed from
# their pirate info.  Subclasses supply compar_key(pi).
class KeyBasedSorter:
        # Sort key for one pa: None when it has no pirate info yet,
        # otherwise whatever compar_key makes of that info.
        def compar_key_pa(self, pa):
                info = pa.pirate_info()
                if info is not None:
                        return self.compar_key(info)
                return None

        # Sort the list l in place.
        def lsort_pa(self, l):
                keyfn = self.compar_key_pa
                l.sort(key = keyfn)
1654
# Sorter which orders by pirate name.
class NameSorter(KeyBasedSorter):
        def desc(self):
                return 'name'
        def compar_key(self, pi):
                return pi.name
1658
# Sorter for a duty puzzle, or several given as `A/B'.
#
# Pirates are ordered by their best standing among the wanted
# puzzles (highest first), then by their standings in the remaining
# core_duty_puzzles (the `avoid' set), then by name.
class SkillSorter(NameSorter):
        def __init__(self, relevant):
                wanted = frozenset(relevant.split('/'))
                others = set()
                for p in core_duty_puzzles:
                        # entries are either one name or a group of names
                        if isinstance(p,basestring):
                                others.add(p)
                        else:
                                others.update(set(p))
                self._want = wanted
                self._avoid = others - wanted
                self._desc = '%s' % relevant

        def desc(self):
                return self._desc

        def compar_key(self, pi):
                standings = pi.standings
                # missing wanted puzzles count as -1
                best_want = max([standings.get(puz,-1)
                                 for puz in self._want])
                # missing avoided puzzles count as standing_limit;
                # values are negated for sorting and the negation is
                # undone again for the key
                best_avoid = sorted([-standings.get(puz,standing_limit)
                                     for puz in self._avoid])
                debug('compar_key %s bw=%s ba=%s' % (pi.name,
                        repr(best_want), repr(best_avoid)))
                return (-best_want, [-x for x in best_avoid], pi.name)
1685
# Main loop of ship-aid.
#
# Each iteration catches up with the chat log, builds a status line
# followed by a standings table of everyone aboard (sorted by the
# current sorter) and of the jobbers, hands the text to
# displayer.show, and then processes at most one keystroke from
# kreader.
#
# Keys outside command entry:
#   q        quit
#   g c s b n d t   sort by Gunning, Carpentry, Sailing/Rigging,
#            Bilging, Navigating, Battle Navigation, Treasure Haul
#   a        sort by name
#   /        start entering a local command
#   +        start entering a local command prefilled with "a "
# During command entry: Return runs the command through
# track.local_command (an error it returns is shown until the next
# keystroke), Backspace/Delete erase, and Escape first discards what
# has been typed and then, pressed again, leaves command entry.
def ship_aid_core(myself, track, displayer, kreader):

        def find_vessel():
                vn = track.vesselname()
                if vn: return (vn, " on board the %s" % vn)
                vn = track.lastvesselname()
                if vn: return (vn, " ashore from the %s" % vn)
                return (None, " not on a vessel")

        # `now' is the variable assigned in the loop below.
        def timeevent(t,e):
                if t is None: return ' ' * 22
                return " %-4s %-16s" % (format_time_interval(now - t),e)

        displayer.show(track.myname() + find_vessel()[1] + '...')

        # advanced each time getch returns no key
        rotate_nya = '/-\\'

        sort = NameSorter()
        clicmd = None    # text being typed, or None outside command entry
        clierr = None    # error from the last command, awaiting a key
        cliexec = None   # command to run after the next redisplay

        while True:
                track.catchup()
                now = time.time()

                (vn, vs) = find_vessel()

                s = ''
                if cliexec is not None:
                        s += '...'
                elif clierr is not None:
                        s += 'Error: '+clierr
                elif clicmd is not None:
                        s += '/' + clicmd
                else:
                        s = track.myname() + vs
                        s += " at %s" % time.strftime("%Y-%m-%d %H:%M:%S")
                        s += kreader.info()
                s += '\n'

                tbl_s = StringIO()
                tbl = StandingsTable(tbl_s)

                aboard = track.aboard(vn)
                sort.lsort_pa(aboard)

                jobbers = track.jobbers()

                if track.vesselname(): howmany = 'aboard: %2d' % len(aboard)
                else: howmany = ''

                tbl.headings(howmany, '  sorted by '+sort.desc())

                last_jobber = None

                for pa in aboard + jobbers:
                        # new heading whenever the jobber value changes
                        if pa.jobber != last_jobber:
                                last_jobber = pa.jobber
                                tbl.literalline('')
                                tbl.literalline('jobbers '+last_jobber)

                        pi = pa.pirate_info()

                        xs = ''
                        if pa.gunner: xs += 'G '
                        else: xs += '  '
                        xs += timeevent(pa.last_time, pa.last_event)
                        xs += timeevent(pa.last_chat_time, pa.last_chat_chan)

                        if pi is None:
                                tbl.pirate_dummy(pa.name, rotate_nya[0], xs)
                        else:
                                tbl.pirate(pi, xs)

                s += tbl_s.getvalue()
                displayer.show(s)
                tbl_s.close()

                if cliexec is not None:
                        clierr = track.local_command("/"+cliexec.strip())
                        cliexec = None
                        continue

                k = kreader.getch()
                if k is None:
                        rotate_nya = rotate_nya[1:3] + rotate_nya[0]
                        continue

                if clierr is not None:
                        # any key dismisses the error message
                        clierr = None
                        continue

                if clicmd is not None:
                        if k == '\r' or k == '\n':
                                cliexec = clicmd
                                clicmd = clicmdbase
                        elif k == '\33' and clicmd != clicmdbase:
                                # Escape with something typed: discard
                                # it.  ('\e' is not an escape sequence
                                # in Python, so the old test for it
                                # could never match a keystroke.  The
                                # comparison is against clicmdbase so
                                # that the "a " prefix of `+' does not
                                # stop a second Escape from leaving.)
                                clicmd = clicmdbase
                        elif k == '\33':
                                clicmd = None
                        elif k == '\b' or k == '\177':
                                clicmd = clicmd[ 0 : len(clicmd)-1 ]
                        else:
                                clicmd += k
                        continue

                if k == 'q': break
                elif k == 'g': sort = SkillSorter('Gunning')
                elif k == 'c': sort = SkillSorter('Carpentry')
                elif k == 's': sort = SkillSorter('Sailing/Rigging')
                elif k == 'b': sort = SkillSorter('Bilging')
                elif k == 'n': sort = SkillSorter('Navigating')
                elif k == 'd': sort = SkillSorter('Battle Navigation')
                elif k == 't': sort = SkillSorter('Treasure Haul')
                elif k == 'a': sort = NameSorter()
                elif k == '/': clicmdbase = ""; clicmd = clicmdbase
                elif k == '+': clicmdbase = "a "; clicmd = clicmdbase
                else: pass # unknown key command
1805
1806 #---------- individual keystroke input ----------
1807
# Keystroke reader for when there is no terminal to read from: never
# produces a key, and getch just waits a second.
class DummyKeystrokeReader:
        def __init__(self,fd,timeout_dummy):
                pass

        def stop(self):
                pass

        def getch(self):
                sleep(1)
                return None

        def info(self):
                return ' [noninteractive]'
1813
# Reads single keystrokes from the terminal on fd.
#
# The constructor saves the terminal attributes and then turns off
# ECHO, ECHONL, ICANON and IEXTEN, with VMIN 0 and VTIME set to
# timeout_decisec; stop() puts the saved attributes back.
class KeystrokeReader(DummyKeystrokeReader):
        def __init__(self, fd, timeout_decisec=0):
                self._fd = fd
                self._saved = termios.tcgetattr(fd)

                attrs = termios.tcgetattr(fd)
                lflags_off = (termios.ECHO | termios.ECHONL |
                              termios.ICANON | termios.IEXTEN)
                attrs[3] = attrs[3] & ~lflags_off
                cc = attrs[6]
                cc[termios.VMIN] = 0
                cc[termios.VTIME] = timeout_decisec
                termios.tcsetattr(fd, termios.TCSANOW, attrs)

        def stop(self):
                termios.tcsetattr(self._fd, termios.TCSANOW, self._saved)

        # One byte of input, or None if the read returned nothing.
        def getch(self):
                debug_flush()
                byte = os.read(self._fd, 1)
                if len(byte):
                        return byte
                return None

        def info(self):
                return ''
1833
1834 #---------- main program ----------
1835
# Program entry point: parse the command line, set up the global
# option object, page fetchers and progress reporter, and dispatch to
# the do_* function for the requested mode.
#
# The mode name is mapped to a function name by turning `-' into `_'
# (a literal `_' in the mode is mapped to `#' first, so that it can
# never match); the function receives the remaining arguments and the
# option parser's error method.
def main():
        global opts, fetcher, yppedia, progressreporter

        pa = OptionParser(
'''usage: .../yoweb-scrape [OPTION...] ACTION [ARGS...]
actions:
 yoweb-scrape [--ocean OCEAN ...] pirate PIRATE
 yoweb-scrape [--ocean OCEAN ...] crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
 yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
 yoweb-scrape [--ocean OCEAN ...] ocean|embargoes
 yoweb-scrape [--ocean OCEAN ...] war-flag-of|embargoes-flag-of PIRATE
 yoweb-scrape [options] ship-aid CHAT-LOG  (must be .../PIRATE_OCEAN_chat-log*)

display modes (for --display) apply to ship-aid:
 --display=dumb       just print new information, scrolling the screen
 --display=overwrite  use cursor motion, selective clear, etc. to redraw at top''')
        ao = pa.add_option
        ao('-O','--ocean',dest='ocean', metavar='OCEAN', default=None,
                help='select ocean OCEAN')
        ao('--cache-dir', dest='cache_dir', metavar='DIR',
                default='~/.yoweb-scrape-cache',
                help='cache yoweb pages in DIR')
        ao('-D','--debug', action='count', dest='debug', default=0,
                help='enable debugging output')
        ao('--debug-fd', type='int', dest='debug_fd',
                help='write any debugging output to specified fd')
        ao('-q','--quiet', action='store_true', dest='quiet',
                help='suppress warning output')
        ao('--display', action='store', dest='display',
                type='choice', choices=['dumb','overwrite'],
                help='how to display ship aid')
        ao('--local-ypp-dir', action='store', dest='localhtml',
                help='get yppedia pages from local directory LOCALHTML'+
                        ' instead of via HTTP')

        # type='int' so that a value given on the command line is a
        # number like the default, not a string.
        ao_jt = lambda wh, t: ao(
                '--timeout-sa-'+wh, action='store', dest='timeout_'+wh,
                type='int',
                default=t, help=('set timeout for expiring %s jobbers' % wh))
        ao_jt('applied',      120)
        ao_jt('invited',      120)
        ao_jt('declined',      30)
        ao_jt('ashore',      1800)

        ao('--ship-duty', action='store_true', dest='ship_duty',
                help='show ship duty station puzzles')
        ao('--all-puzzles', action='store_false', dest='ship_duty',
                help='show all puzzles, not just ship duty stations')

        ao('--min-cache-reuse', type='int', dest='min_max_age',
                metavar='SECONDS', default=60,
                help='always reuse cache yoweb data if no older than this')

        (opts,args) = pa.parse_args()
        random.seed()

        if len(args) < 1:
                print >>sys.stderr, copyright_info
                pa.error('need a mode argument')

        if opts.debug_fd is not None:
                opts.debug_file = os.fdopen(opts.debug_fd, 'w')
        else:
                opts.debug_file = sys.stdout

        mode = args[0]
        mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_')
        try: mode_fn = globals()[mode_fn_name]
        except KeyError: pa.error('unknown mode "%s"' % mode)

        # fixed parameters
        opts.expire_age = max(3600, opts.min_max_age)

        opts.ship_reboard_clearout = 3600

        if opts.cache_dir.startswith('~/'):
                # expanduser copes with HOME being unset, where
                # concatenating os.getenv('HOME') would raise
                opts.cache_dir = os.path.expanduser(opts.cache_dir)

        if opts.display is None:
                # debug output on stdout, or stdout not a terminal,
                # rules out redrawing in place
                if ((opts.debug > 0 and opts.debug_fd is None)
                    or not os.isatty(sys.stdout.fileno())):
                        opts.display = 'dumb'
                else:
                        opts.display = 'overwrite'

        fetcher = Yoweb(opts.ocean, opts.cache_dir)
        yppedia = Yppedia(opts.cache_dir)

        if opts.debug or not os.isatty(0):
                progressreporter = NullProgressReporter()
        else:
                progressreporter = TypewriterProgressReporter()

        mode_fn(args[1:], pa.error)
1930
# Run the program; main() is called unconditionally when this file is
# executed.
main()