standingvals = ('Able/Distinguished/Respected/Master/Renowned'+
'/Grand-Master/Legendary/Ultimate').split('/')
+pirate_ref_re = regexp.compile('^/yoweb/pirate\\.wm')
+
def debug(m):
if opts.debug:
print >>sys.stderr, m
try: os.mkdir(cachedir)
except (OSError,IOError), oe:
if oe.errno != errno.EEXIST: raise
+ self._cache_scan(time.time())
- def _rate_limit_cache_clean(self, now):
+ def _cache_scan(self, now):
+ # Walk self.cachedir, removing entries whose age (now - mtime)
+ # exceeds opts.max_age, and collect the ages of the others.
+ # NOTE(review): indentation is not visible in this diff; the
+ # 'continue' after os.remove presumably belongs to the expiry
+ # branch, so expired entries are not reported -- confirm.
+ # returns list of ages, unsorted
ages = []
- for path in os.listdir(self.cachedir):
- if not path.startswith('#'): continue
+ debug('Fetcher scan_cache')
+ for leaf in os.listdir(self.cachedir):
+ # only directory entries whose name starts with '#' are examined
+ if not leaf.startswith('#'): continue
+ path = self.cachedir + '/' + leaf
+ # an entry that no longer exists (ENOENT) is skipped; any other
+ # stat failure is re-raised
try: s = os.stat(path)
except (OSError,IOError), oe:
if oe.errno != errno.ENOENT: raise
continue
age = now - s.st_mtime
if age > opts.max_age:
- debug('Fetcher expire %d %s' % (age, path))
+ debug('Fetcher expire %d %s' % (age, path))
+ # ENOENT from the removal is tolerated; other errors are re-raised
try: os.remove(path)
except (OSError,IOError), oe:
if oe.errno != errno.ENOENT: raise
continue
ages.append(age)
+ return ages
+
+ def _rate_limit_cache_clean(self, now):
+ # Expire old cache entries (via _cache_scan), then sleep if the
+ # remaining entries are too recent: each age, taken in ascending
+ # order, is compared against a threshold min_age that starts at 1
+ # and is stepped to 2*min_age+1; the sleep is the largest shortfall
+ # (min_age - age) seen.
+ # NOTE(review): indentation is not visible in this diff; the
+ # threshold step presumably runs once per entry -- confirm.
+ ages = self._cache_scan(now)
ages.sort()
- debug('Fetcher ages ' + `ages`)
+ debug('Fetcher ages ' + `ages`)
min_age = 1
need_wait = 0
for age in ages:
if age < min_age:
- debug('Fetcher morewait min=%d age=%d' %
+ debug('Fetcher morewait min=%d age=%d' %
(min_age, age))
- need_wait = max(need_wait, age - min_age)
+ # shortfall is threshold minus age, so it is positive here
+ need_wait = max(need_wait, min_age - age)
min_age *= 2
min_age += 1
- if need_wait:
- debug('Fetcher wait %d' % need_wait)
- os.sleep(need_wait)
+ if need_wait > 0:
+ debug('Fetcher wait %d' % need_wait)
+ time.sleep(need_wait)
def fetch(self, url):
debug('Fetcher fetch %s' % url)
self.msgs += child_souplog.msgs
child_souplog.msgs = [ ]
-class PirateInfo(SoupLog):
+def soup_text(obj):
+    # Join every text node found under soup node obj (as returned by
+    # obj.findAll(text=True)) into one string and strip leading and
+    # trailing whitespace from the result.
+    # The local is called 'text' rather than 'str' so that the builtin
+    # str is not shadowed.
+    text = ''.join(obj.findAll(text=True))
+    return text.strip()
+
+class SomethingSoupInfo(SoupLog):
+    # Common base for scrapers of a single yoweb page.  Construction
+    # fetches the page identified by (kind, tail) through the global
+    # fetcher and leaves the parsed document in self.soup.
+    def __init__(self, kind, tail):
+        SoupLog.__init__(self)
+        page = fetcher.yoweb(kind, tail)
+        entity_mode = BeautifulSoup.HTML_ENTITIES
+        self.soup = BeautifulSoup(page, convertEntities=entity_mode)
+
+class PirateInfo(SomethingSoupInfo):
# Public data members:
# pi.standings = { 'Treasure Haul': 'Able' ... }
# pi.crew = (id, name)
# pi.flag = (id, name)
# pi.msgs = [ 'message describing problem with scrape' ]
+
+ def __init__(self, pirate):
+ SomethingSoupInfo.__init__(self,
+ 'pirate.wm?target=', pirate)
+ self._find_standings()
+ self.crew = self._find_crewflag('crew',
+ '^/yoweb/crew/info\\.wm')
+ self.flag = self._find_crewflag('flag',
+ '^/yoweb/flag/info\\.wm')
def _find_standings(self):
imgs = self.soup.findAll('img',
skl.soupm(key, 'duty missing sibling "%s"'
% duty)
continue
- valstr = ''.join(valelem.findAll(text=True))
+ valstr = soup_text(valelem)
match = re.match(valstr)
if match is None:
skl.soupm(key, ('duty "%s" unparseable'+
self.soupm(thing, ('incomprehensible %s id ref'+
' (%s in %s)') % (cf, id_re, id_haystack))
return None
- name = ''.join(thing.findAll(text=True))
+ name = soup_text(thing)
return (match.group(1), name)
-
- def __init__(self, pirate):
- SoupLog.__init__(self)
- html = fetcher.yoweb('pirate.wm?target=', pirate)
- self.soup = BeautifulSoup(html,
- convertEntities=BeautifulSoup.HTML_ENTITIES
- )
+ def __str__(self):
+ return `(self.crew, self.flag, self.standings, self.msgs)`
- self._find_standings()
+class CrewInfo(SomethingSoupInfo):
+ # Scrapes a crew's info page (crew/info.wm) into a list of ranks,
+ # each with the names of the pirates listed under it.
+ #
+ # Public data members:
+ # ci.crew = [ ('Captain', ['Pirate', ...]),
+ # ('Senior Officer', [...]),
+ # ... ]
+ # ci.msgs = [ 'message describing problem with scrape' ]
- self.crew = self._find_crewflag('crew',
- '^/yoweb/crew/info\\.wm')
- self.flag = self._find_crewflag('flag',
- '^/yoweb/flag/info\\.wm')
+ def __init__(self, crewid):
+ SomethingSoupInfo.__init__(self,
+ 'crew/info.wm?crewid=', crewid)
+ self._find_crew()
+
+ def _find_crew(self):
+ # Fills in self.crew; on a page it cannot make sense of it records
+ # a message and leaves self.crew as far as it got.
+ self.crew = []
+ # exactly one captain image is required; it is the starting point
+ # for locating the member table
+ capts = self.soup.findAll('img',
+ src='/yoweb/images/crew-captain.png')
+ if len(capts) != 1:
+ self.msg('crew members: no. of captain images != 1')
+ return
+ # move outwards through enclosing tables until one contains a
+ # link to a pirate page
+ tbl = capts[0]
+ while not tbl.find('a', href=pirate_ref_re):
+ tbl = tbl.findParent('table')
+ if not tbl:
+ self.msg('crew members: cannot find table')
+ return
+ # current_rank_crew is the member list of the rank most recently
+ # seen; None until the first rank row
+ current_rank_crew = None
+ crew_rank_re = regexp.compile('/yoweb/images/crew')
+ for row in tbl.contents:
+ # findAll(recurse=False)
+ # children that are unicode strings (text, not tags) are skipped
+ if isinstance(row, unicode):
+ continue
+
+ # a row containing an image whose src matches crew_rank_re starts
+ # a new rank, named by the row's text
+ is_rank = row.find('img', attrs={'src': crew_rank_re})
+ if is_rank:
+ rank = soup_text(row)
+ current_rank_crew = []
+ self.crew.append((rank, current_rank_crew))
+ continue
+ # pirate links in any other row are members of the current rank
+ for cell in row.findAll('a', href=pirate_ref_re):
+ if current_rank_crew is None:
+ self.soupm(cell, 'crew members: crew'
+ ' before rank')
+ continue
+ current_rank_crew.append(soup_text(cell))
def __str__(self):
- return `(self.crew, self.flag, self.standings, self.msgs)`
+ return `(self.crew, self.msgs)`
+
+def do_pirate(pirates, bu):
+    # Mode 'pirate': print a dict-style listing with one "name: info,"
+    # line for each pirate named in pirates.  bu is accepted so that all
+    # do_* mode functions share a signature; it is not used here.
+    print '{'
+    for name in pirates:
+        scraped = PirateInfo(name)
+        print '%s: %s,' % (repr(name), scraped)
+    print '}'
+
+def prep_crew_of(args, bu):
+    # Resolve the single pirate name in args to the CrewInfo of that
+    # pirate's crew.  bu is called with a message when the request
+    # cannot be honoured; it is expected not to return (main passes the
+    # option parser's error method).
+    if len(args) != 1: bu('crew-of takes one pirate name')
+    pi = PirateInfo(args[0])
+    # pi.crew can apparently be None (the crew/flag lookup has a
+    # 'return None' failure path); report that instead of failing with
+    # a TypeError when indexing None.
+    if pi.crew is None:
+        bu('cannot find crew of pirate "%s"' % args[0])
+    return CrewInfo(pi.crew[0])
+
+def do_crew_of(args, bu):
+    # Mode 'crew-of': print the CrewInfo for the crew of the single
+    # pirate named in args (argument checking is done by prep_crew_of).
+    print prep_crew_of(args, bu)
+
+#def do_dutytab_crew_of(pirates, badusage):
+# if len(pirates) != 1: badusage('dutytab-crew-of takes one pirate name')
def main():
global opts, fetcher
help='suppress warning output')
(opts,args) = pa.parse_args()
+ if len(args) < 1:
+ pa.error('need a mode argument')
+
+ mode = args[0]
+ mode_fn_name = 'do_' + mode.replace('_','#').replace('-','_')
+ try: mode_fn = globals()[mode_fn_name]
+ except KeyError: pa.error('unknown mode "%s"' % mode)
+
# fixed parameters
opts.max_age = 240
+
if opts.cache_dir.startswith('~/'):
opts.cache_dir = os.getenv('HOME') + opts.cache_dir[1:]
fetcher = Fetcher(opts.ocean, opts.cache_dir)
- # test program:
- test = PirateInfo('Anaplian')
- print test
+ mode_fn(args[1:], pa.error)
main()