X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?a=blobdiff_plain;ds=sidebyside;f=yoweb-scrape;h=717b539131d8ffb0e4ba6abba5f6cddc3b38b199;hb=56930e8be13d91872c740164e9eb632d5477a455;hp=f9976b8341a14bf96aa5d851a7691c8b13df0ca2;hpb=d3bdf1a84d868f1eb2127cbea55d56f2fed8ecbd;p=ypp-sc-tools.db-live.git
diff --git a/yoweb-scrape b/yoweb-scrape
index f9976b8..717b539 100755
--- a/yoweb-scrape
+++ b/yoweb-scrape
@@ -45,6 +45,7 @@ import curses
import termios
import random
import subprocess
+import copy
from optparse import OptionParser
from StringIO import StringIO
@@ -107,6 +108,18 @@ def yppsc_dir():
os.environ["YPPSC_YARRG_SRCBASE"] = lib
return lib
+soup_massage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
+soup_massage.append(
+ (regexp.compile('(\
0:
need_wait += random.random() - 0.5
return need_wait
- def _rate_limit_cache_clean(self, now):
- need_wait = self.need_wait(now)
+ def _rate_limit_cache_clean(self, now, next_url=None):
+ need_wait = self.need_wait(now, next_url=next_url)
if need_wait > 0:
- debug('Fetcher wait %d' % need_wait)
+ debug('Fetcher wait %f' % need_wait)
sleep(need_wait)
def fetch(self, url, max_age):
@@ -186,7 +210,7 @@ class Fetcher:
return data
debug('Fetcher fetch')
- self._rate_limit_cache_clean(now)
+ self._rate_limit_cache_clean(now, next_url=url)
stream = urllib2.urlopen(url)
data = stream.read()
@@ -252,9 +276,7 @@ class SomethingSoupInfo(SoupLog):
def __init__(self, kind, tail, max_age):
SoupLog.__init__(self)
html = fetcher.yoweb(kind, tail, max_age)
- self._soup = BeautifulSoup(html,
- convertEntities=BeautifulSoup.HTML_ENTITIES
- )
+ self._soup = make_soup(html)
#---------- scraper for pirate pages ----------
@@ -409,18 +431,31 @@ class CrewInfo(SomethingSoupInfo):
def __str__(self):
return `(self.crew, self.msgs)`
+class FlagRelation():
+ # Record of how one flag stands towards another flag; instances are
+ # the elements of FlagInfo.relations.
+ #
+ # Public data members (put there by hand by creator)
+ # other_flagname
+ # other_flagid
+ # yoweb_heading
+ # this_declaring
+ # other_declaring_min
+ # other_declaring_max
+ # where {this,other}_declaring{,_min,_max} are:
+ # -1 {this,other} is declaring war
+ # 0 {this,other} is not doing either
+ # +1 {this,other} is allying
+ def __repr__(self):
+ # The format string needs one conversion per value in the tuple;
+ # an empty string here raises TypeError ("not all arguments
+ # converted during string formatting"). %s is used throughout so
+ # that any member value can be shown.
+ return '<FlagRelation %s %s/%s..%s %s %s>' % (
+ self.yoweb_heading, self.this_declaring,
+ self.other_declaring_min, self.other_declaring_max,
+ self.other_flagname, self.other_flagid)
+
class FlagInfo(SomethingSoupInfo):
# Public data members (after init):
#
# flagid
# name # string
#
- # relations[n] = (otherflagname, otherflagid, [stringfromyoweb],
- # thisdeclaring, otherdeclaringmin, otherdeclaringmax)
- # # where {this,other}declaring{,min,max} are:
- # # -1 {this,other} is declaring war
- # # 0 {this,other} is not doing either
- # # +1 {this,other} is allying
+ # relations[n] = FlagRelation
# relation_byname[otherflagname] = relations[some_n]
- # relation_byid[otherflagname] = relations[some_n]
+ # relation_byid[otherflagid] = relations[some_n]
#
@@ -472,7 +507,13 @@ class FlagInfo(SomethingSoupInfo):
if rel: return 'flag id twice!'
if flagname in self.relation_byname:
return 'flag name twice!'
- rel = (flagname,flagid,head, thisdecl,othermin,othermax)
+ rel = FlagRelation()
+ rel.other_flagname = flagname
+ rel.other_flagid = flagid
+ rel.yoweb_heading = head
+ rel.this_declaring = thisdecl
+ rel.other_declaring_min = othermin
+ rel.other_declaring_max = othermax
self.relations.append(rel)
self.relation_byid[flagid] = rel
- self.relation_byname[flagid] = rel
+ # Key by name: the 'flag name twice!' check looks flagname up in this dict.
+ self.relation_byname[flagname] = rel
@@ -558,7 +599,7 @@ class IslandExtendedInfo(IslandBasicInfo):
self.islandid = None
self.yoweb_url = None
- soup = BeautifulSoup(self.yppedia())
+ soup = make_soup(self.yppedia())
content = soup.find('div', attrs = {'id': 'content'})
yoweb_re = regexp.compile('^http://\w+\.puzzlepirates\.com/'+
'yoweb/island/info\.wm\?islandid=(\d+)$')
@@ -579,7 +620,7 @@ class IslandExtendedInfo(IslandBasicInfo):
debug('IEI COLLECT FLAGID '+`self.name`+' URL '+`yo`)
if yo is None: return None
dataf = fetcher.fetch(yo, 1800)
- soup = BeautifulSoup(dataf)
+ soup = make_soup(dataf)
ruler_re = regexp.compile(
'/yoweb/flag/info\.wm\?flagid=(\d+)$')
ruler = soup.find('a', attrs = { 'href': ruler_re })
@@ -1376,6 +1417,7 @@ def do_crew_of(args, bu):
def do_flag_of(args, bu):
fi = prep_flag_of(args, bu)
+ print fi
def do_standings_crew_of(args, bu):
ci = prep_crew_of(args, bu, 60)
@@ -1424,34 +1466,63 @@ def do_embargoes(args, bu):
if flag is None: print ''; continue
delim = ''
for rel in flag.relations:
- (oname, oid, dummy, thisdeclaring,
- odeclaringmin,odeclaringmax) = rel
- if thisdeclaring >= 0: continue
+ if rel.this_declaring >= 0: continue
wr(delim)
- wr(oname)
+ wr(rel.other_flagname)
delim = '; '
print ''
def do_embargoes_flag_of(args, bu):
progressreporter.doing('fetching flag info')
fi = prep_flag_of(args, bu)
+ if fi is None:
+ progressreporter.stop()
+ print 'Pirate is not in a flag.'
+ return
+
oi = OceanInfo(IslandFlagInfo)
+
progressreporter.stop()
+ print ''
+
any = False
for islename in sorted(oi.islands.keys()):
isle = oi.islands[islename]
flag = isle.flag
if flag is None: continue
for rel in flag.relations:
- (oname, oid, dummy, thisdeclaring,
- odeclaringmin,odeclaringmax) = rel
- if thisdeclaring >= 0: continue
- if oid != fi.flagid: continue
+ if rel.this_declaring >= 0: continue
+ if rel.other_flagid != fi.flagid: continue
if not any: print 'EMBARGOED:'
any = True
- print " %-30s (%s)" % (islename, flag.name)
+ print " %-30s (%s)" % (islename, flag.name)
if not any:
print 'No embargoes.'
+ print ''
+
+ war_flag(fi)
+ print ''
+
+def do_war_flag_of(args, bu):
+ # Presumably the handler for the war-flag-of action in the usage text:
+ # looks up the flag via prep_flag_of and prints its wars via war_flag.
+ fi = prep_flag_of(args, bu)
+ # prep_flag_of can return None (do_embargoes_flag_of checks for it);
+ # war_flag would then fail on fi.relations, so report it instead.
+ if fi is None: print 'Pirate is not in a flag.'; return
+ war_flag(fi)
+
+def war_flag(fi):
+ # Print the flags that fi is declaring war on (this_declaring below
+ # zero), in two passes: first those whose other_declaring_max is also
+ # below zero, headed 'SINKING PvP', then the remainder, headed
+ # 'RISK OF SINKING PvP'. Prints 'No sinking PvP.' if neither pass
+ # matched anything. fi must provide .relations (FlagRelation objects).
+ # NOTE(review): indentation is not visible in this diff rendering, so
+ # the nesting of the statements below is to be confirmed in the file.
+ any = False
+ for certain in [True, False]:
+ anythis = False
+ for rel in fi.relations:
+ # Skip relations where this flag is not declaring war.
+ if rel.this_declaring >= 0: continue
+ # Keep only relations belonging to the current pass.
+ if (rel.other_declaring_max < 0) != certain: continue
+ # The heading is printed only while anythis is still False; it uses
+ # the yoweb_heading of the relation being processed at that point.
+ if not anythis:
+ if certain: m = 'SINKING PvP'
+ else: m = 'RISK OF SINKING PvP'
+ print '%s (%s):' % (m, rel.yoweb_heading)
+ anythis = True
+ any = True
+ print " ", rel.other_flagname
+ if not any:
+ print 'No sinking PvP.'
#----- modes which use the chat log parser are quite complex -----
@@ -1784,7 +1855,7 @@ actions:
yoweb-scrape [--ocean OCEAN ...] standings-crew-of PIRATE
yoweb-scrape [--ocean OCEAN ...] track-chat-log CHAT-LOG
yoweb-scrape [--ocean OCEAN ...] ocean|embargoes
- yoweb-scrape [--ocean OCEAN ...] embargoes-flag-of PIRATE
+ yoweb-scrape [--ocean OCEAN ...] war-flag-of|embargoes-flag-of PIRATE
yoweb-scrape [options] ship-aid CHAT-LOG (must be .../PIRATE_OCEAN_chat-log*)
display modes (for --display) apply to ship-aid: