From ccf8df182d4d7b5258b87ed6907920f738ff32d2 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Fri, 15 May 2009 19:24:37 +0100 Subject: [PATCH] WIP fixes and also do entity conversion --- yoweb-scrape | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/yoweb-scrape b/yoweb-scrape index 0866667..4c7cee5 100755 --- a/yoweb-scrape +++ b/yoweb-scrape @@ -129,7 +129,7 @@ class PirateInfo(SoupLog): imgs = self.soup.findAll('img', src=regexp.compile('/yoweb/images/stat.*')) re = regexp.compile( -u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide\\ \\;([-A-Za-z]+)\\)\\s*$' +u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$' ) standings = { } @@ -157,8 +157,8 @@ u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide\\ \\;([-A-Za-z valstr = ''.join(valelem.findAll(text=True)) match = re.match(valstr) if match is None: - skl.soupm(key, 'duty "%s" unparseable'+ - ' standing "%s"' % (duty, valstr)) + skl.soupm(key, ('duty "%s" unparseable'+ + ' standing "%s"') % (duty, valstr)) continue standing = match.group(match.lastindex) standings[duty].append(standing) @@ -208,7 +208,7 @@ u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide\\ \\;([-A-Za-z html = fetcher.yoweb('pirate.wm?target=', pirate) self.soup = BeautifulSoup(html, -# convertEntities=BeautifulSoup.HTML_ENTITIES + convertEntities=BeautifulSoup.HTML_ENTITIES ) self._find_standings() -- 2.30.2