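WIP: enable BeautifulSoup HTML entity conversion, match the converted non-breaking space (\xa0) in the standings regexp, and fix %-format operator precedence in the "unparseable standing" message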

author Ian Jackson <ian@liberator.relativity.greenend.org.uk>

Fri, 15 May 2009 18:24:37 +0000 (19:24 +0100)

committer Ian Jackson <ian@liberator.relativity.greenend.org.uk>

Fri, 15 May 2009 18:24:37 +0000 (19:24 +0100)
author Ian Jackson <ian@liberator.relativity.greenend.org.uk>
Fri, 15 May 2009 18:24:37 +0000 (19:24 +0100)
committer Ian Jackson <ian@liberator.relativity.greenend.org.uk>
Fri, 15 May 2009 18:24:37 +0000 (19:24 +0100)
diff --git a/yoweb-scrape b/yoweb-scrape

index 0866667d7dd6982d20548f1393875e7bf282f8ef..4c7cee50db2024c42735f1259d715d88b74af855 100755 (executable)
--- a/yoweb-scrape
+++ b/yoweb-scrape
@@ -129,7 +129,7 @@ class PirateInfo(SoupLog):
                 imgs = self.soup.findAll('img',
                         src=regexp.compile('/yoweb/images/stat.*'))
                 re = regexp.compile(
-u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide\\&nbsp\\;([-A-Za-z]+)\\)\\s*$'
+u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide(?:\\s|\\xa0)+([-A-Za-z]+)\\)\\s*$'
                         )
                 standings = { }
  
@@ -157,8 +157,8 @@ u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide\\&nbsp\\;([-A-Za-z
                         valstr = ''.join(valelem.findAll(text=True))
                         match = re.match(valstr)
                         if match is None:
-                               skl.soupm(key, 'duty "%s" unparseable'+
-                                       ' standing "%s"' % (duty, valstr))
+                               skl.soupm(key, ('duty "%s" unparseable'+
+                                       ' standing "%s"') % (duty, valstr))
                                 continue
                         standing = match.group(match.lastindex)
                         standings[duty].append(standing)
@@ -208,7 +208,7 @@ u'\\s*\\S*/([-A-Za-z]+)\\s*$|\\s*\\S*/\\S*\\s*\\(ocean\\-wide\\&nbsp\\;([-A-Za-z
  
                 html = fetcher.yoweb('pirate.wm?target=', pirate)
                 self.soup = BeautifulSoup(html,
-#                      convertEntities=BeautifulSoup.HTML_ENTITIES
+                       convertEntities=BeautifulSoup.HTML_ENTITIES
                         )
  
                 self._find_standings()
author	Ian Jackson <ian@liberator.relativity.greenend.org.uk>
	Fri, 15 May 2009 18:24:37 +0000 (19:24 +0100)
committer	Ian Jackson <ian@liberator.relativity.greenend.org.uk>
	Fri, 15 May 2009 18:24:37 +0000 (19:24 +0100)