chiark / gitweb /
More robust way to find arch/islands table
author Ian Jackson <ijackson@chiark.greenend.org.uk>
Tue, 1 Sep 2009 15:57:51 +0000 (16:57 +0100)
committer Ian Jackson <Ian.Jackson@eu.citrix.com>
Tue, 1 Sep 2009 15:57:51 +0000 (16:57 +0100)
yarrg/yppedia-ocean-scraper

index ad35c4d..a854035 100755 (executable)
@@ -95,8 +95,7 @@ def parse_chart():
        return s
 
 def parse_ocean():
-       firstarch = soup.find('a', attrs = {'title': title_arch_ok})
-       debug('fa',firstarch)
+       content = soup.find('div', attrs = {'id': 'content'})
 
        def findall_title_arch_ok(t):
                return t.findAll('a', attrs = {'title': title_arch_ok})
@@ -105,7 +104,7 @@ def parse_ocean():
                if u.name != 'table': return False
                return len(findall_title_arch_ok(u)) > 1
 
-       archestable = firstarch.findParent('table', attrs={'border':'1'})
+       archestable = content.findChild('table', attrs={'border':'1'})
        debug('at',archestable)
 
        archsoups = []