From cf91713ebb03e007c03dc70867c73db3a1a754a9 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Mon, 31 Aug 2009 16:21:13 +0100 Subject: [PATCH] WIP ocean scraper seems to mostly work, need to do arg parsing and IO --- yarrg/yppedia-ocean-scraper | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/yarrg/yppedia-ocean-scraper b/yarrg/yppedia-ocean-scraper index 104a408..d53e236 100755 --- a/yarrg/yppedia-ocean-scraper +++ b/yarrg/yppedia-ocean-scraper @@ -54,22 +54,23 @@ def parse(): if u.name != 'table': return False return len(findall_title_arch_ok(u)) > 1 - archestable = firstarch.findParent(is_archestable) + archestable = firstarch.findParent('table', attrs={'border':'1'}) debug('at',archestable) - arches = findall_title_arch_ok(archestable) + arches = [] + for row in archestable.findAll('tr',recursive=False): + arches += row.findAll('td',recursive=False) debug('ac',arches) def is_island(v): return len(v.findAll(text = regexp.compile('.*Large'))) > 0 def arch_up_map(u): return u.findParent(is_island) - arches = map(arch_up_map, arches) - debug('ac2',arches) for arch in arches: links = arch.findAll('a', href=True) debug('links',links) + if not links: continue (a,o) = title_arch_info(links[0]['title']) assert(o == ocean) print 'arch', a -- 2.30.2