From: Ian Jackson
Date: Mon, 31 Aug 2009 15:36:42 +0000 (+0100)
Subject: Cope with spaces in arch names as found on Hunter
X-Git-Tag: 3.4~59
X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.db-live.git;a=commitdiff_plain;h=7078fe9eae28c76913e5c14946f9dbf94cfb3458

Cope with spaces in arch names as found on Hunter
---

diff --git a/yarrg/yppedia-ocean-scraper b/yarrg/yppedia-ocean-scraper
index 401de9b..9105f19 100755
--- a/yarrg/yppedia-ocean-scraper
+++ b/yarrg/yppedia-ocean-scraper
@@ -61,7 +61,7 @@ def fetch():
  soup = BeautifulSoup(dataf)
-title_arch_re = regexp.compile('(\\S+) Archipelago \\((\\S+)\\)$')
+title_arch_re = regexp.compile('(\\S.*\\S) Archipelago \\((\\S+)\\)$')
 title_any_re = regexp.compile('(\\S.*\\S) \((\\S+)\\)$')
 href_img_re = regexp.compile('\\.png$')
@@ -107,8 +107,8 @@ def parse():
  debug('links',links)
  if not links: continue
  (a,o) = title_arch_info(links[0]['title'])
+ debug('arch-ocean', (a,o))
  assert(o == ocean)
- debug('arch', a)
  assert(a not in arches)
  isles = []
  for link in links[1:]: