X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.db-test.git;a=blobdiff_plain;f=yarrg%2Fyppedia-ocean-scraper;fp=yarrg%2Fyppedia-ocean-scraper;h=9105f1963998d96943c08237ad9a36f84deb5d85;hp=401de9bd6925fe60b904aa5f09b2791f99fc76a7;hb=7078fe9eae28c76913e5c14946f9dbf94cfb3458;hpb=cf9d0277c5d6bfe900a0881a00984f8268a90514 diff --git a/yarrg/yppedia-ocean-scraper b/yarrg/yppedia-ocean-scraper index 401de9b..9105f19 100755 --- a/yarrg/yppedia-ocean-scraper +++ b/yarrg/yppedia-ocean-scraper @@ -61,7 +61,7 @@ def fetch(): soup = BeautifulSoup(dataf) -title_arch_re = regexp.compile('(\\S+) Archipelago \\((\\S+)\\)$') +title_arch_re = regexp.compile('(\\S.*\\S) Archipelago \\((\\S+)\\)$') title_any_re = regexp.compile('(\\S.*\\S) \((\\S+)\\)$') href_img_re = regexp.compile('\\.png$') @@ -107,8 +107,8 @@ def parse(): debug('links',links) if not links: continue (a,o) = title_arch_info(links[0]['title']) + debug('arch-ocean', (a,o)) assert(o == ocean) - debug('arch', a) assert(a not in arches) isles = [] for link in links[1:]: