From: Ian Jackson
Date: Fri, 13 Jan 2012 00:56:37 +0000 (+0000)
Subject: yarrg database: when scraping yppedia charts, allow links to islands whose pages...
X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.db-test.git;a=commitdiff_plain;h=2be5ac9a8dd27cc27482d369360352d1620963e0

yarrg database: when scraping yppedia charts, allow links to islands whose pages do not yet exist
---

diff --git a/yarrg/yppedia-ocean-scraper b/yarrg/yppedia-ocean-scraper
index ba145ea..5e5e090 100755
--- a/yarrg/yppedia-ocean-scraper
+++ b/yarrg/yppedia-ocean-scraper
@@ -99,7 +99,7 @@ def fetch():
 	soup = BeautifulSoup(dataf)
 
 title_arch_re = regexp.compile('(\\S.*\\S) Archipelago \\((\\S+)\\)$')
-title_any_re = regexp.compile('(\\S.*\\S) \((\\S+)\\)$')
+title_any_re = regexp.compile('(\\S.*\\S) \((\\S+)\\)(?: \(page does not exist\))?$')
 href_img_re = regexp.compile('\\.png$')
 
 def title_arch_info(t):