chiark / gitweb /
yarrg database: when scraping yppedia charts, allow links to islands whose pages do not exist
[ypp-sc-tools.db-test.git] / yarrg / yppedia-ocean-scraper
index ba145eafcc247b6bae90d13244b159d62570928d..5e5e0901f563b097b6554b891e04cf184f89b45a 100755 (executable)
@@ -99,7 +99,7 @@ def fetch():
        soup = BeautifulSoup(dataf)
 
 title_arch_re = regexp.compile('(\\S.*\\S) Archipelago \\((\\S+)\\)$')
-title_any_re = regexp.compile('(\\S.*\\S) \((\\S+)\\)$')
+title_any_re = regexp.compile('(\\S.*\\S) \((\\S+)\\)(?: \(page does not exist\))?$')
 href_img_re = regexp.compile('\\.png$')
 
 def title_arch_info(t):