X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.main.git;a=blobdiff_plain;f=yarrg%2Fyppedia-ocean-scraper;fp=yarrg%2Fyppedia-ocean-scraper;h=5e5e0901f563b097b6554b891e04cf184f89b45a;hp=ba145eafcc247b6bae90d13244b159d62570928d;hb=2be5ac9a8dd27cc27482d369360352d1620963e0;hpb=deb63b9ee1679d519cd8817009da3587ac7b361c diff --git a/yarrg/yppedia-ocean-scraper b/yarrg/yppedia-ocean-scraper index ba145ea..5e5e090 100755 --- a/yarrg/yppedia-ocean-scraper +++ b/yarrg/yppedia-ocean-scraper @@ -99,7 +99,7 @@ def fetch(): soup = BeautifulSoup(dataf) title_arch_re = regexp.compile('(\\S.*\\S) Archipelago \\((\\S+)\\)$') -title_any_re = regexp.compile('(\\S.*\\S) \((\\S+)\\)$') +title_any_re = regexp.compile('(\\S.*\\S) \((\\S+)\\)(?: \(page does not exist\))?$') href_img_re = regexp.compile('\\.png$') def title_arch_info(t):