chiark / gitweb /
Cope with spaces in arch names as found on Hunter
[ypp-sc-tools.db-test.git] / yarrg / yppedia-ocean-scraper
index 401de9bd6925fe60b904aa5f09b2791f99fc76a7..9105f1963998d96943c08237ad9a36f84deb5d85 100755 (executable)
@@ -61,7 +61,7 @@ def fetch():
        soup = BeautifulSoup(dataf)
 
 
-title_arch_re = regexp.compile('(\\S+) Archipelago \\((\\S+)\\)$')
+title_arch_re = regexp.compile('(\\S.*\\S) Archipelago \\((\\S+)\\)$')
 title_any_re = regexp.compile('(\\S.*\\S) \((\\S+)\\)$')
 href_img_re = regexp.compile('\\.png$')
 
@@ -107,8 +107,8 @@ def parse():
                debug('links',links)
                if not links: continue
                (a,o) = title_arch_info(links[0]['title'])
+               debug('arch-ocean', (a,o))
                assert(o == ocean)
-               debug('arch', a)
                assert(a not in arches)
                isles = []
                for link in links[1:]: