chiark / gitweb /
Merge branch 'master' of /u/ijackson/things/ypp-sc-tools.pctb-dict
author Ian Jackson <ijackson@chiark.greenend.org.uk>
Wed, 1 Feb 2012 22:20:40 +0000 (22:20 +0000)
committer Ian Jackson <ijackson@chiark.greenend.org.uk>
Wed, 1 Feb 2012 22:20:40 +0000 (22:20 +0000)
yarrg/source-info.txt
yarrg/yppedia-ocean-scraper

index d2629ffaeb349404abf41b7fefbcafaaeb6e9556..fdd5125739959c0341fe82289c35417bd0a0d318 100644 (file)
@@ -231,6 +231,13 @@ ocean Hunter
   Ix Chel
   Manu Island
 
+ocean Emerald
+ Crab
+  The Beaufort Islands
+ Osprey
+  Scurvy Reef
+  Gauntlet Island
+
 ocean Malachite
  Draco
   Cetus Island
@@ -243,6 +250,16 @@ ocean Sage
 
 ocean Viridian
 
+ocean Meridian
+ Draco
+  Cetus Island
+  Threewood Island
+  Wyvern Island
+ Basilisk
+  Zechstein Island
+ Komodo
+  Buyan's Vortice
+
 # family oceans
 
 ocean Crimson
index ba145eafcc247b6bae90d13244b159d62570928d..5e5e0901f563b097b6554b891e04cf184f89b45a 100755 (executable)
@@ -99,7 +99,7 @@ def fetch():
        soup = BeautifulSoup(dataf)
 
 title_arch_re = regexp.compile('(\\S.*\\S) Archipelago \\((\\S+)\\)$')
-title_any_re = regexp.compile('(\\S.*\\S) \((\\S+)\\)$')
+title_any_re = regexp.compile('(\\S.*\\S) \((\\S+)\\)(?: \(page does not exist\))?$')
 href_img_re = regexp.compile('\\.png$')
 
 def title_arch_info(t):