X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.main.git;a=blobdiff_plain;f=yarrg%2Fyppedia-ocean-scraper;fp=yarrg%2Fyppedia-ocean-scraper;h=68efa38f6781980c63308a55b55b4334195873d5;hp=476c1cd9b381bb0d5d5ec1979cc4928bff9fa9f4;hb=938b4dd547bfc4d9538a5714b6f21ab3da50d8d1;hpb=0da2ddd243bf88c4961cf8a1deb8d174c18976c0

diff --git a/yarrg/yppedia-ocean-scraper b/yarrg/yppedia-ocean-scraper
index 476c1cd..68efa38 100755
--- a/yarrg/yppedia-ocean-scraper
+++ b/yarrg/yppedia-ocean-scraper
@@ -37,8 +37,8 @@ signal.signal(signal.SIGINT, signal.SIG_DFL)
 import sys
 import os
 import urllib
-import urllib2
 import re as regexp
+import subprocess
 
 from optparse import OptionParser
 from BeautifulSoup import BeautifulSoup
@@ -59,6 +59,20 @@ def fix_stdout():
 
 fix_stdout()
 
+# User agent:
+class YarrgURLopener(urllib.FancyURLopener):
+    base_version= urllib.URLopener().version
+    proc= subprocess.Popen(
+        ["./database-info-fetch", "useragentstringmap",
+         base_version, "manual islands/topology fetch"],
+        shell=False,
+        stderr=None,
+        stdout=subprocess.PIPE,
+        )
+    version = proc.communicate()[0].rstrip('\n');
+    assert(proc.returncode is not None and proc.returncode == 0)
+urllib._urlopener = YarrgURLopener()
+
 ocean = None
 soup = None
 opts = None
@@ -77,7 +91,7 @@ def fetch():
     url = ('http://yppedia.puzzlepirates.com/' +
         (url_base % urllib.quote(ocean,'')))
     debug('fetching',url)
-    dataf = urllib2.urlopen(url)
+    dataf = urllib.urlopen(url)
     debug('fetched',dataf)
     soup = BeautifulSoup(dataf)
 