From: Ian Jackson Date: Wed, 23 Sep 2009 22:55:45 +0000 (+0100) Subject: Better HTTP User-Agent strings (rot13 the library, say who we are) X-Git-Tag: 4.0~2 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.db-live.git;a=commitdiff_plain;h=938b4dd547bfc4d9538a5714b6f21ab3da50d8d1 Better HTTP User-Agent strings (rot13 the library, say who we are) --- diff --git a/yarrg/Commods.pm b/yarrg/Commods.pm index b58471c..59ad3e1 100644 --- a/yarrg/Commods.pm +++ b/yarrg/Commods.pm @@ -24,6 +24,7 @@ package Commods; use IO::File; use HTTP::Request::Common (); use POSIX; +use LWP::UserAgent; use strict; use warnings; @@ -40,7 +41,8 @@ BEGIN { &get_our_version &check_tsv_line &pipethrough_prep &pipethrough_run &pipethrough_run_along &pipethrough_run_finish - &pipethrough_run_gzip + &pipethrough_run_gzip &http_useragent &version_core + &http_useragent_string_map &cgipostform &yarrgpostform &cgi_get_caller &set_ctype_utf8 $masterinfoversion); %EXPORT_TAGS = ( ); @@ -247,7 +249,14 @@ sub get_our_version ($$) { my ($aref,$prefix) = @_; $aref->{"${prefix}name"}= 'ypp-sc-tools yarrg'; $aref->{"${prefix}fixes"}= 'lastpage checkpager'; + $aref->{"${prefix}version"}= version_core(); + return $aref; + # clientname "ypp-sc-tools" + # clientversion 2.1-g2e06a26 [from git-describe --tags HEAD] + # clientfixes "lastpage" [space separated list] +} +sub version_core () { my $version= ` if type -p git-describe >/dev/null 2>&1; then gd=git-describe @@ -257,11 +266,7 @@ sub get_our_version ($$) { \$gd --tags HEAD || echo 0unknown `; $? and die $?; chomp($version); - $aref->{"${prefix}version"}= $version; - return $aref; - # clientname "ypp-sc-tools" - # clientversion 2.1-g2e06a26 [from git-describe --tags HEAD] - # clientfixes "lastpage" [space separated list] + return $version; } sub pipethrough_prep () { @@ -401,4 +406,22 @@ sub set_ctype_utf8 () { setlocale(LC_CTYPE, "en.UTF-8"); } +sub http_useragent_string_map ($$) { + my ($caller_lib_agent, $reason_style_or_caller) = @_; + $caller_lib_agent =~ y/A-Za-z/N-ZA-Mn-za-m/; + $caller_lib_agent =~ s/\s/_/g; + my $version= version_core(); + return "yarrg/$version ($reason_style_or_caller)". + " $caller_lib_agent". + " (http://yarrg.chiark.net/intro)"; +} + +sub http_useragent ($) { + my ($who) = @_; + my $ua= LWP::UserAgent->new; + my $base= $ua->_agent(); + $ua->agent(http_useragent_string_map($base, $who)); + return $ua; +} + 1; diff --git a/yarrg/commod-results-processor b/yarrg/commod-results-processor index d322c1c..99c6924 100755 --- a/yarrg/commod-results-processor +++ b/yarrg/commod-results-processor @@ -29,7 +29,6 @@ use strict (qw(vars)); use HTTP::Request; use IO::File; use POSIX; -use LWP::UserAgent; use XML::Parser; use Commods; @@ -230,7 +229,7 @@ sub main__tsv () { our ($pctb) = $ENV{'YPPSC_YARRG_PCTB'}; -our ($ua)= LWP::UserAgent->new; +our ($ua)= http_useragent("commod-results-processor $mode"); sub refresh_commodmap() { die unless $pctb; diff --git a/yarrg/database-info-fetch b/yarrg/database-info-fetch index 90db002..041e008 100755 --- a/yarrg/database-info-fetch +++ b/yarrg/database-info-fetch @@ -25,7 +25,6 @@ # sponsored by Three Rings. use strict (qw(vars)); -use LWP::UserAgent; use JSON; #use Data::Dumper; use IO::File; @@ -38,7 +37,7 @@ our ($which) = shift @ARGV; $which =~ s/\W//g; our ($pctb) = $ENV{'YPPSC_YARRG_PCTB'}; -our ($ua)= LWP::UserAgent->new; +our ($ua)= http_useragent("database_info_fetch $which"); our $jsonresp; sub jparsetable ($$) { @@ -193,6 +192,14 @@ sub main__timestamp () { exit(0); } +sub main__yarrgversion () { + printf "%s\n", version_core(); +} + +sub main__useragentstringmap ($$) { + printf "%s\n", http_useragent_string_map($_[0], $_[1]); +} + sub main__sunshinewidget () { print <@ stderr] + ::http::config -useragent $ua + debug "USERAGENT NEW \"$ua\"" } } diff --git a/yarrg/yppedia-ocean-scraper b/yarrg/yppedia-ocean-scraper index 476c1cd..68efa38 100755 --- a/yarrg/yppedia-ocean-scraper +++ b/yarrg/yppedia-ocean-scraper @@ -37,8 +37,8 @@ signal.signal(signal.SIGINT, signal.SIG_DFL) import sys import os import urllib -import urllib2 import re as regexp +import subprocess from optparse import OptionParser from BeautifulSoup import BeautifulSoup @@ -59,6 +59,20 @@ def fix_stdout(): fix_stdout() +# User agent: +class YarrgURLopener(urllib.FancyURLopener): + base_version= urllib.URLopener().version + proc= subprocess.Popen( + ["./database-info-fetch", "useragentstringmap", + base_version, "manual islands/topology fetch"], + shell=False, + stderr=None, + stdout=subprocess.PIPE, + ) + version = proc.communicate()[0].rstrip('\n'); + assert(proc.returncode is not None and proc.returncode == 0) +urllib._urlopener = YarrgURLopener() + ocean = None soup = None opts = None @@ -77,7 +91,7 @@ def fetch(): url = ('http://yppedia.puzzlepirates.com/' + (url_base % urllib.quote(ocean,''))) debug('fetching',url) - dataf = urllib2.urlopen(url) + dataf = urllib.urlopen(url) debug('fetched',dataf) soup = BeautifulSoup(dataf)