1 """Open an arbitrary URL.
3 See the following document for more info on URLs:
4 "Names and Addresses, URIs, URLs, URNs, URCs", at
5 http://www.w3.org/pub/WWW/Addressing/Overview.html
7 See also the HTTP spec (from which the error codes are derived):
8 "HTTP - Hypertext Transfer Protocol", at
9 http://www.w3.org/pub/WWW/Protocols/
11 Related standards and specs:
12 - RFC1808: the "relative URL" spec. (authoritative status)
13 - RFC1738 - the "URL standard". (authoritative status)
14 - RFC1630 - the "URI spec". (informational status)
16 The object returned by URLopener().open(file) will differ per
17 protocol. All you know is that is has methods read(), readline(),
18 readlines(), fileno(), close() and info(). The read*(), fileno()
19 and close() methods work like those of open files.
20 The info() method returns a mimetools.Message object which can be
21 used to query various info about the object, if available.
(mimetools.Message objects are queried with the getheader() method.)
"""
# Public, documented API of this module; the split*() helpers are
# historically public as well, hence their presence here.
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "splitgophertype", "getproxies"]

__version__ = '1.15' # XXX This version is not always updated :-(

MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
# Helper for non-unix systems
# NOTE(review): the surrounding 'if os.name == ...:' / 'elif' / 'else:'
# framing lines are not visible in this excerpt; only the per-platform
# import bodies and the generic fallback definitions remain.
    from macurl2path import url2pathname, pathname2url
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
    # Generic fallback: the URL path *is* the file path, modulo %-quoting.
    def url2pathname(pathname):
        return unquote(pathname)
    def pathname2url(pathname):
        return quote(pathname)
58 # This really consists of two pieces:
59 # (1) a class which handles opening of all sorts of URLs
60 # (plus assorted utilities etc.)
61 # (2) a set of functions for parsing URLs
62 # XXX Should these be separated out into different modules?
65 # Shortcut for basic usage
def urlopen(url, data=None):
    """urlopen(url [, data]) -> open file-like object"""
    # Uses a shared module-level opener; *data*, when given, is passed on
    # to open() for handlers that support POST-style requests.
    # NOTE(review): the lazy-creation guard around the shared opener
    # appears truncated in this excerpt.
        _urlopener = FancyURLopener()
        return _urlopener.open(url)
        return _urlopener.open(url, data)

def urlretrieve(url, filename=None, reporthook=None, data=None):
    # Copy a network object to a local file via the shared opener.
        _urlopener = FancyURLopener()
    return _urlopener.retrieve(url, filename, reporthook, data)
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Default User-agent version string advertised to servers.
    version = "Python-urllib/%s" % __version__

    def __init__(self, proxies=None, **x509):
        # proxies: mapping of scheme -> proxy URL; when omitted, the
        # environment/platform settings from getproxies() are used.
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        # Optional client certificate material for HTTPS (via **x509).
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-agent', 'Servus/0.2')]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve(). This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe. Bah.

        # (cleanup / __del__ fragment) — removes temp files created by
        # retrieve().  NOTE(review): the method headers and the try/except
        # around unlink appear to be missing from this excerpt.
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        for file in self.__tempfiles:
        del self.__tempfiles[:]
            self.tempcache.clear()
143 def addheader(self, *args):
144 """Add a header to be used by the HTTP interface only
145 e.g. u.addheader('Accept', 'sound/basic')"""
146 self.addheaders.append(args)
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r').

        Dispatches on the URL scheme to an open_<scheme>() method,
        routing through a configured proxy when one matches the scheme.
        """
        fullurl = unwrap(toBytes(fullurl))
        # Serve from the (optional) retrieve() cache when enabled.
        if self.tempcache and self.tempcache.has_key(fullurl):
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if self.proxies.has_key(urltype):
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        name = 'open_' + urltype
        # Scheme names may contain '-', which is illegal in a method name.
        name = '_'.join(name.split('-'))
        if not hasattr(self, name):
                return self.open_unknown_proxy(proxy, fullurl, data)
                return self.open_unknown(fullurl, data)
            return getattr(self, name)(url)
            return getattr(self, name)(url, data)
        except socket.error, msg:
            # Surface socket problems as IOError, keeping the traceback.
            raise IOError, ('socket error', msg), sys.exc_info()[2]
184 def open_unknown(self, fullurl, data=None):
185 """Overridable interface to open unknown URL type."""
186 type, url = splittype(fullurl)
187 raise IOError, ('url error', 'unknown url type', type)
189 def open_unknown_proxy(self, proxy, fullurl, data=None):
190 """Overridable interface to open unknown URL type."""
191 type, url = splittype(fullurl)
192 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, None) for a local object
        or (tempfilename, headers) for a remote object."""
        url = unwrap(toBytes(url))
        if self.tempcache and self.tempcache.has_key(url):
            return self.tempcache[url]
        type, url1 = splittype(url)
        # Local files need no copying: hand back the filesystem path.
        if not filename and (not type or type == 'file'):
                fp = self.open_local_file(url1)
                return url2pathname(splithost(url1)[1]), hdrs
        fp = self.open(url, data)
        # Derive a temp-file suffix from the path component of the URL.
            garbage, path = splittype(url)
            garbage, path = splithost(path or "")
            path, garbage = splitquery(path or "")
            path, garbage = splitattr(path or "")
            suffix = os.path.splitext(path)[1]
            filename = tempfile.mktemp(suffix)
        self.__tempfiles.append(filename)
        result = filename, headers
        if self.tempcache is not None:
            self.tempcache[url] = result
        tfp = open(filename, 'wb')
        # Copy in blocks, calling reporthook(blocknum, blocksize, totalsize)
        # before the first block and after each subsequent one.
        if headers.has_key("content-length"):
            size = int(headers["Content-Length"])
            reporthook(0, bs, size)
            reporthook(1, bs, size)
            blocknum = blocknum + 1
            reporthook(blocknum, bs, size)
    # Each method named open_<type> knows how to open that type of URL

    def open_http(self, url, data=None):
        """Use HTTP protocol.

        *url* is either a selector string, or a (host, fullurl) tuple when
        the request is routed through a proxy (see open()).
        """
        if type(url) is types.StringType:
            host, selector = splithost(url)
                user_passwd, host = splituser(host)
            # Proxy case: strip the scheme and re-derive the real host so a
            # Host: header and per-host authentication can be produced.
            urltype, rest = splittype(selector)
            if urltype.lower() != 'http':
                realhost, rest = splithost(rest)
                    user_passwd, realhost = splituser(realhost)
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
            #print "proxy via http:", host, selector
        if not host: raise IOError, ('http error', 'no host given')
            auth = base64.encodestring(user_passwd).strip()
        h = httplib.HTTP(host)
            # POST branch: form-encoded body of the given length.
            h.putrequest('POST', selector)
            h.putheader('Content-type', 'application/x-www-form-urlencoded')
            h.putheader('Content-length', '%d' % len(data))
            h.putrequest('GET', selector)
        if auth: h.putheader('Authorization', 'Basic %s' % auth)
        if realhost: h.putheader('Host', realhost)
        for args in self.addheaders: apply(h.putheader, args)
        errcode, errmsg, headers = h.getreply()
            return addinfourl(fp, headers, "http:" + url)
            return self.http_error(url, fp, errcode, errmsg, headers)
            return self.http_error(url, fp, errcode, errmsg, headers, data)
    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.
        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
                result = method(url, fp, errcode, errmsg, headers)
                result = method(url, fp, errcode, errmsg, headers, data)
            # A falsy result means the handler declined; fall through.
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise IOError."""
        raise IOError, ('http error', errcode, errmsg, headers)
    if hasattr(socket, "ssl"):
        # Only defined when the interpreter was built with SSL support.
        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            if type(url) is types.StringType:
                host, selector = splithost(url)
                    user_passwd, host = splituser(host)
                urltype, rest = splittype(selector)
                if urltype.lower() != 'https':
                    realhost, rest = splithost(rest)
                        user_passwd, realhost = splituser(realhost)
                        selector = "%s://%s%s" % (urltype, realhost, rest)
                #print "proxy via https:", host, selector
            if not host: raise IOError, ('https error', 'no host given')
                auth = base64.encodestring(user_passwd).strip()
            h = httplib.HTTPS(host, 0,
                              key_file=self.key_file,
                              cert_file=self.cert_file)
                h.putrequest('POST', selector)
                h.putheader('Content-type',
                            'application/x-www-form-urlencoded')
                h.putheader('Content-length', '%d' % len(data))
                h.putrequest('GET', selector)
            # NOTE(review): open_http calls
            # h.putheader('Authorization', 'Basic %s' % auth); here the
            # header name and value are fused into one argument — almost
            # certainly a bug (header emitted without a separate value).
            if auth: h.putheader('Authorization: Basic %s' % auth)
            if realhost: h.putheader('Host', realhost)
            for args in self.addheaders: apply(h.putheader, args)
            errcode, errmsg, headers = h.getreply()
                return addinfourl(fp, headers, "https:" + url)
                return self.http_error(url, fp, errcode, errmsg, headers)
                return self.http_error(url, fp, errcode, errmsg, headers,
    def open_gopher(self, url):
        """Use Gopher protocol."""
        host, selector = splithost(url)
        if not host: raise IOError, ('gopher error', 'no host given')
        type, selector = splitgophertype(selector)
        selector, query = splitquery(selector)
        selector = unquote(selector)
            query = unquote(query)
            fp = gopherlib.send_query(selector, query, host)
            fp = gopherlib.send_selector(selector, host)
        # Gopher has no headers; noheaders() supplies an empty Message.
        return addinfourl(fp, noheaders(), "gopher:" + url)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        # 'file://host/...' (exactly two slashes) names a remote host.
        if url[:2] == '//' and url[2:3] != '/':
            return self.open_ftp(url)
            return self.open_local_file(url)
    def open_local_file(self, url):
        """Use local file."""
        import mimetypes, mimetools, rfc822, StringIO
        host, file = splithost(url)
        localname = url2pathname(file)
            stats = os.stat(localname)
            raise IOError(e.errno, e.strerror, e.filename)
        size = stats[stat.ST_SIZE]
        modified = rfc822.formatdate(stats[stat.ST_MTIME])
        mtype = mimetypes.guess_type(url)[0]
        # Synthesize HTTP-style headers describing the local file.
        headers = mimetools.Message(StringIO.StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
            urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'),
        host, port = splitport(host)
        # Serve the file only when the named host resolves to this machine.
            and socket.gethostbyname(host) in (localhost(), thishost()):
            urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'),
        raise IOError, ('local file error', 'not on local host')
    def open_ftp(self, url):
        """Use FTP protocol."""
        import mimetypes, mimetools, StringIO
        host, path = splithost(url)
        if not host: raise IOError, ('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        # Unquote credentials *after* splitting on the URL delimiters.
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
            port = ftplib.FTP_PORT
        path, attrs = splitattr(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        # Connections are cached per (user, host, port, path) key.
        key = user, host, port, '/'.join(dirs)
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in self.ftpcache.keys():
        if not self.ftpcache.has_key(key):
            self.ftpcache[key] = \
                      ftpwrapper(user, passwd, host, port, dirs)
        # Transfer type: 'D' = directory listing; ';type=x' attr overrides.
        if not file: type = 'D'
            attr, value = splitvalue(attr)
            if attr.lower() == 'type' and \
               value in ('a', 'A', 'i', 'I', 'd', 'D'):
        (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
        mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers += "Content-Type: %s\n" % mtype
        if retrlen is not None and retrlen >= 0:
            headers += "Content-Length: %d\n" % retrlen
        headers = mimetools.Message(StringIO.StringIO(headers))
        return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors(), msg:
            raise IOError, ('ftp error', msg), sys.exc_info()[2]
    def open_data(self, url, data=None):
        """Use "data" URL.

        *data* is accepted for signature compatibility with the other
        open_* handlers but is not used here.
        """
        # syntax of data URLs:
        # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # parameter := attribute "=" value
        import StringIO, mimetools, time
            [type, data] = url.split(',', 1)
            raise IOError, ('data error', 'bad data URL')
            type = 'text/plain;charset=US-ASCII'
        # The last ';' field names the transfer encoding unless it is a
        # 'key=value' media-type parameter.
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            data = base64.decodestring(data)
        msg.append('Content-length: %d' % len(data))
        f = StringIO.StringIO(msg)
        headers = mimetools.Message(f, 0)
        f.fileno = None # needed for addinfourl
        return addinfourl(f, headers, url)
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args):
        apply(URLopener.__init__, (self,) + args)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        # Guard against redirect loops: after self.maxtries redirects,
        # report a synthetic 500 instead of following another one.
        if self.maxtries and self.tries >= self.maxtries:
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
                meth = self.http_error_default
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        result = self.redirect_internal(url, fp, errcode, errmsg, headers,

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        # The new location may arrive in either Location: or URI:.
        if headers.has_key('location'):
            newurl = headers['location']
        elif headers.has_key('uri'):
            newurl = headers['uri']
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)
            return self.open(newurl)
            return self.open(newurl, data)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        See this URL for a description of the basic authentication scheme:
        http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt"""
        if not headers.has_key('www-authenticate'):
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        # Only 'Basic realm="..."' challenges are handled here.
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        name = 'retry_' + self.type + '_basic_auth'
            return getattr(self,name)(url, realm)
            return getattr(self,name)(url, realm, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        # Re-issue the request with credentials embedded in the netloc.
        host, selector = splithost(url)
        i = host.find('@') + 1
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
            return self.open(newurl)
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        host, selector = splithost(url)
        i = host.find('@') + 1
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = '//' + host + selector
        return self.open_https(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache = 0):
        # Cached per 'realm@host'; clear_cache forces a fresh prompt.
        key = realm + '@' + host.lower()
        if self.auth_cache.has_key(key):
                del self.auth_cache[key]
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        # Console fallback; KeyboardInterrupt aborts the prompt.
        user = raw_input("Enter username for %s at %s: " % (realm,
        passwd = getpass.getpass("Enter password for %s in %s at %s: " %
        except KeyboardInterrupt:
    """Return the IP address of the magic hostname 'localhost'."""
    # NOTE(review): the 'def localhost():' / 'def thishost():' /
    # 'def ftperrors():' / 'def noheaders():' headers and their memoizing
    # guards are missing from this excerpt; only the docstrings and the
    # cached-value assignments remain.
        _localhost = socket.gethostbyname('localhost')

    """Return the IP address of the current host."""
        _thishost = socket.gethostbyname(socket.gethostname())

    """Return the set of errors raised by the FTP class."""
        _ftperrors = ftplib.all_errors

    """Return an empty mimetools.Message object."""
        _noheaders = mimetools.Message(StringIO.StringIO(), 0)
        _noheaders.fp.close() # Recycle file descriptor
    """Class used by open_ftp() for cache of open FTP connections."""

    # NOTE(review): the 'class ftpwrapper:' header, the attribute
    # assignments of __init__, and several try/else framing lines are
    # missing from this excerpt.  The connect/login lines below look like
    # they belong to a separate init() method whose 'def' was lost.
    def __init__(self, user, passwd, host, port, dirs):
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:

    def retrfile(self, file, type):
        # Directory listings go over ASCII; files use the given TYPE.
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            self.ftp.voidcmd(cmd)
        if file and not isdir:
            # Use nlst to see if the file exists at all
            except ftplib.error_perm, reason:
                raise IOError, ('ftp error', reason), sys.exc_info()[2]
            # Restore the transfer mode!
            self.ftp.voidcmd(cmd)
        # Try to retrieve as a file
            conn = self.ftp.ntransfercmd(cmd)
        except ftplib.error_perm, reason:
            # 550 means "not a plain file"; fall back to a listing below.
            if str(reason)[:3] != '550':
                raise IOError, ('ftp error', reason), sys.exc_info()[2]
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing
            if file: cmd = 'LIST ' + file
            conn = self.ftp.ntransfercmd(cmd)
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'),
                             self.endtransfer), conn[1])

    def endtransfer(self):
    """Base class for addinfo and addclosehook."""

    # NOTE(review): the 'class addbase:' header and parts of __repr__ /
    # close() are missing from this excerpt.
    def __init__(self, fp):
        # Delegate the file protocol to the wrapped fp where available.
        self.read = self.fp.read
        self.readline = self.fp.readline
        if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
        if hasattr(self.fp, "fileno"): self.fileno = self.fp.fileno

        return '<%s at %s whose fp = %s>' % (self.__class__.__name__,
                                             `id(self)`, `self.fp`)
        self.readlines = None
        if self.fp: self.fp.close()

class addclosehook(addbase):
    """Class to add a close hook to an open file."""

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

            # Invoke the hook exactly once, then disarm it.
            apply(self.closehook, self.hookargs)
            self.closehook = None

class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers

class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url):
        addbase.__init__(self, fp)
        self.headers = headers
def basejoin(base, url):
    """Utility to combine a URL with a base URL to form a new URL.

    Resolves relative references per the RFC 1808 rules implemented here.
    """
    type, path = splittype(url)
    # if url is complete (i.e., it contains a type), return it
    host, path = splithost(path)
    type, basepath = splittype(base) # inherit type from base
    # if url contains host, just inherit type
        if type: return type + '://' + host + path
            # no type inherited, so url must have started with //
    host, basepath = splithost(basepath) # inherit host
    basepath, basetag = splittag(basepath) # remove extraneous cruft
    basepath, basequery = splitquery(basepath) # idem
    # non-absolute path name
    if path[:1] in ('#', '?'):
        # path is just a tag or query, attach to basepath
    # else replace last component
    i = basepath.rfind('/')
        # basepath not absolute
            # host present, make absolute
        # else keep non-absolute
        # remove last file component
        basepath = basepath[:i+1]
    # Interpret ../ (important because of symlinks)
    while basepath and path[:3] == '../':
        i = basepath[:-1].rfind('/')
            basepath = basepath[:i+1]
    path = basepath + path
    if host and path and path[0] != '/':
    if type and host: return type + '://' + host + path
    elif type: return type + ':' + path
    elif host: return '//' + host + path # don't know what this means
889 # Utilities to parse URLs (most of these return None for missing parts):
890 # unwrap('<URL:type://host/path>') --> 'type://host/path'
891 # splittype('type:opaquestring') --> 'type', 'opaquestring'
892 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
893 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
894 # splitpasswd('user:passwd') -> 'user', 'passwd'
895 # splitport('host:port') --> 'host', 'port'
896 # splitquery('/path?query') --> '/path', 'query'
897 # splittag('/path#tag') --> '/path', 'tag'
898 # splitattr('/path;attr1=value1;attr2=value2;...') ->
899 # '/path', ['attr1=value1', 'attr2=value2', ...]
900 # splitvalue('attr=value') --> 'attr', 'value'
901 # splitgophertype('/Xselector') --> 'X', 'selector'
902 # unquote('abc%20def') -> 'abc def'
903 # quote('abc def') -> 'abc%20def')
    """toBytes(u"URL") --> 'URL'."""
    # NOTE(review): the 'def toBytes(url):' and 'def unwrap(url):' headers
    # and their try/except framing are missing from this excerpt.
    # Most URL schemes require ASCII. If that changes, the conversion
    if type(url) is types.UnicodeType:
            url = url.encode("ASCII")
            raise UnicodeError("URL " + repr(url) +
                               " contains non-ASCII characters")

    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
    if url[:1] == '<' and url[-1:] == '>':
        url = url[1:-1].strip()
    if url[:4] == 'URL:': url = url[4:].strip()
    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
    # NOTE(review): the 'def' headers, 'global' statements, lazy
    # 'import re' lines, and fall-through returns of these helpers are
    # missing from this excerpt.  Each compiles its regex once and caches
    # it in a module-level _*prog variable.
    if _typeprog is None:
        _typeprog = re.compile('^([^/:]+):')
    match = _typeprog.match(url)
        scheme = match.group(1)
        return scheme.lower(), url[len(scheme) + 1:]

    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
    if _hostprog is None:
        _hostprog = re.compile('^//([^/]*)(.*)$')
    match = _hostprog.match(url)
    if match: return match.group(1, 2)

    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
    if _userprog is None:
        _userprog = re.compile('^([^@]*)@(.*)$')
    match = _userprog.match(host)
    if match: return map(unquote, match.group(1, 2))

def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
    if _passwdprog is None:
        _passwdprog = re.compile('^([^:]*):(.*)$')
    match = _passwdprog.match(user)
    if match: return match.group(1, 2)
# splittag('/path#tag') --> '/path', 'tag'
    """splitport('host:port') --> 'host', 'port'."""
    # NOTE(review): 'def' headers and fall-through returns are missing
    # from this excerpt (same lazy-compiled-regex pattern as above).
    if _portprog is None:
        _portprog = re.compile('^(.*):([0-9]+)$')
    match = _portprog.match(host)
    if match: return match.group(1, 2)

def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number are found after ':'.
    Return None if ':' but not a valid number."""
    if _nportprog is None:
        _nportprog = re.compile('^(.*):(.*)$')
    match = _nportprog.match(host)
        host, port = match.group(1, 2)
            if not port: raise ValueError, "no digits"
    return host, defport

def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'."""
    if _queryprog is None:
        _queryprog = re.compile('^(.*)\?([^?]*)$')
    match = _queryprog.match(url)
    if match: return match.group(1, 2)
    """splittag('/path#tag') --> '/path', 'tag'."""
    # NOTE(review): 'def splittag(url):', 'def splitattr(url):' headers and
    # fall-through returns are missing from this excerpt.
    if _tagprog is None:
        _tagprog = re.compile('^(.*)#([^#]*)$')
    match = _tagprog.match(url)
    if match: return match.group(1, 2)

    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    words = url.split(';')
    return words[0], words[1:]

def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'."""
    if _valueprog is None:
        _valueprog = re.compile('^([^=]*)=(.*)$')
    match = _valueprog.match(attr)
    if match: return match.group(1, 2)
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'."""
    # A gopher selector encodes its item type as the single character
    # immediately following a leading slash; both must be present.
    has_type = selector.startswith('/') and len(selector) > 1
    if not has_type:
        return None, selector
    return selector[1], selector[2:]
    """unquote('abc%20def') -> 'abc def'."""
    # NOTE(review): unquote()'s header, its '%'-splitting loop, the join
    # and return, and parts of the _fast_quote machinery are missing from
    # this excerpt.
    myappend = res.append
            myappend(mychr(myatoi(item[:2], 16))
            myappend('%' + item)
            myappend('%' + item)

def unquote_plus(s):
    """unquote('%7e/abc+def') -> '~/abc def'"""
    # replace '+' with ' '
    s = ' '.join(s.split('+'))

# Characters that never need quoting, plus '/' for the fast path.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
_fast_safe_test = always_safe + '/'

    if _fast_safe is None:
        for c in _fast_safe_test:
    for i in range(len(res)):
        if not _fast_safe.has_key(c):
            res[i] = '%%%02X' % ord(c)
def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL. Thus, it will not encode '/'. This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.
    """
    safe = always_safe + safe
    # Fast path when the default safe set is in effect.
    if _fast_safe_test == safe:
        return _fast_quote(s)
    for i in range(len(res)):
            res[i] = '%%%02X' % ord(c)

def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
        for i in range(len(l)):
            l[i] = quote(l[i], safe)
        return quote(s, safe)
def urlencode(query,doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    """

    if hasattr(query,"items"):
        # mapping objects
        query = query.items()
        # it's a bother at times that strings and string-like objects are
        # non-sequence items should not work with len()
        # non-empty strings will fail this
        if len(query) and type(query[0]) != types.TupleType:
        # zero-length sequences of all types will get here and succeed,
        # but that's a minor nit - since the original implementation
        # allowed empty dicts that type of behavior probably should be
        # preserved for consistency
            ty,va,tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb

        # preserve old behavior
            k = quote_plus(str(k))
            v = quote_plus(str(v))
            l.append(k + '=' + v)
            k = quote_plus(str(k))
            if type(v) == types.StringType:
                l.append(k + '=' + v)
            elif type(v) == types.UnicodeType:
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                v = quote_plus(v.encode("ASCII","replace"))
                l.append(k + '=' + v)
                # is this a sufficient test for sequence-ness?
                    v = quote_plus(str(v))
                    l.append(k + '=' + v)
                # loop over the sequence
                    l.append(k + '=' + quote_plus(str(elt)))
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention. If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.
    """
    # NOTE(review): the 'proxies = {}' initializer and the final
    # 'return proxies' appear to be missing from this excerpt.
    for name, value in os.environ.items():
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
if os.name == 'mac':
    # Mac-specific proxy discovery via Internet Config; only the HTTP
    # proxy is handled.  NOTE(review): the 'def' header, the Internet
    # Config setup, and the returns are missing from this excerpt.
        """Return a dictionary of scheme -> proxy server URL mappings.

        By convention the mac uses Internet Config to store
        proxies. An HTTP proxy, for instance, is stored under
        if config.has_key('UseHTTPProxy') and config['UseHTTPProxy']:
                value = config['HTTPProxyHost']
                proxies['http'] = 'http://%s' % value
        # FTP: XXXX To be done.
        # Gopher: XXXX To be done.

    # Bypass check; the body (missing here) is a constant result.
    def proxy_bypass(x):
elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.
        """
        # NOTE(review): the 'proxies = {}' initializer, the _winreg import
        # guard, try: lines and several else: branches are missing from
        # this excerpt.
            # Std module, so should be around - but you never know!
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
            # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do

        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass(host):
        # Consults the registry ProxyOverride list (glob patterns, with
        # '<local>' expanded to the local host's names/addresses).
        # Std modules, so should be around - but you never know!
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except WindowsError:
        if not proxyEnable or not proxyOverride:
        # try to make a host list from name and IP address.
            addr = socket.gethostbyname(host[0])
        except socket.error:
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        proxyOverride = proxyOverride.split(';')
        while i < len(proxyOverride):
            if proxyOverride[i] == '<local>':
                proxyOverride[i:i+1] = ['localhost',
                                        socket.gethostname(),
                                        socket.gethostbyname(
                                            socket.gethostname())]
        # print proxyOverride
        # now check if we match one of the registry values.
        for test in proxyOverride:
            test = test.replace(".", r"\.") # mask dots
            test = test.replace("*", r".*") # change glob sequence
            test = test.replace("?", r".") # change glob char
            # print "%s <--> %s" %( test, val )
            if re.match(test, val, re.I):
    # By default use environment variables
    getproxies = getproxies_environment

    # Default bypass check; the constant body is missing from this excerpt.
    def proxy_bypass(host):

# Test and time quote() and unquote()
# NOTE(review): the self-test harness below is heavily truncated; the
# test()/main() headers and much of their bodies are missing.
    for i in range(256): s = s + chr(i)
    print round(t1 - t0, 3), 'sec'

def reporthook(blocknum, blocksize, totalsize):
    # Report during remote transfers
    print "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)

        'file://localhost/etc/passwd',
        'ftp://ftp.python.org/pub/python/README',
##      'gopher://gopher.micro.umn.edu/1/',
        'http://www.python.org/index.html',
    if hasattr(URLopener, "open_https"):
        args.append('https://synergy.as.cmu.edu/~geek/')
        print '-'*10, url, '-'*10
        fn, h = urlretrieve(url, None, reporthook)
        for k in h.keys(): print k + ':', h[k]
        # Strip carriage returns before printing retrieved text.
        table = string.maketrans("", "")
        data = data.translate(table, "\r")
        opts, args = getopt.getopt(sys.argv[1:], "th")
    except getopt.error, msg:
        print "Use -h for help"
        print "Usage: python urllib.py [-t] [url ...]"
        print "-t runs self-test;",
        print "otherwise, contents of urls are printed"
        print "Use -h for help"
        print urlopen(url).read(),

# Run test program when run as a script
if __name__ == '__main__':