From 6fee09b9d2d649083063b78f50044f7f7188b098 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 11 Sep 2008 17:07:21 -0700 Subject: [PATCH] IGN:Use patched mechanize implementation that correctly closes connections --- src/calibre/__init__.py | 4 +- src/calibre/utils/mechanize/__init__.py | 125 ++ src/calibre/utils/mechanize/_auth.py | 500 +++++ src/calibre/utils/mechanize/_beautifulsoup.py | 1080 +++++++++++ src/calibre/utils/mechanize/_clientcookie.py | 1651 +++++++++++++++++ src/calibre/utils/mechanize/_debug.py | 28 + src/calibre/utils/mechanize/_gzip.py | 103 + src/calibre/utils/mechanize/_headersutil.py | 226 +++ src/calibre/utils/mechanize/_html.py | 607 ++++++ src/calibre/utils/mechanize/_http.py | 729 ++++++++ src/calibre/utils/mechanize/_lwpcookiejar.py | 185 ++ src/calibre/utils/mechanize/_mechanize.py | 656 +++++++ .../utils/mechanize/_mozillacookiejar.py | 159 ++ src/calibre/utils/mechanize/_msiecookiejar.py | 387 ++++ src/calibre/utils/mechanize/_opener.py | 421 +++++ src/calibre/utils/mechanize/_pullparser.py | 334 ++++ src/calibre/utils/mechanize/_request.py | 86 + src/calibre/utils/mechanize/_response.py | 515 +++++ src/calibre/utils/mechanize/_rfc3986.py | 240 +++ src/calibre/utils/mechanize/_seek.py | 16 + src/calibre/utils/mechanize/_upgrade.py | 40 + src/calibre/utils/mechanize/_urllib2.py | 62 + src/calibre/utils/mechanize/_useragent.py | 348 ++++ src/calibre/utils/mechanize/_util.py | 279 +++ 24 files changed, 8779 insertions(+), 2 deletions(-) create mode 100644 src/calibre/utils/mechanize/__init__.py create mode 100644 src/calibre/utils/mechanize/_auth.py create mode 100644 src/calibre/utils/mechanize/_beautifulsoup.py create mode 100644 src/calibre/utils/mechanize/_clientcookie.py create mode 100644 src/calibre/utils/mechanize/_debug.py create mode 100644 src/calibre/utils/mechanize/_gzip.py create mode 100644 src/calibre/utils/mechanize/_headersutil.py create mode 100644 src/calibre/utils/mechanize/_html.py create mode 100644 src/calibre/utils/mechanize/_http.py create mode 100644 src/calibre/utils/mechanize/_lwpcookiejar.py create mode 100644 src/calibre/utils/mechanize/_mechanize.py create mode 100644 src/calibre/utils/mechanize/_mozillacookiejar.py create mode 100644 src/calibre/utils/mechanize/_msiecookiejar.py create mode 100644 src/calibre/utils/mechanize/_opener.py create mode 100644 src/calibre/utils/mechanize/_pullparser.py create mode 100644 src/calibre/utils/mechanize/_request.py create mode 100644 src/calibre/utils/mechanize/_response.py create mode 100644 src/calibre/utils/mechanize/_rfc3986.py create mode 100644 src/calibre/utils/mechanize/_seek.py create mode 100644 src/calibre/utils/mechanize/_upgrade.py create mode 100644 src/calibre/utils/mechanize/_urllib2.py create mode 100644 src/calibre/utils/mechanize/_useragent.py create mode 100644 src/calibre/utils/mechanize/_util.py diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index 536ac5002a..c69518c0c5 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -2,7 +2,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import sys, os, re, logging, time, subprocess, mechanize, atexit +import sys, os, re, logging, time, subprocess, atexit from htmlentitydefs import name2codepoint from math import floor from logging import Formatter @@ -14,7 +14,7 @@ from calibre.constants import iswindows, isosx, islinux, isfrozen, \ terminal_controller, preferred_encoding, \ __appname__, __version__, __author__, \ win32event, 
win32api, winerror, fcntl - +from calibre.utils import mechanize def unicode_path(path, abs=False): if not isinstance(path, unicode): diff --git a/src/calibre/utils/mechanize/__init__.py b/src/calibre/utils/mechanize/__init__.py new file mode 100644 index 0000000000..8bea889f30 --- /dev/null +++ b/src/calibre/utils/mechanize/__init__.py @@ -0,0 +1,125 @@ +__all__ = [ + 'AbstractBasicAuthHandler', + 'AbstractDigestAuthHandler', + 'BaseHandler', + 'Browser', + 'BrowserStateError', + 'CacheFTPHandler', + 'ContentTooShortError', + 'Cookie', + 'CookieJar', + 'CookiePolicy', + 'DefaultCookiePolicy', + 'DefaultFactory', + 'FTPHandler', + 'Factory', + 'FileCookieJar', + 'FileHandler', + 'FormNotFoundError', + 'FormsFactory', + 'GopherError', + 'GopherHandler', + 'HTTPBasicAuthHandler', + 'HTTPCookieProcessor', + 'HTTPDefaultErrorHandler', + 'HTTPDigestAuthHandler', + 'HTTPEquivProcessor', + 'HTTPError', + 'HTTPErrorProcessor', + 'HTTPHandler', + 'HTTPPasswordMgr', + 'HTTPPasswordMgrWithDefaultRealm', + 'HTTPProxyPasswordMgr', + 'HTTPRedirectDebugProcessor', + 'HTTPRedirectHandler', + 'HTTPRefererProcessor', + 'HTTPRefreshProcessor', + 'HTTPRequestUpgradeProcessor', + 'HTTPResponseDebugProcessor', + 'HTTPRobotRulesProcessor', + 'HTTPSClientCertMgr', + 'HTTPSHandler', + 'HeadParser', + 'History', + 'LWPCookieJar', + 'Link', + 'LinkNotFoundError', + 'LinksFactory', + 'LoadError', + 'MSIECookieJar', + 'MozillaCookieJar', + 'OpenerDirector', + 'OpenerFactory', + 'ParseError', + 'ProxyBasicAuthHandler', + 'ProxyDigestAuthHandler', + 'ProxyHandler', + 'Request', + 'ResponseUpgradeProcessor', + 'RobotExclusionError', + 'RobustFactory', + 'RobustFormsFactory', + 'RobustLinksFactory', + 'RobustTitleFactory', + 'SeekableProcessor', + 'SeekableResponseOpener', + 'TitleFactory', + 'URLError', + 'USE_BARE_EXCEPT', + 'UnknownHandler', + 'UserAgent', + 'UserAgentBase', + 'XHTMLCompatibleHeadParser', + '__version__', + 'build_opener', + 'install_opener', + 'lwp_cookie_str', + 'make_response', + 'request_host', + 'response_seek_wrapper', # XXX deprecate in public interface? + 'seek_wrapped_response' # XXX should probably use this internally in place of response_seek_wrapper() + 'str2time', + 'urlopen', + 'urlretrieve'] + +from _mechanize import __version__ + +# high-level stateful browser-style interface +from _mechanize import \ + Browser, History, \ + BrowserStateError, LinkNotFoundError, FormNotFoundError + +# configurable URL-opener interface +from _useragent import UserAgentBase, UserAgent +from _html import \ + ParseError, \ + Link, \ + Factory, DefaultFactory, RobustFactory, \ + FormsFactory, LinksFactory, TitleFactory, \ + RobustFormsFactory, RobustLinksFactory, RobustTitleFactory + +# urllib2 work-alike interface (part from mechanize, part from urllib2) +# This is a superset of the urllib2 interface. 
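+
+# A quick sketch of typical use of the package (hypothetical URL; any
+# reachable server will do):
+#
+#     import mechanize
+#     br = mechanize.Browser()
+#     response = br.open("http://example.com/")
+#     print response.read()[:200]
+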
+from _urllib2 import *
+
+# misc
+from _opener import ContentTooShortError, OpenerFactory, urlretrieve
+from _util import http2time as str2time
+from _response import \
+     response_seek_wrapper, seek_wrapped_response, make_response
+from _http import HeadParser
+try:
+    from _http import XHTMLCompatibleHeadParser
+except ImportError:
+    pass
+
+# cookies
+from _clientcookie import Cookie, CookiePolicy, DefaultCookiePolicy, \
+     CookieJar, FileCookieJar, LoadError, request_host
+from _lwpcookiejar import LWPCookieJar, lwp_cookie_str
+from _mozillacookiejar import MozillaCookieJar
+from _msiecookiejar import MSIECookieJar
+
+# If you hate the idea of turning bugs into warnings, do:
+# import mechanize; mechanize.USE_BARE_EXCEPT = False
+USE_BARE_EXCEPT = True
diff --git a/src/calibre/utils/mechanize/_auth.py b/src/calibre/utils/mechanize/_auth.py
new file mode 100644
index 0000000000..9bb5873019
--- /dev/null
+++ b/src/calibre/utils/mechanize/_auth.py
@@ -0,0 +1,500 @@
+"""HTTP Authentication and Proxy support.
+
+All but HTTPProxyPasswordMgr come from Python 2.5.
+
+
+Copyright 2006 John J. Lee <jjl@pobox.com>
+
+This code is free software; you can redistribute it and/or modify it under
+the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
+included with the distribution).
+
+"""
+
+import re, base64, urlparse, posixpath, md5, sha, sys, copy, os, random, time
+
+from urllib2 import BaseHandler, HTTPError, parse_keqv_list, parse_http_list
+from urllib import getproxies, unquote, splittype, splituser, splitpasswd, \
+     splitport
+
+
+def _parse_proxy(proxy):
+    """Return (scheme, user, password, host/port) given a URL or an authority.
+
+    If a URL is supplied, it must have an authority (host:port) component.
+    According to RFC 3986, having an authority component means the URL must
+    have two slashes after the scheme:
+
+    >>> _parse_proxy('file:/ftp.example.com/')
+    Traceback (most recent call last):
+    ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
+
+    The first three items of the returned tuple may be None.
+ + Examples of authority parsing: + + >>> _parse_proxy('proxy.example.com') + (None, None, None, 'proxy.example.com') + >>> _parse_proxy('proxy.example.com:3128') + (None, None, None, 'proxy.example.com:3128') + + The authority component may optionally include userinfo (assumed to be + username:password): + + >>> _parse_proxy('joe:password@proxy.example.com') + (None, 'joe', 'password', 'proxy.example.com') + >>> _parse_proxy('joe:password@proxy.example.com:3128') + (None, 'joe', 'password', 'proxy.example.com:3128') + + Same examples, but with URLs instead: + + >>> _parse_proxy('http://proxy.example.com/') + ('http', None, None, 'proxy.example.com') + >>> _parse_proxy('http://proxy.example.com:3128/') + ('http', None, None, 'proxy.example.com:3128') + >>> _parse_proxy('http://joe:password@proxy.example.com/') + ('http', 'joe', 'password', 'proxy.example.com') + >>> _parse_proxy('http://joe:password@proxy.example.com:3128') + ('http', 'joe', 'password', 'proxy.example.com:3128') + + Everything after the authority is ignored: + + >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128') + ('ftp', 'joe', 'password', 'proxy.example.com') + + Test for no trailing '/' case: + + >>> _parse_proxy('http://joe:password@proxy.example.com') + ('http', 'joe', 'password', 'proxy.example.com') + + """ + scheme, r_scheme = splittype(proxy) + if not r_scheme.startswith("/"): + # authority + scheme = None + authority = proxy + else: + # URL + if not r_scheme.startswith("//"): + raise ValueError("proxy URL with no authority: %r" % proxy) + # We have an authority, so for RFC 3986-compliant URLs (by ss 3. + # and 3.3.), path is empty or starts with '/' + end = r_scheme.find("/", 2) + if end == -1: + end = None + authority = r_scheme[2:end] + userinfo, hostport = splituser(authority) + if userinfo is not None: + user, password = splitpasswd(userinfo) + else: + user = password = None + return scheme, user, password, hostport + +class ProxyHandler(BaseHandler): + # Proxies must be in front + handler_order = 100 + + def __init__(self, proxies=None): + if proxies is None: + proxies = getproxies() + assert hasattr(proxies, 'has_key'), "proxies must be a mapping" + self.proxies = proxies + for type, url in proxies.items(): + setattr(self, '%s_open' % type, + lambda r, proxy=url, type=type, meth=self.proxy_open: \ + meth(r, proxy, type)) + + def proxy_open(self, req, proxy, type): + orig_type = req.get_type() + proxy_type, user, password, hostport = _parse_proxy(proxy) + if proxy_type is None: + proxy_type = orig_type + if user and password: + user_pass = '%s:%s' % (unquote(user), unquote(password)) + creds = base64.encodestring(user_pass).strip() + req.add_header('Proxy-authorization', 'Basic ' + creds) + hostport = unquote(hostport) + req.set_proxy(hostport, proxy_type) + if orig_type == proxy_type: + # let other handlers take care of it + return None + else: + # need to start over, because the other handlers don't + # grok the proxy's URL type + # e.g. 
if we have a constructor arg proxies like so: + # {'http': 'ftp://proxy.example.com'}, we may end up turning + # a request for http://acme.example.com/a into one for + # ftp://proxy.example.com/a + return self.parent.open(req) + +class HTTPPasswordMgr: + + def __init__(self): + self.passwd = {} + + def add_password(self, realm, uri, user, passwd): + # uri could be a single URI or a sequence + if isinstance(uri, basestring): + uri = [uri] + if not realm in self.passwd: + self.passwd[realm] = {} + for default_port in True, False: + reduced_uri = tuple( + [self.reduce_uri(u, default_port) for u in uri]) + self.passwd[realm][reduced_uri] = (user, passwd) + + def find_user_password(self, realm, authuri): + domains = self.passwd.get(realm, {}) + for default_port in True, False: + reduced_authuri = self.reduce_uri(authuri, default_port) + for uris, authinfo in domains.iteritems(): + for uri in uris: + if self.is_suburi(uri, reduced_authuri): + return authinfo + return None, None + + def reduce_uri(self, uri, default_port=True): + """Accept authority or URI and extract only the authority and path.""" + # note HTTP URLs do not have a userinfo component + parts = urlparse.urlsplit(uri) + if parts[1]: + # URI + scheme = parts[0] + authority = parts[1] + path = parts[2] or '/' + else: + # host or host:port + scheme = None + authority = uri + path = '/' + host, port = splitport(authority) + if default_port and port is None and scheme is not None: + dport = {"http": 80, + "https": 443, + }.get(scheme) + if dport is not None: + authority = "%s:%d" % (host, dport) + return authority, path + + def is_suburi(self, base, test): + """Check if test is below base in a URI tree + + Both args must be URIs in reduced form. + """ + if base == test: + return True + if base[0] != test[0]: + return False + common = posixpath.commonprefix((base[1], test[1])) + if len(common) == len(base[1]): + return True + return False + + +class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr): + + def find_user_password(self, realm, authuri): + user, password = HTTPPasswordMgr.find_user_password(self, realm, + authuri) + if user is not None: + return user, password + return HTTPPasswordMgr.find_user_password(self, None, authuri) + + +class AbstractBasicAuthHandler: + + rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', re.I) + + # XXX there can actually be multiple auth-schemes in a + # www-authenticate header. 
should probably be a lot more careful + # in parsing them to extract multiple alternatives + + def __init__(self, password_mgr=None): + if password_mgr is None: + password_mgr = HTTPPasswordMgr() + self.passwd = password_mgr + self.add_password = self.passwd.add_password + + def http_error_auth_reqed(self, authreq, host, req, headers): + # host may be an authority (without userinfo) or a URL with an + # authority + # XXX could be multiple headers + authreq = headers.get(authreq, None) + if authreq: + mo = AbstractBasicAuthHandler.rx.search(authreq) + if mo: + scheme, realm = mo.groups() + if scheme.lower() == 'basic': + return self.retry_http_basic_auth(host, req, realm) + + def retry_http_basic_auth(self, host, req, realm): + user, pw = self.passwd.find_user_password(realm, host) + if pw is not None: + raw = "%s:%s" % (user, pw) + auth = 'Basic %s' % base64.encodestring(raw).strip() + if req.headers.get(self.auth_header, None) == auth: + return None + newreq = copy.copy(req) + newreq.add_header(self.auth_header, auth) + newreq.visit = False + return self.parent.open(newreq) + else: + return None + + +class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): + + auth_header = 'Authorization' + + def http_error_401(self, req, fp, code, msg, headers): + url = req.get_full_url() + return self.http_error_auth_reqed('www-authenticate', + url, req, headers) + + +class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): + + auth_header = 'Proxy-authorization' + + def http_error_407(self, req, fp, code, msg, headers): + # http_error_auth_reqed requires that there is no userinfo component in + # authority. Assume there isn't one, since urllib2 does not (and + # should not, RFC 3986 s. 3.2.1) support requests for URLs containing + # userinfo. + authority = req.get_host() + return self.http_error_auth_reqed('proxy-authenticate', + authority, req, headers) + + +def randombytes(n): + """Return n random bytes.""" + # Use /dev/urandom if it is available. Fall back to random module + # if not. It might be worthwhile to extend this function to use + # other platform-specific mechanisms for getting random bytes. + if os.path.exists("/dev/urandom"): + f = open("/dev/urandom") + s = f.read(n) + f.close() + return s + else: + L = [chr(random.randrange(0, 256)) for i in range(n)] + return "".join(L) + +class AbstractDigestAuthHandler: + # Digest authentication is specified in RFC 2617. + + # XXX The client does not inspect the Authentication-Info header + # in a successful response. + + # XXX It should be possible to test this implementation against + # a mock server that just generates a static set of challenges. + + # XXX qop="auth-int" supports is shaky + + def __init__(self, passwd=None): + if passwd is None: + passwd = HTTPPasswordMgr() + self.passwd = passwd + self.add_password = self.passwd.add_password + self.retried = 0 + self.nonce_count = 0 + + def reset_retry_count(self): + self.retried = 0 + + def http_error_auth_reqed(self, auth_header, host, req, headers): + authreq = headers.get(auth_header, None) + if self.retried > 5: + # Don't fail endlessly - if we failed once, we'll probably + # fail a second time. Hm. Unless the Password Manager is + # prompting for the information. Crap. 
This isn't great + # but it's better than the current 'repeat until recursion + # depth exceeded' approach + raise HTTPError(req.get_full_url(), 401, "digest auth failed", + headers, None) + else: + self.retried += 1 + if authreq: + scheme = authreq.split()[0] + if scheme.lower() == 'digest': + return self.retry_http_digest_auth(req, authreq) + + def retry_http_digest_auth(self, req, auth): + token, challenge = auth.split(' ', 1) + chal = parse_keqv_list(parse_http_list(challenge)) + auth = self.get_authorization(req, chal) + if auth: + auth_val = 'Digest %s' % auth + if req.headers.get(self.auth_header, None) == auth_val: + return None + newreq = copy.copy(req) + newreq.add_unredirected_header(self.auth_header, auth_val) + newreq.visit = False + return self.parent.open(newreq) + + def get_cnonce(self, nonce): + # The cnonce-value is an opaque + # quoted string value provided by the client and used by both client + # and server to avoid chosen plaintext attacks, to provide mutual + # authentication, and to provide some message integrity protection. + # This isn't a fabulous effort, but it's probably Good Enough. + dig = sha.new("%s:%s:%s:%s" % (self.nonce_count, nonce, time.ctime(), + randombytes(8))).hexdigest() + return dig[:16] + + def get_authorization(self, req, chal): + try: + realm = chal['realm'] + nonce = chal['nonce'] + qop = chal.get('qop') + algorithm = chal.get('algorithm', 'MD5') + # mod_digest doesn't send an opaque, even though it isn't + # supposed to be optional + opaque = chal.get('opaque', None) + except KeyError: + return None + + H, KD = self.get_algorithm_impls(algorithm) + if H is None: + return None + + user, pw = self.passwd.find_user_password(realm, req.get_full_url()) + if user is None: + return None + + # XXX not implemented yet + if req.has_data(): + entdig = self.get_entity_digest(req.get_data(), chal) + else: + entdig = None + + A1 = "%s:%s:%s" % (user, realm, pw) + A2 = "%s:%s" % (req.get_method(), + # XXX selector: what about proxies and full urls + req.get_selector()) + if qop == 'auth': + self.nonce_count += 1 + ncvalue = '%08x' % self.nonce_count + cnonce = self.get_cnonce(nonce) + noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2)) + respdig = KD(H(A1), noncebit) + elif qop is None: + respdig = KD(H(A1), "%s:%s" % (nonce, H(A2))) + else: + # XXX handle auth-int. + pass + + # XXX should the partial digests be encoded too? + + base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \ + 'response="%s"' % (user, realm, nonce, req.get_selector(), + respdig) + if opaque: + base += ', opaque="%s"' % opaque + if entdig: + base += ', digest="%s"' % entdig + base += ', algorithm="%s"' % algorithm + if qop: + base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce) + return base + + def get_algorithm_impls(self, algorithm): + # lambdas assume digest modules are imported at the top level + if algorithm == 'MD5': + H = lambda x: md5.new(x).hexdigest() + elif algorithm == 'SHA': + H = lambda x: sha.new(x).hexdigest() + # XXX MD5-sess + KD = lambda s, d: H("%s:%s" % (s, d)) + return H, KD + + def get_entity_digest(self, data, chal): + # XXX not implemented yet + return None + + +class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): + """An authentication protocol defined by RFC 2069 + + Digest authentication improves on basic authentication because it + does not transmit passwords in the clear. 
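+
+    A usage sketch (hypothetical URL, realm and credentials; the handler
+    is wired into an opener exactly as with urllib2):
+
+        handler = HTTPDigestAuthHandler()
+        handler.add_password("realm", "http://example.com/", "joe", "secret")
+        opener = build_opener(handler)
+        opener.open("http://example.com/protected")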
+ """ + + auth_header = 'Authorization' + handler_order = 490 + + def http_error_401(self, req, fp, code, msg, headers): + host = urlparse.urlparse(req.get_full_url())[1] + retry = self.http_error_auth_reqed('www-authenticate', + host, req, headers) + self.reset_retry_count() + return retry + + +class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): + + auth_header = 'Proxy-Authorization' + handler_order = 490 + + def http_error_407(self, req, fp, code, msg, headers): + host = req.get_host() + retry = self.http_error_auth_reqed('proxy-authenticate', + host, req, headers) + self.reset_retry_count() + return retry + + +# XXX ugly implementation, should probably not bother deriving +class HTTPProxyPasswordMgr(HTTPPasswordMgr): + # has default realm and host/port + def add_password(self, realm, uri, user, passwd): + # uri could be a single URI or a sequence + if uri is None or isinstance(uri, basestring): + uris = [uri] + else: + uris = uri + passwd_by_domain = self.passwd.setdefault(realm, {}) + for uri in uris: + for default_port in True, False: + reduced_uri = self.reduce_uri(uri, default_port) + passwd_by_domain[reduced_uri] = (user, passwd) + + def find_user_password(self, realm, authuri): + attempts = [(realm, authuri), (None, authuri)] + # bleh, want default realm to take precedence over default + # URI/authority, hence this outer loop + for default_uri in False, True: + for realm, authuri in attempts: + authinfo_by_domain = self.passwd.get(realm, {}) + for default_port in True, False: + reduced_authuri = self.reduce_uri(authuri, default_port) + for uri, authinfo in authinfo_by_domain.iteritems(): + if uri is None and not default_uri: + continue + if self.is_suburi(uri, reduced_authuri): + return authinfo + user, password = None, None + + if user is not None: + break + return user, password + + def reduce_uri(self, uri, default_port=True): + if uri is None: + return None + return HTTPPasswordMgr.reduce_uri(self, uri, default_port) + + def is_suburi(self, base, test): + if base is None: + # default to the proxy's host/port + hostport, path = test + base = (hostport, "/") + return HTTPPasswordMgr.is_suburi(self, base, test) + + +class HTTPSClientCertMgr(HTTPPasswordMgr): + # implementation inheritance: this is not a proper subclass + def add_key_cert(self, uri, key_file, cert_file): + self.add_password(None, uri, key_file, cert_file) + def find_key_cert(self, authuri): + return HTTPPasswordMgr.find_user_password(self, None, authuri) diff --git a/src/calibre/utils/mechanize/_beautifulsoup.py b/src/calibre/utils/mechanize/_beautifulsoup.py new file mode 100644 index 0000000000..2541dcc63a --- /dev/null +++ b/src/calibre/utils/mechanize/_beautifulsoup.py @@ -0,0 +1,1080 @@ +"""Beautiful Soup +Elixir and Tonic +"The Screen-Scraper's Friend" +v2.1.1 +http://www.crummy.com/software/BeautifulSoup/ + +Beautiful Soup parses arbitrarily invalid XML- or HTML-like substance +into a tree representation. It provides methods and Pythonic idioms +that make it easy to search and modify the tree. + +A well-formed XML/HTML document will yield a well-formed data +structure. An ill-formed XML/HTML document will yield a +correspondingly ill-formed data structure. If your document is only +locally well-formed, you can use this library to find and process the +well-formed part of it. The BeautifulSoup class has heuristics for +obtaining a sensible parse tree in the face of common HTML errors. + +Beautiful Soup has no external dependencies. It works with Python 2.2 +and up. 
+ +Beautiful Soup defines classes for four different parsing strategies: + + * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific + language that kind of looks like XML. + + * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid + or invalid. + + * ICantBelieveItsBeautifulSoup, for parsing valid but bizarre HTML + that trips up BeautifulSoup. + + * BeautifulSOAP, for making it easier to parse XML documents that use + lots of subelements containing a single string, where you'd prefer + they put that string into an attribute (such as SOAP messages). + +You can subclass BeautifulStoneSoup or BeautifulSoup to create a +parsing strategy specific to an XML schema or a particular bizarre +HTML document. Typically your subclass would just override +SELF_CLOSING_TAGS and/or NESTABLE_TAGS. +""" +from __future__ import generators + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "2.1.1" +__date__ = "$Date: 2004/10/18 00:14:20 $" +__copyright__ = "Copyright (c) 2004-2005 Leonard Richardson" +__license__ = "PSF" + +from sgmllib import SGMLParser, SGMLParseError +import types +import re +import sgmllib + +#This code makes Beautiful Soup able to parse XML with namespaces +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') + +class NullType(object): + + """Similar to NoneType with a corresponding singleton instance + 'Null' that, unlike None, accepts any message and returns itself. + + Examples: + >>> Null("send", "a", "message")("and one more", + ... "and what you get still") is Null + True + """ + + def __new__(cls): return Null + def __call__(self, *args, **kwargs): return Null +## def __getstate__(self, *args): return Null + def __getattr__(self, attr): return Null + def __getitem__(self, item): return Null + def __setattr__(self, attr, value): pass + def __setitem__(self, item, value): pass + def __len__(self): return 0 + # FIXME: is this a python bug? otherwise ``for x in Null: pass`` + # never terminates... 
+ def __iter__(self): return iter([]) + def __contains__(self, item): return False + def __repr__(self): return "Null" +Null = object.__new__(NullType) + +class PageElement: + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + def setup(self, parent=Null, previous=Null): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + self.previous = previous + self.next = Null + self.previousSibling = Null + self.nextSibling = Null + if self.parent and self.parent.contents: + self.previousSibling = self.parent.contents[-1] + self.previousSibling.nextSibling = self + + def findNext(self, name=None, attrs={}, text=None): + """Returns the first item that matches the given criteria and + appears after this Tag in the document.""" + return self._first(self.fetchNext, name, attrs, text) + firstNext = findNext + + def fetchNext(self, name=None, attrs={}, text=None, limit=None): + """Returns all items that match the given criteria and appear + before after Tag in the document.""" + return self._fetch(name, attrs, text, limit, self.nextGenerator) + + def findNextSibling(self, name=None, attrs={}, text=None): + """Returns the closest sibling to this Tag that matches the + given criteria and appears after this Tag in the document.""" + return self._first(self.fetchNextSiblings, name, attrs, text) + firstNextSibling = findNextSibling + + def fetchNextSiblings(self, name=None, attrs={}, text=None, limit=None): + """Returns the siblings of this Tag that match the given + criteria and appear after this Tag in the document.""" + return self._fetch(name, attrs, text, limit, self.nextSiblingGenerator) + + def findPrevious(self, name=None, attrs={}, text=None): + """Returns the first item that matches the given criteria and + appears before this Tag in the document.""" + return self._first(self.fetchPrevious, name, attrs, text) + + def fetchPrevious(self, name=None, attrs={}, text=None, limit=None): + """Returns all items that match the given criteria and appear + before this Tag in the document.""" + return self._fetch(name, attrs, text, limit, self.previousGenerator) + firstPrevious = findPrevious + + def findPreviousSibling(self, name=None, attrs={}, text=None): + """Returns the closest sibling to this Tag that matches the + given criteria and appears before this Tag in the document.""" + return self._first(self.fetchPreviousSiblings, name, attrs, text) + firstPreviousSibling = findPreviousSibling + + def fetchPreviousSiblings(self, name=None, attrs={}, text=None, + limit=None): + """Returns the siblings of this Tag that match the given + criteria and appear before this Tag in the document.""" + return self._fetch(name, attrs, text, limit, + self.previousSiblingGenerator) + + def findParent(self, name=None, attrs={}): + """Returns the closest parent of this Tag that matches the given + criteria.""" + r = Null + l = self.fetchParents(name, attrs, 1) + if l: + r = l[0] + return r + firstParent = findParent + + def fetchParents(self, name=None, attrs={}, limit=None): + """Returns the parents of this Tag that match the given + criteria.""" + return self._fetch(name, attrs, None, limit, self.parentGenerator) + + #These methods do the real heavy lifting. + + def _first(self, method, name, attrs, text): + r = Null + l = method(name, attrs, text, 1) + if l: + r = l[0] + return r + + def _fetch(self, name, attrs, text, limit, generator): + "Iterates over a generator looking for things that match." 
+ if not hasattr(attrs, 'items'): + attrs = {'class' : attrs} + + results = [] + g = generator() + while True: + try: + i = g.next() + except StopIteration: + break + found = None + if isinstance(i, Tag): + if not text: + if not name or self._matches(i, name): + match = True + for attr, matchAgainst in attrs.items(): + check = i.get(attr) + if not self._matches(check, matchAgainst): + match = False + break + if match: + found = i + elif text: + if self._matches(i, text): + found = i + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #Generators that can be used to navigate starting from both + #NavigableTexts and Tags. + def nextGenerator(self): + i = self + while i: + i = i.next + yield i + + def nextSiblingGenerator(self): + i = self + while i: + i = i.nextSibling + yield i + + def previousGenerator(self): + i = self + while i: + i = i.previous + yield i + + def previousSiblingGenerator(self): + i = self + while i: + i = i.previousSibling + yield i + + def parentGenerator(self): + i = self + while i: + i = i.parent + yield i + + def _matches(self, chunk, howToMatch): + #print 'looking for %s in %s' % (howToMatch, chunk) + # + # If given a list of items, return true if the list contains a + # text element that matches. + if isList(chunk) and not isinstance(chunk, Tag): + for tag in chunk: + if isinstance(tag, NavigableText) and self._matches(tag, howToMatch): + return True + return False + if callable(howToMatch): + return howToMatch(chunk) + if isinstance(chunk, Tag): + #Custom match methods take the tag as an argument, but all other + #ways of matching match the tag name as a string + chunk = chunk.name + #Now we know that chunk is a string + if not isinstance(chunk, basestring): + chunk = str(chunk) + if hasattr(howToMatch, 'match'): + # It's a regexp object. + return howToMatch.search(chunk) + if isList(howToMatch): + return chunk in howToMatch + if hasattr(howToMatch, 'items'): + return howToMatch.has_key(chunk) + #It's just a string + return str(howToMatch) == chunk + +class NavigableText(PageElement): + + def __getattr__(self, attr): + "For backwards compatibility, text.string gives you text" + if attr == 'string': + return self + else: + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) + +class NavigableString(str, NavigableText): + pass + +class NavigableUnicodeString(unicode, NavigableText): + pass + +class Tag(PageElement): + + """Represents a found HTML tag with its attributes and contents.""" + + def __init__(self, name, attrs=None, parent=Null, previous=Null): + "Basic constructor." + self.name = name + if attrs == None: + attrs = [] + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." 
+ return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + fetch() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.fetch, args, kwargs) + + def __getattr__(self, tag): + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.first(tag[:-3]) + elif tag.find('__') != 0: + return self.first(tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. Should this be fixed?""" + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self): + """Renders this tag as a string.""" + return str(self) + + def __unicode__(self): + return self.__str__(1) + + def __str__(self, needUnicode=None, showStructureIndent=None): + """Returns a string or Unicode representation of this tag and + its contents. 
+
+        NOTE: since Python's HTML parser consumes whitespace, this
+        method is not certain to reproduce the whitespace present in
+        the original string."""
+
+        attrs = []
+        if self.attrs:
+            for key, val in self.attrs:
+                attrs.append('%s="%s"' % (key, val))
+        close = ''
+        closeTag = ''
+        if self.isSelfClosing():
+            close = ' /'
+        else:
+            closeTag = '</%s>' % self.name
+        indentIncrement = None
+        if showStructureIndent != None:
+            indentIncrement = showStructureIndent
+            if not self.hidden:
+                indentIncrement += 1
+        contents = self.renderContents(indentIncrement, needUnicode=needUnicode)
+        if showStructureIndent:
+            space = '\n%s' % (' ' * showStructureIndent)
+        if self.hidden:
+            s = contents
+        else:
+            s = []
+            attributeString = ''
+            if attrs:
+                attributeString = ' ' + ' '.join(attrs)
+            if showStructureIndent:
+                s.append(space)
+            s.append('<%s%s%s>' % (self.name, attributeString, close))
+            s.append(contents)
+            if closeTag and showStructureIndent != None:
+                s.append(space)
+            s.append(closeTag)
+            s = ''.join(s)
+        isUnicode = type(s) == types.UnicodeType
+        if needUnicode and not isUnicode:
+            s = unicode(s)
+        elif isUnicode and needUnicode==False:
+            s = str(s)
+        return s
+
+    def prettify(self, needUnicode=None):
+        return self.__str__(needUnicode, showStructureIndent=True)
+
+    def renderContents(self, showStructureIndent=None, needUnicode=None):
+        """Renders the contents of this tag as a (possibly Unicode)
+        string."""
+        s=[]
+        for c in self:
+            text = None
+            if isinstance(c, NavigableUnicodeString) or type(c) == types.UnicodeType:
+                text = unicode(c)
+            elif isinstance(c, Tag):
+                s.append(c.__str__(needUnicode, showStructureIndent))
+            elif needUnicode:
+                text = unicode(c)
+            else:
+                text = str(c)
+            if text:
+                if showStructureIndent != None:
+                    if text[-1] == '\n':
+                        text = text[:-1]
+                s.append(text)
+        return ''.join(s)
+
+    #Soup methods
+
+    def firstText(self, text, recursive=True):
+        """Convenience method to retrieve the first piece of text matching the
+        given criteria. 'text' can be a string, a regular expression object,
+        a callable that takes a string and returns whether or not the
+        string 'matches', etc."""
+        return self.first(recursive=recursive, text=text)
+
+    def fetchText(self, text, recursive=True, limit=None):
+        """Convenience method to retrieve all pieces of text matching the
+        given criteria. 'text' can be a string, a regular expression object,
+        a callable that takes a string and returns whether or not the
+        string 'matches', etc."""
+        return self.fetch(recursive=recursive, text=text, limit=limit)
+
+    def first(self, name=None, attrs={}, recursive=True, text=None):
+        """Return only the first child of this
+        Tag matching the given criteria."""
+        r = Null
+        l = self.fetch(name, attrs, recursive, text, 1)
+        if l:
+            r = l[0]
+        return r
+    findChild = first
+
+    def fetch(self, name=None, attrs={}, recursive=True, text=None,
+              limit=None):
+        """Extracts a list of Tag objects that match the given
+        criteria.  You can specify the name of the Tag and any
+        attributes you want the Tag to have.
+
+        The value of a key-value pair in the 'attrs' map can be a
+        string, a list of strings, a regular expression object, or a
+        callable that takes a string and returns whether or not the
+        string matches for some custom definition of 'matches'.
The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._fetch(name, attrs, text, limit, generator) + fetchChildren = fetch + + #Utility methods + + def isSelfClosing(self): + """Returns true iff this is a self-closing tag as defined in the HTML + standard. + + TODO: This is specific to BeautifulSoup and its subclasses, but it's + used by __str__""" + return self.name in BeautifulSoup.SELF_CLOSING_TAGS + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.contents.append(tag) + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def childGenerator(self): + for i in range(0, len(self.contents)): + yield self.contents[i] + raise StopIteration + + def recursiveChildGenerator(self): + stack = [(self, 0)] + while stack: + tag, start = stack.pop() + if isinstance(tag, Tag): + for i in range(start, len(tag.contents)): + a = tag.contents[i] + yield a + if isinstance(a, Tag) and tag.contents: + if i < len(tag.contents) - 1: + stack.append((tag, i+1)) + stack.append((a, 0)) + break + raise StopIteration + + +def isList(l): + """Convenience method that works with all 2.x versions of Python + to determine whether or not something is listlike.""" + return hasattr(l, '__iter__') \ + or (type(l) in (types.ListType, types.TupleType)) + +def buildTagMap(default, *args): + """Turns a list of maps, lists, or scalars into a single map. + Used to build the SELF_CLOSING_TAGS and NESTABLE_TAGS maps out + of lists and partial maps.""" + built = {} + for portion in args: + if hasattr(portion, 'items'): + #It's a map. Merge it. + for k,v in portion.items(): + built[k] = v + elif isList(portion): + #It's a list. Map each item to the default. + for k in portion: + built[k] = default + else: + #It's a scalar. Map it to the default. + built[portion] = default + return built + +class BeautifulStoneSoup(Tag, SGMLParser): + + """This class contains the basic parser and fetch code. It defines + a parser that knows nothing about tag behavior except for the + following: + + You can't close a tag without closing all the tags it encloses. + That is, "" actually means + "". + + [Another possible explanation is "", but since + this class defines no SELF_CLOSING_TAGS, it will never use that + explanation.] + + This class is useful for parsing XML or made-up markup languages, + or when BeautifulSoup makes an assumption counter to what you were + expecting.""" + + SELF_CLOSING_TAGS = {} + NESTABLE_TAGS = {} + RESET_NESTING_TAGS = {} + QUOTE_TAGS = {} + + #As a public service we will by default silently replace MS smart quotes + #and similar characters with their HTML or ASCII equivalents. 
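+#(A sketch of the effect: a stray Windows-1252 '\x97' in the input is
+#rewritten to '&mdash;' by the last PARSER_MASSAGE rule below, before
+#sgmllib ever sees it.)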
+    MS_CHARS = { '\x80' : '&euro;',
+                 '\x81' : ' ',
+                 '\x82' : '&sbquo;',
+                 '\x83' : '&fnof;',
+                 '\x84' : '&bdquo;',
+                 '\x85' : '&hellip;',
+                 '\x86' : '&dagger;',
+                 '\x87' : '&Dagger;',
+                 '\x88' : '&caret;',
+                 '\x89' : '%',
+                 '\x8A' : '&Scaron;',
+                 '\x8B' : '&lt;',
+                 '\x8C' : '&OElig;',
+                 '\x8D' : '?',
+                 '\x8E' : 'Z',
+                 '\x8F' : '?',
+                 '\x90' : '?',
+                 '\x91' : '&lsquo;',
+                 '\x92' : '&rsquo;',
+                 '\x93' : '&ldquo;',
+                 '\x94' : '&rdquo;',
+                 '\x95' : '&bull;',
+                 '\x96' : '&ndash;',
+                 '\x97' : '&mdash;',
+                 '\x98' : '&tilde;',
+                 '\x99' : '&trade;',
+                 '\x9a' : '&scaron;',
+                 '\x9b' : '&gt;',
+                 '\x9c' : '&oelig;',
+                 '\x9d' : '?',
+                 '\x9e' : 'z',
+                 '\x9f' : '&Yuml;',}
+
+    PARSER_MASSAGE = [(re.compile('(<[^<>]*)/>'),
+                       lambda(x):x.group(1) + ' />'),
+                      (re.compile('<!\s+([^<>]*)>'),
+                       lambda(x):'<!' + x.group(1) + '>'),
+                      (re.compile("([\x80-\x9f])"),
+                       lambda(x): BeautifulStoneSoup.MS_CHARS.get(x.group(1)))
+                      ]
+
+    ROOT_TAG_NAME = '[document]'
+
+    def __init__(self, text=None, avoidParserProblems=True,
+                 initialTextIsEverything=True):
+        """Initialize this as the 'root tag' and feed in any text to
+        the parser.
+
+        NOTE about avoidParserProblems: sgmllib will process most bad
+        HTML, and BeautifulSoup has tricks for dealing with some HTML
+        that kills sgmllib, but Beautiful Soup can nonetheless choke
+        or lose data if your data uses self-closing tags or
+        declarations incorrectly. By default, Beautiful Soup sanitizes
+        its input to avoid the vast majority of these problems. The
+        problems are relatively rare, even in bad HTML, so feel free
+        to pass in False to avoidParserProblems if they don't apply to
+        you, and you'll get better performance. The only reason I have
+        this turned on by default is so I don't get so many tech
+        support questions.
+
+        The two most common instances of invalid HTML that will choke
+        sgmllib are fixed by the default parser massage techniques:
+
+         <br/>
(No space between name of closing tag and tag close) + (Extraneous whitespace in declaration) + + You can pass in a custom list of (RE object, replace method) + tuples to get Beautiful Soup to scrub your input the way you + want.""" + Tag.__init__(self, self.ROOT_TAG_NAME) + if avoidParserProblems \ + and not isList(avoidParserProblems): + avoidParserProblems = self.PARSER_MASSAGE + self.avoidParserProblems = avoidParserProblems + SGMLParser.__init__(self) + self.quoteStack = [] + self.hidden = 1 + self.reset() + if hasattr(text, 'read'): + #It's a file-type object. + text = text.read() + if text: + self.feed(text) + if initialTextIsEverything: + self.done() + + def __getattr__(self, methodName): + """This method routes method call requests to either the SGMLParser + superclass or the Tag superclass, depending on the method name.""" + if methodName.find('start_') == 0 or methodName.find('end_') == 0 \ + or methodName.find('do_') == 0: + return SGMLParser.__getattr__(self, methodName) + elif methodName.find('__') != 0: + return Tag.__getattr__(self, methodName) + else: + raise AttributeError + + def feed(self, text): + if self.avoidParserProblems: + for fix, m in self.avoidParserProblems: + text = fix.sub(m, text) + SGMLParser.feed(self, text) + + def done(self): + """Called when you're done parsing, so that the unclosed tags can be + correctly processed.""" + self.endData() #NEW + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def reset(self): + SGMLParser.reset(self) + self.currentData = [] + self.currentTag = None + self.tagStack = [] + self.pushTag(self) + + def popTag(self): + tag = self.tagStack.pop() + # Tags with just one string-owning child get the child as a + # 'string' property, so that soup.tag.string is shorthand for + # soup.tag.contents[0] + if len(self.currentTag.contents) == 1 and \ + isinstance(self.currentTag.contents[0], NavigableText): + self.currentTag.string = self.currentTag.contents[0] + + #print "Pop", tag.name + if self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def pushTag(self, tag): + #print "Push", tag.name + if self.currentTag: + self.currentTag.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + + def endData(self): + currentData = ''.join(self.currentData) + if currentData: + if not currentData.strip(): + if '\n' in currentData: + currentData = '\n' + else: + currentData = ' ' + c = NavigableString + if type(currentData) == types.UnicodeType: + c = NavigableUnicodeString + o = c(currentData) + o.setup(self.currentTag, self.previous) + if self.previous: + self.previous.next = o + self.previous = o + self.currentTag.contents.append(o) + self.currentData = [] + + def _popToTag(self, name, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. 
+        If inclusivePop is false, pops the tag
+        stack up to but *not* including the most recent instance of
+        the given tag."""
+        if name == self.ROOT_TAG_NAME:
+            return
+
+        numPops = 0
+        mostRecentTag = None
+        for i in range(len(self.tagStack)-1, 0, -1):
+            if name == self.tagStack[i].name:
+                numPops = len(self.tagStack)-i
+                break
+        if not inclusivePop:
+            numPops = numPops - 1
+
+        for i in range(0, numPops):
+            mostRecentTag = self.popTag()
+        return mostRecentTag
+
+    def _smartPop(self, name):
+
+        """We need to pop up to the previous tag of this type, unless
+        one of this tag's nesting reset triggers comes between this
+        tag and the previous tag of this type, OR unless this tag is a
+        generic nesting trigger and another generic nesting trigger
+        comes between this tag and the previous tag of this type.
+
+        Examples:
+         <p>Foo<b>Bar<p> should pop to 'p', not 'b'.
+         <p>Foo<table>Bar<p> should pop to 'table', not 'p'.
+         <p>Foo<table><tr>Bar<p> should pop to 'tr', not 'p'.
+         <p>FooBar<p> should pop to 'p', not 'b'.
+
+         <li><ul><li> *<li>* should pop to 'ul', not the first 'li'.
+         <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr'
+         <td><tr><td> *<td>* should pop to 'tr', not the first 'td'
+        """
+
+        nestingResetTriggers = self.NESTABLE_TAGS.get(name)
+        isNestable = nestingResetTriggers != None
+        isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
+        popTo = None
+        inclusive = True
+        for i in range(len(self.tagStack)-1, 0, -1):
+            p = self.tagStack[i]
+            if (not p or p.name == name) and not isNestable:
+                #Non-nestable tags get popped to the top or to their
+                #last occurrence.
+                popTo = name
+                break
+            if (nestingResetTriggers != None
+                and p.name in nestingResetTriggers) \
+                or (nestingResetTriggers == None and isResetNesting
+                    and self.RESET_NESTING_TAGS.has_key(p.name)):
+
+                #If we encounter one of the nesting reset triggers
+                #peculiar to this tag, or we encounter another tag
+                #that causes nesting to reset, pop up to but not
+                #including that tag.
+
+                popTo = p.name
+                inclusive = False
+                break
+            p = p.parent
+        if popTo:
+            self._popToTag(popTo, inclusive)
+
+    def unknown_starttag(self, name, attrs, selfClosing=0):
+        #print "Start tag %s" % name
+        if self.quoteStack:
+            #This is not a real tag.
+            #print "<%s> is not real!" % name
+            attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs))
+            self.handle_data('<%s%s>' % (name, attrs))
+            return
+        self.endData()
+        if not name in self.SELF_CLOSING_TAGS and not selfClosing:
+            self._smartPop(name)
+        tag = Tag(name, attrs, self.currentTag, self.previous)
+        if self.previous:
+            self.previous.next = tag
+        self.previous = tag
+        self.pushTag(tag)
+        if selfClosing or name in self.SELF_CLOSING_TAGS:
+            self.popTag()
+        if name in self.QUOTE_TAGS:
+            #print "Beginning quote (%s)" % name
+            self.quoteStack.append(name)
+            self.literal = 1
+
+    def unknown_endtag(self, name):
+        if self.quoteStack and self.quoteStack[-1] != name:
+            #This is not a real end tag.
+            #print "</%s> is not real!" % name
+            self.handle_data('</%s>' % name)
+            return
+        self.endData()
+        self._popToTag(name)
+        if self.quoteStack and self.quoteStack[-1] == name:
+            self.quoteStack.pop()
+            self.literal = (len(self.quoteStack) > 0)
+
+    def handle_data(self, data):
+        self.currentData.append(data)
+
+    def handle_pi(self, text):
+        "Propagate processing instructions right through."
+        self.handle_data("<?%s>" % text)
+
+    def handle_comment(self, text):
+        "Propagate comments right through."
+        self.handle_data("<!--%s-->" % text)
+
+    def handle_charref(self, ref):
+        "Propagate char refs right through."
+        self.handle_data('&#%s;' % ref)
+
+    def handle_entityref(self, ref):
+        "Propagate entity refs right through."
+        self.handle_data('&%s;' % ref)
+
+    def handle_decl(self, data):
+        "Propagate DOCTYPEs and the like right through."
+        self.handle_data('<!%s>' % data)
+
+    def parse_declaration(self, i):
+        """Treat a bogus SGML declaration as raw data. Treat a CDATA
+        declaration as regular data."""
+        j = None
+        if self.rawdata[i:i+9] == '<![CDATA[':
+            k = self.rawdata.find(']]>', i)
+            if k == -1:
+                k = len(self.rawdata)
+            self.handle_data(self.rawdata[i+9:k])
+            j = k+3
+        else:
+            try:
+                j = SGMLParser.parse_declaration(self, i)
+            except SGMLParseError:
+                toHandle = self.rawdata[i:]
+                self.handle_data(toHandle)
+                j = i + len(toHandle)
+        return j
+
+class BeautifulSoup(BeautifulStoneSoup):
+
+    """This parser knows the following facts about HTML:
+
+    * Some tags have no closing tag and should be interpreted as being
+      closed as soon as they are encountered.
+
+    * The text inside some tags (ie. 'script') may contain tags which
+      are not really part of the document and which should be parsed
+      as text, not tags. If you want to parse the text as tags, you can
+      always fetch it and parse it explicitly.
+
+    * Tag nesting rules:
+
+      Most tags can't be nested at all. For instance, the occurrence of
+      a <p> tag should implicitly close the previous <p> tag.
+
+       <p>Para1<p>Para2
+      should be transformed into:
+       <p>Para1</p><p>Para2
+
+      Some tags can be nested arbitrarily. For instance, the occurrence
+      of a <blockquote> tag should _not_ implicitly close the previous
+      <blockquote> tag.
+
+       Alice said: <blockquote>Bob said: <blockquote>Blah
+      should NOT be transformed into:
+       Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah
+
+      Some tags can be nested, but the nesting is reset by the
+      interposition of other tags. For instance, a <tr> tag should
+      implicitly close the previous <tr> tag within the same <table>,
+      but not close a <tr> tag in another table.
+
+       <table><tr>Blah<tr>Blah
+      should be transformed into:
+       <table><tr>Blah</tr><tr>Blah
+      but,
+       <tr>Blah<table><tr>Blah
+      should NOT be transformed into
+       <tr>Blah<table></tr><tr>
    Blah + + Differing assumptions about tag nesting rules are a major source + of problems with the BeautifulSoup class. If BeautifulSoup is not + treating as nestable a tag your page author treats as nestable, + try ICantBelieveItsBeautifulSoup before writing your own + subclass.""" + + SELF_CLOSING_TAGS = buildTagMap(None, ['br' , 'hr', 'input', 'img', 'meta', + 'spacer', 'link', 'frame', 'base']) + + QUOTE_TAGS = {'script': None} + + #According to the HTML standard, each of these inline tags can + #contain another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', + 'center'] + + #According to the HTML standard, these block tags can contain + #another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del'] + + #Lists can contain other lists, but there are restrictions. + NESTABLE_LIST_TAGS = { 'ol' : [], + 'ul' : [], + 'li' : ['ul', 'ol'], + 'dl' : [], + 'dd' : ['dl'], + 'dt' : ['dl'] } + + #Tables can contain other tables, but there are restrictions. + NESTABLE_TABLE_TAGS = {'table' : [], + 'tr' : ['table', 'tbody', 'tfoot', 'thead'], + 'td' : ['tr'], + 'th' : ['tr'], + } + + NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre'] + + #If one of these tags is encountered, all tags up to the next tag of + #this type are popped. + RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', + NON_NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, + NESTABLE_TABLE_TAGS) + + NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) + +class ICantBelieveItsBeautifulSoup(BeautifulSoup): + + """The BeautifulSoup class is oriented towards skipping over + common HTML errors like unclosed tags. However, sometimes it makes + errors of its own. For instance, consider this fragment: + + FooBar + + This is perfectly valid (if bizarre) HTML. However, the + BeautifulSoup class will implicitly close the first b tag when it + encounters the second 'b'. It will think the author wrote + "FooBar", and didn't close the first 'b' tag, because + there's no real-world reason to bold something that's already + bold. When it encounters '' it will close two more 'b' + tags, for a grand total of three tags closed instead of two. This + can throw off the rest of your document structure. The same is + true of a number of other tags, listed below. + + It's much more common for someone to forget to close (eg.) a 'b' + tag than to actually use nested 'b' tags, and the BeautifulSoup + class handles the common case. This class handles the + not-co-common case: where you can't believe someone wrote what + they did, but it's valid HTML and BeautifulSoup screwed up by + assuming it wouldn't be. 
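+
+    A sketch of the difference:
+
+        ICantBelieveItsBeautifulSoup("<b>Foo<b>Bar</b></b>")
+
+    keeps the second 'b' tag nested inside the first, where BeautifulSoup
+    would have closed the first 'b' early.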
+ + If this doesn't do what you need, try subclassing this class or + BeautifulSoup, and providing your own list of NESTABLE_TAGS.""" + + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ + ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', + 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', + 'big'] + + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript'] + + NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) + +class BeautifulSOAP(BeautifulStoneSoup): + """This class will push a tag with only a single string child into + the tag's parent as an attribute. The attribute's name is the tag + name, and the value is the string child. An example should give + the flavor of the change: + + baz + => + baz + + You can then access fooTag['bar'] instead of fooTag.barTag.string. + + This is, of course, useful for scraping structures that tend to + use subelements instead of attributes, such as SOAP messages. Note + that it modifies its input, so don't print the modified version + out. + + I'm not sure how many people really want to use this class; let me + know if you do. Mainly I like the name.""" + + def popTag(self): + if len(self.tagStack) > 1: + tag = self.tagStack[-1] + parent = self.tagStack[-2] + parent._getAttrMap() + if (isinstance(tag, Tag) and len(tag.contents) == 1 and + isinstance(tag.contents[0], NavigableText) and + not parent.attrMap.has_key(tag.name)): + parent[tag.name] = tag.contents[0] + BeautifulStoneSoup.popTag(self) + +#Enterprise class names! It has come to our attention that some people +#think the names of the Beautiful Soup parser classes are too silly +#and "unprofessional" for use in enterprise screen-scraping. We feel +#your pain! For such-minded folk, the Beautiful Soup Consortium And +#All-Night Kosher Bakery recommends renaming this file to +#"RobustParser.py" (or, in cases of extreme enterprisitude, +#"RobustParserBeanInterface.class") and using the following +#enterprise-friendly class aliases: +class RobustXMLParser(BeautifulStoneSoup): + pass +class RobustHTMLParser(BeautifulSoup): + pass +class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup): + pass +class SimplifyingSOAPParser(BeautifulSOAP): + pass + +### + + +#By default, act as an HTML pretty-printer. +if __name__ == '__main__': + import sys + soup = BeautifulStoneSoup(sys.stdin.read()) + print soup.prettify() diff --git a/src/calibre/utils/mechanize/_clientcookie.py b/src/calibre/utils/mechanize/_clientcookie.py new file mode 100644 index 0000000000..e8f0f67d4a --- /dev/null +++ b/src/calibre/utils/mechanize/_clientcookie.py @@ -0,0 +1,1651 @@ +"""HTTP cookie handling for web clients. + +This module originally developed from my port of Gisle Aas' Perl module +HTTP::Cookies, from the libwww-perl library. + +Docstrings, comments and debug strings in this code refer to the +attributes of the HTTP cookie system as cookie-attributes, to distinguish +them clearly from Python attributes. + + CookieJar____ + / \ \ + FileCookieJar \ \ + / | \ \ \ + MozillaCookieJar | LWPCookieJar \ \ + | | \ + | ---MSIEBase | \ + | / | | \ + | / MSIEDBCookieJar BSDDBCookieJar + |/ + MSIECookieJar + +Comments to John J Lee . 
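+
+A minimal round trip (hypothetical URL; names as exported from the
+mechanize package):
+
+    jar = CookieJar()
+    opener = build_opener(HTTPCookieProcessor(jar))
+    opener.open("http://example.com/")
+    for cookie in jar:
+        print cookie.name, cookie.value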
+
+
+Copyright 2002-2006 John J Lee <jjl@pobox.com>
+Copyright 1997-1999 Gisle Aas (original libwww-perl code)
+Copyright 2002-2003 Johnny Lee (original MSIE Perl code)
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+import sys, re, copy, time, struct, urllib, types, logging
+try:
+    import threading
+    _threading = threading; del threading
+except ImportError:
+    import dummy_threading
+    _threading = dummy_threading; del dummy_threading
+import httplib  # only for the default HTTP port
+
+MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
+                         "instance initialised with one)")
+DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
+
+from _headersutil import split_header_words, parse_ns_headers
+from _util import isstringlike
+import _rfc3986
+
+debug = logging.getLogger("mechanize.cookies").debug
+
+
+def reraise_unmasked_exceptions(unmasked=()):
+    # There are a few catch-all except: statements in this module, for
+    # catching input that's bad in unexpected ways.
+    # This function re-raises some exceptions we don't want to trap.
+    import mechanize, warnings
+    if not mechanize.USE_BARE_EXCEPT:
+        raise
+    unmasked = unmasked + (KeyboardInterrupt, SystemExit, MemoryError)
+    etype = sys.exc_info()[0]
+    if issubclass(etype, unmasked):
+        raise
+    # swallowed an exception
+    import traceback, StringIO
+    f = StringIO.StringIO()
+    traceback.print_exc(None, f)
+    msg = f.getvalue()
+    warnings.warn("mechanize bug!\n%s" % msg, stacklevel=2)
+
+
+IPV4_RE = re.compile(r"\.\d+$")
+def is_HDN(text):
+    """Return True if text is a host domain name."""
+    # XXX
+    # This may well be wrong. Which RFC is HDN defined in, if any (for
+    # the purposes of RFC 2965)?
+    # For the current implementation, what about IPv6? Remember to look
+    # at other uses of IPV4_RE also, if you change this.
+    return not (IPV4_RE.search(text) or
+                text == "" or
+                text[0] == "." or text[-1] == ".")
+
+def domain_match(A, B):
+    """Return True if domain A domain-matches domain B, according to RFC 2965.
+
+    A and B may be host domain names or IP addresses.
+
+    RFC 2965, section 1:
+
+    Host names can be specified either as an IP address or a HDN string.
+    Sometimes we compare one host name with another. (Such comparisons SHALL
+    be case-insensitive.) Host A's name domain-matches host B's if
+
+         * their host name strings string-compare equal; or
+
+         * A is a HDN string and has the form NB, where N is a non-empty
+           name string, B has the form .B', and B' is a HDN string. (So,
+           x.y.com domain-matches .Y.com but not Y.com.)
+
+    Note that domain-match is not a commutative operation: a.b.c.com
+    domain-matches .c.com, but not the reverse.
+
+    """
+    # Note that, if A or B are IP addresses, the only relevant part of the
+    # definition of the domain-match algorithm is the direct string-compare.
+    A = A.lower()
+    B = B.lower()
+    if A == B:
+        return True
+    if not is_HDN(A):
+        return False
+    i = A.rfind(B)
+    has_form_nb = not (i == -1 or i == 0)
+    return (
+        has_form_nb and
+        B.startswith(".") and
+        is_HDN(B[1:])
+        )
+
+def liberal_is_HDN(text):
+    """Return True if text is sort-of like a host domain name.
+
+    For accepting/blocking domains.
+
+    """
+    return not IPV4_RE.search(text)
+
+def user_domain_match(A, B):
+    """For blocking/accepting domains.
+
+    A and B may be host domain names or IP addresses.
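+
+    For instance:
+
+    >>> user_domain_match("www.acme.com", ".acme.com")
+    True
+    >>> user_domain_match("acme.com", ".acme.com")
+    False
+    >>> user_domain_match("192.168.1.2", "192.168.1.2")
+    True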
+ + """ + A = A.lower() + B = B.lower() + if not (liberal_is_HDN(A) and liberal_is_HDN(B)): + if A == B: + # equal IP addresses + return True + return False + initial_dot = B.startswith(".") + if initial_dot and A.endswith(B): + return True + if not initial_dot and A == B: + return True + return False + +cut_port_re = re.compile(r":\d+$") +def request_host(request): + """Return request-host, as defined by RFC 2965. + + Variation from RFC: returned value is lowercased, for convenient + comparison. + + """ + url = request.get_full_url() + host = _rfc3986.urlsplit(url)[1] + if host is None: + host = request.get_header("Host", "") + + # remove port, if present + host = cut_port_re.sub("", host, 1) + return host.lower() + +def eff_request_host(request): + """Return a tuple (request-host, effective request-host name). + + As defined by RFC 2965, except both are lowercased. + + """ + erhn = req_host = request_host(request) + if req_host.find(".") == -1 and not IPV4_RE.search(req_host): + erhn = req_host + ".local" + return req_host, erhn + +def request_path(request): + """request-URI, as defined by RFC 2965.""" + url = request.get_full_url() + path, query, frag = _rfc3986.urlsplit(url)[2:] + path = escape_path(path) + req_path = _rfc3986.urlunsplit((None, None, path, query, frag)) + if not req_path.startswith("/"): + req_path = "/"+req_path + return req_path + +def request_port(request): + host = request.get_host() + i = host.find(':') + if i >= 0: + port = host[i+1:] + try: + int(port) + except ValueError: + debug("nonnumeric port: '%s'", port) + return None + else: + port = DEFAULT_HTTP_PORT + return port + +# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't +# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738). +HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()" +ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])") +def uppercase_escaped_char(match): + return "%%%s" % match.group(1).upper() +def escape_path(path): + """Escape any invalid characters in HTTP URL, and uppercase all escapes.""" + # There's no knowing what character encoding was used to create URLs + # containing %-escapes, but since we have to pick one to escape invalid + # path characters, we pick UTF-8, as recommended in the HTML 4.0 + # specification: + # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1 + # And here, kind of: draft-fielding-uri-rfc2396bis-03 + # (And in draft IRI specification: draft-duerst-iri-05) + # (And here, for new URI schemes: RFC 2718) + if isinstance(path, types.UnicodeType): + path = path.encode("utf-8") + path = urllib.quote(path, HTTP_PATH_SAFE) + path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path) + return path + +def reach(h): + """Return reach of host h, as defined by RFC 2965, section 1. + + The reach R of a host name H is defined as follows: + + * If + + - H is the host domain name of a host; and, + + - H has the form A.B; and + + - A has no embedded (that is, interior) dots; and + + - B has at least one embedded dot, or B is the string "local". + then the reach of H is .B. + + * Otherwise, the reach of H is H. 
+
+    >>> reach("www.acme.com")
+    '.acme.com'
+    >>> reach("acme.com")
+    'acme.com'
+    >>> reach("acme.local")
+    '.local'
+
+    """
+    i = h.find(".")
+    if i >= 0:
+        #a = h[:i]  # this line is only here to show what a is
+        b = h[i+1:]
+        i = b.find(".")
+        if is_HDN(h) and (i >= 0 or b == "local"):
+            return "."+b
+    return h
+
+def is_third_party(request):
+    """
+
+    RFC 2965, section 3.3.6:
+
+        An unverifiable transaction is to a third-party host if its request-
+        host U does not domain-match the reach R of the request-host O in the
+        origin transaction.
+
+    """
+    req_host = request_host(request)
+    # the origin request's request-host was stuffed into request by
+    # _urllib2_support.AbstractHTTPHandler
+    return not domain_match(req_host, reach(request.origin_req_host))
+
+
+class Cookie:
+    """HTTP Cookie.
+
+    This class represents both Netscape and RFC 2965 cookies.
+
+    This is deliberately a very simple class. It just holds attributes. It's
+    possible to construct Cookie instances that don't comply with the cookie
+    standards. CookieJar.make_cookies is the factory function for Cookie
+    objects -- it deals with cookie parsing, supplying defaults, and
+    normalising to the representation used in this class. CookiePolicy is
+    responsible for checking them to see whether they should be accepted from
+    and returned to the server.
+
+    version: integer;
+    name: string;
+    value: string (may be None);
+    port: string; None indicates no attribute was supplied (eg. "Port", rather
+     than eg. "Port=80"); otherwise, a port string (eg. "80") or a port list
+     string (eg. "80,8080")
+    port_specified: boolean; true if a value was supplied with the Port
+     cookie-attribute
+    domain: string;
+    domain_specified: boolean; true if Domain was explicitly set
+    domain_initial_dot: boolean; true if Domain as set in HTTP header by server
+     started with a dot (yes, this really is necessary!)
+    path: string;
+    path_specified: boolean; true if Path was explicitly set
+    secure: boolean; true if should only be returned over secure connection
+    expires: integer; seconds since epoch (RFC 2965 cookies should calculate
+     this value from the Max-Age attribute)
+    discard: boolean, true if this is a session cookie; (if no expires value,
+     this should be true)
+    comment: string;
+    comment_url: string;
+    rfc2109: boolean; true if cookie arrived in a Set-Cookie: (not
+     Set-Cookie2:) header, but had a version cookie-attribute of 1
+    rest: mapping of other cookie-attributes
+
+    Note that the port may be present in the headers, but unspecified ("Port"
+    rather than "Port=80", for example); if this is the case, port is None.
+
+    """
+
+    def __init__(self, version, name, value,
+                 port, port_specified,
+                 domain, domain_specified, domain_initial_dot,
+                 path, path_specified,
+                 secure,
+                 expires,
+                 discard,
+                 comment,
+                 comment_url,
+                 rest,
+                 rfc2109=False,
+                 ):
+
+        if version is not None: version = int(version)
+        if expires is not None: expires = int(expires)
+        if port is None and port_specified is True:
+            raise ValueError("if port is None, port_specified must be false")
+
+        self.version = version
+        self.name = name
+        self.value = value
+        self.port = port
+        self.port_specified = port_specified
+        # normalise case, as per RFC 2965 section 3.3.3
+        self.domain = domain.lower()
+        self.domain_specified = domain_specified
+        # Sigh. We need to know whether the domain given in the
+        # cookie-attribute had an initial dot, in order to follow RFC 2965
+        # (as clarified in draft errata). Needed for the returned $Domain
+        # value.
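+        # (Illustrative: a cookie set via "Domain=.acme.com" must be
+        # returned with '$Domain=".acme.com"', dot included, while one
+        # set via "Domain=acme.com" is returned as '$Domain="acme.com"'.)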
+        self.domain_initial_dot = domain_initial_dot
+        self.path = path
+        self.path_specified = path_specified
+        self.secure = secure
+        self.expires = expires
+        self.discard = discard
+        self.comment = comment
+        self.comment_url = comment_url
+        self.rfc2109 = rfc2109
+
+        self._rest = copy.copy(rest)
+
+    def has_nonstandard_attr(self, name):
+        return self._rest.has_key(name)
+    def get_nonstandard_attr(self, name, default=None):
+        return self._rest.get(name, default)
+    def set_nonstandard_attr(self, name, value):
+        self._rest[name] = value
+    def nonstandard_attr_keys(self):
+        return self._rest.keys()
+
+    def is_expired(self, now=None):
+        if now is None: now = time.time()
+        return (self.expires is not None) and (self.expires <= now)
+
+    def __str__(self):
+        if self.port is None: p = ""
+        else: p = ":"+self.port
+        limit = self.domain + p + self.path
+        if self.value is not None:
+            namevalue = "%s=%s" % (self.name, self.value)
+        else:
+            namevalue = self.name
+        return "<Cookie %s for %s>" % (namevalue, limit)
+
+    def __repr__(self):
+        args = []
+        for name in ["version", "name", "value",
+                     "port", "port_specified",
+                     "domain", "domain_specified", "domain_initial_dot",
+                     "path", "path_specified",
+                     "secure", "expires", "discard", "comment", "comment_url",
+                     ]:
+            attr = getattr(self, name)
+            args.append("%s=%s" % (name, repr(attr)))
+        args.append("rest=%s" % repr(self._rest))
+        args.append("rfc2109=%s" % repr(self.rfc2109))
+        return "Cookie(%s)" % ", ".join(args)
+
+
+class CookiePolicy:
+    """Defines which cookies get accepted from and returned to server.
+
+    May also modify cookies.
+
+    The subclass DefaultCookiePolicy defines the standard rules for Netscape
+    and RFC 2965 cookies -- override that if you want a customised policy.
+
+    As well as implementing set_ok and return_ok, implementations of this
+    interface must also supply the following attributes, indicating which
+    protocols should be used, and how. These can be read and set at any time,
+    though whether that makes complete sense from the protocol point of view is
+    doubtful.
+
+    Public attributes:
+
+    netscape: implement netscape protocol
+    rfc2965: implement RFC 2965 protocol
+    rfc2109_as_netscape:
+       WARNING: This argument will change or go away if it is not accepted into
+       the Python standard library in this form!
+       If true, treat RFC 2109 cookies as though they were Netscape cookies. The
+       default is for this attribute to be None, which means treat 2109 cookies
+       as RFC 2965 cookies unless RFC 2965 handling is switched off (which it is,
+       by default), and as Netscape cookies otherwise.
+    hide_cookie2: don't add Cookie2 header to requests (the presence of
+     this header indicates to the server that we understand RFC 2965
+     cookies)
+
+    """
+    def set_ok(self, cookie, request):
+        """Return true if (and only if) cookie should be accepted from server.
+
+        Currently, pre-expired cookies never get this far -- the CookieJar
+        class deletes such cookies itself.
+
+        cookie: mechanize.Cookie object
+        request: object implementing the interface defined by
+         CookieJar.extract_cookies.__doc__
+
+        """
+        raise NotImplementedError()
+
+    def return_ok(self, cookie, request):
+        """Return true if (and only if) cookie should be returned to server.
+
+        cookie: mechanize.Cookie object
+        request: object implementing the interface defined by
+         CookieJar.add_cookie_header.__doc__
+
+        """
+        raise NotImplementedError()
+
+    def domain_return_ok(self, domain, request):
+        """Return false if cookies should not be returned, given cookie domain.
+
+        This is here as an optimization, to remove the need for checking every
+        cookie with a particular domain (which may involve reading many files).
+        The default implementations of domain_return_ok and path_return_ok
+        (return True) leave all the work to return_ok.
+
+        If domain_return_ok returns true for the cookie domain, path_return_ok
+        is called for the cookie path. Otherwise, path_return_ok and return_ok
+        are never called for that cookie domain. If path_return_ok returns
+        true, return_ok is called with the Cookie object itself for a full
+        check. Otherwise, return_ok is never called for that cookie path.
+
+        Note that domain_return_ok is called for every *cookie* domain, not
+        just for the *request* domain. For example, the function might be
+        called with both ".acme.com" and "www.acme.com" if the request domain
+        is "www.acme.com". The same goes for path_return_ok.
+
+        For argument documentation, see the docstring for return_ok.
+
+        """
+        return True
+
+    def path_return_ok(self, path, request):
+        """Return false if cookies should not be returned, given cookie path.
+
+        See the docstring for domain_return_ok.
+
+        """
+        return True
+
+
+class DefaultCookiePolicy(CookiePolicy):
+    """Implements the standard rules for accepting and returning cookies.
+
+    Both RFC 2965 and Netscape cookies are covered. RFC 2965 handling is
+    switched off by default.
+
+    The easiest way to provide your own policy is to override this class and
+    call its methods in your overridden implementations before adding your own
+    additional checks.
+
+    import mechanize
+    class MyCookiePolicy(mechanize.DefaultCookiePolicy):
+        def set_ok(self, cookie, request):
+            if not mechanize.DefaultCookiePolicy.set_ok(
+                self, cookie, request):
+                return False
+            if i_dont_want_to_store_this_cookie():
+                return False
+            return True
+
+    In addition to the features required to implement the CookiePolicy
+    interface, this class allows you to block and allow domains from setting
+    and receiving cookies. There are also some strictness switches that allow
+    you to tighten up the rather loose Netscape protocol rules a little bit
+    (at the cost of blocking some benign cookies).
+
+    A domain blacklist and whitelist are provided (both off by default). Only
+    domains not in the blacklist and present in the whitelist (if the whitelist
+    is active) participate in cookie setting and returning. Use the
+    blocked_domains constructor argument, and blocked_domains and
+    set_blocked_domains methods (and the corresponding argument and methods for
+    allowed_domains). If you set a whitelist, you can turn it off again by
+    setting it to None.
+
+    Domains in block or allow lists that do not start with a dot must
+    string-compare equal. For example, "acme.com" matches a blacklist entry of
+    "acme.com", but "www.acme.com" does not. Domains that do start with a dot
+    are matched by more specific domains too. For example, both "www.acme.com"
+    and "www.munitions.acme.com" match ".acme.com" (but "acme.com" itself does
+    not). IP addresses are an exception, and must match exactly. For example,
+    if blocked_domains contains "192.168.1.2" and ".168.1.2", 192.168.1.2 is
+    blocked, but 193.168.1.2 is not.
+
+    Additional Public Attributes:
+
+    General strictness switches
+
+    strict_domain: don't allow sites to set two-component domains with
+     country-code top-level domains like .co.uk, .gov.uk, .co.nz, etc.
+     This is far from perfect and isn't guaranteed to work!
+
+    RFC 2965 protocol strictness switches
+
+    strict_rfc2965_unverifiable: follow RFC 2965 rules on unverifiable
+     transactions (usually, an unverifiable transaction is one resulting from
+     a redirect or an image hosted on another site); if this is false, cookies
+     are NEVER blocked on the basis of verifiability
+
+    Netscape protocol strictness switches
+
+    strict_ns_unverifiable: apply RFC 2965 rules on unverifiable transactions
+     even to Netscape cookies
+    strict_ns_domain: flags indicating how strict to be with domain-matching
+     rules for Netscape cookies:
+     DomainStrictNoDots: when setting cookies, host prefix must not contain a
+      dot (eg. www.foo.bar.com can't set a cookie for .bar.com, because
+      www.foo contains a dot)
+     DomainStrictNonDomain: cookies that did not explicitly specify a Domain
+      cookie-attribute can only be returned to a domain that string-compares
+      equal to the domain that set the cookie (eg. rockets.acme.com won't
+      be returned cookies from acme.com that had no Domain cookie-attribute)
+     DomainRFC2965Match: when setting cookies, require a full RFC 2965
+      domain-match
+     DomainLiberal and DomainStrict are the most useful combinations of the
+      above flags, for convenience
+    strict_ns_set_initial_dollar: ignore cookies in Set-Cookie: headers that
+     have names starting with '$'
+    strict_ns_set_path: don't allow setting cookies whose path doesn't
+     path-match request URI
+
+    """
+
+    DomainStrictNoDots = 1
+    DomainStrictNonDomain = 2
+    DomainRFC2965Match = 4
+
+    DomainLiberal = 0
+    DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
+
+    def __init__(self,
+                 blocked_domains=None, allowed_domains=None,
+                 netscape=True, rfc2965=False,
+                 # WARNING: this argument will change or go away if it is not
+                 # accepted into the Python standard library in this form!
+                 # default, ie. treat 2109 as netscape iff not rfc2965
+                 rfc2109_as_netscape=None,
+                 hide_cookie2=False,
+                 strict_domain=False,
+                 strict_rfc2965_unverifiable=True,
+                 strict_ns_unverifiable=False,
+                 strict_ns_domain=DomainLiberal,
+                 strict_ns_set_initial_dollar=False,
+                 strict_ns_set_path=False,
+                 ):
+        """
+        Constructor arguments should be used as keyword arguments only.
+
+        blocked_domains: sequence of domain names that we never accept cookies
+         from, nor return cookies to
+        allowed_domains: if not None, this is a sequence of the only domains
+         for which we accept and return cookies
+
+        For other arguments, see CookiePolicy.__doc__ and
+        DefaultCookiePolicy.__doc__.
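+
+        A sketch of typical construction (the domains are made up):
+
+            policy = DefaultCookiePolicy(
+                rfc2965=True,
+                blocked_domains=["ads.example.com", ".ads.example.com"],
+                strict_ns_domain=DefaultCookiePolicy.DomainStrict)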
+ + """ + self.netscape = netscape + self.rfc2965 = rfc2965 + self.rfc2109_as_netscape = rfc2109_as_netscape + self.hide_cookie2 = hide_cookie2 + self.strict_domain = strict_domain + self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable + self.strict_ns_unverifiable = strict_ns_unverifiable + self.strict_ns_domain = strict_ns_domain + self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar + self.strict_ns_set_path = strict_ns_set_path + + if blocked_domains is not None: + self._blocked_domains = tuple(blocked_domains) + else: + self._blocked_domains = () + + if allowed_domains is not None: + allowed_domains = tuple(allowed_domains) + self._allowed_domains = allowed_domains + + def blocked_domains(self): + """Return the sequence of blocked domains (as a tuple).""" + return self._blocked_domains + def set_blocked_domains(self, blocked_domains): + """Set the sequence of blocked domains.""" + self._blocked_domains = tuple(blocked_domains) + + def is_blocked(self, domain): + for blocked_domain in self._blocked_domains: + if user_domain_match(domain, blocked_domain): + return True + return False + + def allowed_domains(self): + """Return None, or the sequence of allowed domains (as a tuple).""" + return self._allowed_domains + def set_allowed_domains(self, allowed_domains): + """Set the sequence of allowed domains, or None.""" + if allowed_domains is not None: + allowed_domains = tuple(allowed_domains) + self._allowed_domains = allowed_domains + + def is_not_allowed(self, domain): + if self._allowed_domains is None: + return False + for allowed_domain in self._allowed_domains: + if user_domain_match(domain, allowed_domain): + return False + return True + + def set_ok(self, cookie, request): + """ + If you override set_ok, be sure to call this method. If it returns + false, so should your subclass (assuming your subclass wants to be more + strict about which cookies to accept). + + """ + debug(" - checking cookie %s", cookie) + + assert cookie.name is not None + + for n in "version", "verifiability", "name", "path", "domain", "port": + fn_name = "set_ok_"+n + fn = getattr(self, fn_name) + if not fn(cookie, request): + return False + + return True + + def set_ok_version(self, cookie, request): + if cookie.version is None: + # Version is always set to 0 by parse_ns_headers if it's a Netscape + # cookie, so this must be an invalid RFC 2965 cookie. + debug(" Set-Cookie2 without version attribute (%s)", cookie) + return False + if cookie.version > 0 and not self.rfc2965: + debug(" RFC 2965 cookies are switched off") + return False + elif cookie.version == 0 and not self.netscape: + debug(" Netscape cookies are switched off") + return False + return True + + def set_ok_verifiability(self, cookie, request): + if request.unverifiable and is_third_party(request): + if cookie.version > 0 and self.strict_rfc2965_unverifiable: + debug(" third-party RFC 2965 cookie during " + "unverifiable transaction") + return False + elif cookie.version == 0 and self.strict_ns_unverifiable: + debug(" third-party Netscape cookie during " + "unverifiable transaction") + return False + return True + + def set_ok_name(self, cookie, request): + # Try and stop servers setting V0 cookies designed to hack other + # servers that know both V0 and V1 protocols. 
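+        # (e.g. a Netscape cookie named "$Version" could masquerade as an
+        # RFC 2965 protocol attribute to a server that speaks both.)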
+        if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
+            cookie.name.startswith("$")):
+            debug("   illegal name (starts with '$'): '%s'", cookie.name)
+            return False
+        return True
+
+    def set_ok_path(self, cookie, request):
+        if cookie.path_specified:
+            req_path = request_path(request)
+            if ((cookie.version > 0 or
+                 (cookie.version == 0 and self.strict_ns_set_path)) and
+                not req_path.startswith(cookie.path)):
+                debug("   path attribute %s is not a prefix of request "
+                      "path %s", cookie.path, req_path)
+                return False
+        return True
+
+    def set_ok_countrycode_domain(self, cookie, request):
+        """Return False if explicit cookie domain is not acceptable.
+
+        Called by set_ok_domain, for convenience of overriding by
+        subclasses.
+
+        """
+        if cookie.domain_specified and self.strict_domain:
+            domain = cookie.domain
+            # since domain was specified, we know that:
+            assert domain.startswith(".")
+            if domain.count(".") == 2:
+                # domain like .foo.bar
+                i = domain.rfind(".")
+                tld = domain[i+1:]
+                sld = domain[1:i]
+                if (sld.lower() in [
+                    "co", "ac",
+                    "com", "edu", "org", "net", "gov", "mil", "int",
+                    "aero", "biz", "cat", "coop", "info", "jobs", "mobi",
+                    "museum", "name", "pro", "travel",
+                    ] and
+                    len(tld) == 2):
+                    # domain like .co.uk
+                    return False
+        return True
+
+    def set_ok_domain(self, cookie, request):
+        if self.is_blocked(cookie.domain):
+            debug("   domain %s is in user block-list", cookie.domain)
+            return False
+        if self.is_not_allowed(cookie.domain):
+            debug("   domain %s is not in user allow-list", cookie.domain)
+            return False
+        if not self.set_ok_countrycode_domain(cookie, request):
+            debug("   country-code second level domain %s", cookie.domain)
+            return False
+        if cookie.domain_specified:
+            req_host, erhn = eff_request_host(request)
+            domain = cookie.domain
+            if domain.startswith("."):
+                undotted_domain = domain[1:]
+            else:
+                undotted_domain = domain
+            embedded_dots = (undotted_domain.find(".") >= 0)
+            if not embedded_dots and domain != ".local":
+                debug("   non-local domain %s contains no embedded dot",
+                      domain)
+                return False
+            if cookie.version == 0:
+                if (not erhn.endswith(domain) and
+                    (not erhn.startswith(".") and
+                     not ("."+erhn).endswith(domain))):
+                    debug("   effective request-host %s (even with added "
+                          "initial dot) does not end with %s",
+                          erhn, domain)
+                    return False
+            if (cookie.version > 0 or
+                (self.strict_ns_domain & self.DomainRFC2965Match)):
+                if not domain_match(erhn, domain):
+                    debug("   effective request-host %s does not domain-match "
+                          "%s", erhn, domain)
+                    return False
+            if (cookie.version > 0 or
+                (self.strict_ns_domain & self.DomainStrictNoDots)):
+                host_prefix = req_host[:-len(domain)]
+                if (host_prefix.find(".") >= 0 and
+                    not IPV4_RE.search(req_host)):
+                    debug("   host prefix %s for domain %s contains a dot",
+                          host_prefix, domain)
+                    return False
+        return True
+
+    def set_ok_port(self, cookie, request):
+        if cookie.port_specified:
+            req_port = request_port(request)
+            if req_port is None:
+                req_port = "80"
+            else:
+                req_port = str(req_port)
+            for p in cookie.port.split(","):
+                try:
+                    int(p)
+                except ValueError:
+                    debug("   bad port %s (not numeric)", p)
+                    return False
+                if p == req_port:
+                    break
+            else:
+                debug("   request port (%s) not found in %s",
+                      req_port, cookie.port)
+                return False
+        return True
+
+    def return_ok(self, cookie, request):
+        """
+        If you override return_ok, be sure to call this method. If it returns
+        false, so should your subclass (assuming your subclass wants to be more
+        strict about which cookies to return).
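+
+        A sketch of a stricter subclass (the cookie name is made up):
+
+            class MyCookiePolicy(DefaultCookiePolicy):
+                def return_ok(self, cookie, request):
+                    if not DefaultCookiePolicy.return_ok(
+                        self, cookie, request):
+                        return False
+                    return cookie.name != "tracker"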
+ + """ + # Path has already been checked by path_return_ok, and domain blocking + # done by domain_return_ok. + debug(" - checking cookie %s", cookie) + + for n in "version", "verifiability", "secure", "expires", "port", "domain": + fn_name = "return_ok_"+n + fn = getattr(self, fn_name) + if not fn(cookie, request): + return False + return True + + def return_ok_version(self, cookie, request): + if cookie.version > 0 and not self.rfc2965: + debug(" RFC 2965 cookies are switched off") + return False + elif cookie.version == 0 and not self.netscape: + debug(" Netscape cookies are switched off") + return False + return True + + def return_ok_verifiability(self, cookie, request): + if request.unverifiable and is_third_party(request): + if cookie.version > 0 and self.strict_rfc2965_unverifiable: + debug(" third-party RFC 2965 cookie during unverifiable " + "transaction") + return False + elif cookie.version == 0 and self.strict_ns_unverifiable: + debug(" third-party Netscape cookie during unverifiable " + "transaction") + return False + return True + + def return_ok_secure(self, cookie, request): + if cookie.secure and request.get_type() != "https": + debug(" secure cookie with non-secure request") + return False + return True + + def return_ok_expires(self, cookie, request): + if cookie.is_expired(self._now): + debug(" cookie expired") + return False + return True + + def return_ok_port(self, cookie, request): + if cookie.port: + req_port = request_port(request) + if req_port is None: + req_port = "80" + for p in cookie.port.split(","): + if p == req_port: + break + else: + debug(" request port %s does not match cookie port %s", + req_port, cookie.port) + return False + return True + + def return_ok_domain(self, cookie, request): + req_host, erhn = eff_request_host(request) + domain = cookie.domain + + # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't + if (cookie.version == 0 and + (self.strict_ns_domain & self.DomainStrictNonDomain) and + not cookie.domain_specified and domain != erhn): + debug(" cookie with unspecified domain does not string-compare " + "equal to request domain") + return False + + if cookie.version > 0 and not domain_match(erhn, domain): + debug(" effective request-host name %s does not domain-match " + "RFC 2965 cookie domain %s", erhn, domain) + return False + if cookie.version == 0 and not ("."+erhn).endswith(domain): + debug(" request-host %s does not match Netscape cookie domain " + "%s", req_host, domain) + return False + return True + + def domain_return_ok(self, domain, request): + # Liberal check of domain. This is here as an optimization to avoid + # having to load lots of MSIE cookie files unless necessary. + + # Munge req_host and erhn to always start with a dot, so as to err on + # the side of letting cookies through. 
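+        # For example, a request host of "www.acme.com" becomes
+        # ".www.acme.com", which endswith both ".www.acme.com" and
+        # ".acme.com", so cookies for either domain get the full check.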
+ dotted_req_host, dotted_erhn = eff_request_host(request) + if not dotted_req_host.startswith("."): + dotted_req_host = "."+dotted_req_host + if not dotted_erhn.startswith("."): + dotted_erhn = "."+dotted_erhn + if not (dotted_req_host.endswith(domain) or + dotted_erhn.endswith(domain)): + #debug(" request domain %s does not match cookie domain %s", + # req_host, domain) + return False + + if self.is_blocked(domain): + debug(" domain %s is in user block-list", domain) + return False + if self.is_not_allowed(domain): + debug(" domain %s is not in user allow-list", domain) + return False + + return True + + def path_return_ok(self, path, request): + debug("- checking cookie path=%s", path) + req_path = request_path(request) + if not req_path.startswith(path): + debug(" %s does not path-match %s", req_path, path) + return False + return True + + +def vals_sorted_by_key(adict): + keys = adict.keys() + keys.sort() + return map(adict.get, keys) + +class MappingIterator: + """Iterates over nested mapping, depth-first, in sorted order by key.""" + def __init__(self, mapping): + self._s = [(vals_sorted_by_key(mapping), 0, None)] # LIFO stack + + def __iter__(self): return self + + def next(self): + # this is hairy because of lack of generators + while 1: + try: + vals, i, prev_item = self._s.pop() + except IndexError: + raise StopIteration() + if i < len(vals): + item = vals[i] + i = i + 1 + self._s.append((vals, i, prev_item)) + try: + item.items + except AttributeError: + # non-mapping + break + else: + # mapping + self._s.append((vals_sorted_by_key(item), 0, item)) + continue + return item + + +# Used as second parameter to dict.get method, to distinguish absent +# dict key from one with a None value. +class Absent: pass + +class CookieJar: + """Collection of HTTP cookies. + + You may not need to know about this class: try mechanize.urlopen(). + + The major methods are extract_cookies and add_cookie_header; these are all + you are likely to need. + + CookieJar supports the iterator protocol: + + for cookie in cookiejar: + # do something with cookie + + Methods: + + add_cookie_header(request) + extract_cookies(response, request) + make_cookies(response, request) + set_cookie_if_ok(cookie, request) + set_cookie(cookie) + clear_session_cookies() + clear_expired_cookies() + clear(domain=None, path=None, name=None) + + Public attributes + + policy: CookiePolicy object + + """ + + non_word_re = re.compile(r"\W") + quote_re = re.compile(r"([\"\\])") + strict_domain_re = re.compile(r"\.?[^.]*") + domain_re = re.compile(r"[^.]*") + dots_re = re.compile(r"^\.+") + + def __init__(self, policy=None): + """ + See CookieJar.__doc__ for argument documentation. 
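+
+        For example (a sketch):
+
+            jar = CookieJar(policy=DefaultCookiePolicy(rfc2965=True))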
+ + """ + if policy is None: + policy = DefaultCookiePolicy() + self._policy = policy + + self._cookies = {} + + # for __getitem__ iteration in pre-2.2 Pythons + self._prev_getitem_index = 0 + + def set_policy(self, policy): + self._policy = policy + + def _cookies_for_domain(self, domain, request): + cookies = [] + if not self._policy.domain_return_ok(domain, request): + return [] + debug("Checking %s for cookies to return", domain) + cookies_by_path = self._cookies[domain] + for path in cookies_by_path.keys(): + if not self._policy.path_return_ok(path, request): + continue + cookies_by_name = cookies_by_path[path] + for cookie in cookies_by_name.values(): + if not self._policy.return_ok(cookie, request): + debug(" not returning cookie") + continue + debug(" it's a match") + cookies.append(cookie) + return cookies + + def _cookies_for_request(self, request): + """Return a list of cookies to be returned to server.""" + cookies = [] + for domain in self._cookies.keys(): + cookies.extend(self._cookies_for_domain(domain, request)) + return cookies + + def _cookie_attrs(self, cookies): + """Return a list of cookie-attributes to be returned to server. + + like ['foo="bar"; $Path="/"', ...] + + The $Version attribute is also added when appropriate (currently only + once per request). + + """ + # add cookies in order of most specific (ie. longest) path first + def decreasing_size(a, b): return cmp(len(b.path), len(a.path)) + cookies.sort(decreasing_size) + + version_set = False + + attrs = [] + for cookie in cookies: + # set version of Cookie header + # XXX + # What should it be if multiple matching Set-Cookie headers have + # different versions themselves? + # Answer: there is no answer; was supposed to be settled by + # RFC 2965 errata, but that may never appear... + version = cookie.version + if not version_set: + version_set = True + if version > 0: + attrs.append("$Version=%s" % version) + + # quote cookie value if necessary + # (not for Netscape protocol, which already has any quotes + # intact, due to the poorly-specified Netscape Cookie: syntax) + if ((cookie.value is not None) and + self.non_word_re.search(cookie.value) and version > 0): + value = self.quote_re.sub(r"\\\1", cookie.value) + else: + value = cookie.value + + # add cookie-attributes to be returned in Cookie header + if cookie.value is None: + attrs.append(cookie.name) + else: + attrs.append("%s=%s" % (cookie.name, value)) + if version > 0: + if cookie.path_specified: + attrs.append('$Path="%s"' % cookie.path) + if cookie.domain.startswith("."): + domain = cookie.domain + if (not cookie.domain_initial_dot and + domain.startswith(".")): + domain = domain[1:] + attrs.append('$Domain="%s"' % domain) + if cookie.port is not None: + p = "$Port" + if cookie.port_specified: + p = p + ('="%s"' % cookie.port) + attrs.append(p) + + return attrs + + def add_cookie_header(self, request): + """Add correct Cookie: header to request (urllib2.Request object). + + The Cookie2 header is also added unless policy.hide_cookie2 is true. + + The request object (usually a urllib2.Request instance) must support + the methods get_full_url, get_host, get_type, has_header, get_header, + header_items and add_unredirected_header, as documented by urllib2, and + the port attribute (the port number). Actually, + RequestUpgradeProcessor will automatically upgrade your Request object + to one with has_header, get_header, header_items and + add_unredirected_header, if it lacks those methods, for compatibility + with pre-2.4 versions of urllib2. 
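+
+        A minimal usage sketch (the URL is made up, and jar is assumed to
+        be a CookieJar instance):
+
+            import urllib2
+            request = urllib2.Request("http://www.example.com/")
+            jar.add_cookie_header(request)
+            # any matching cookies are now in the request's Cookie: header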
+
+        """
+        debug("add_cookie_header")
+        self._policy._now = self._now = int(time.time())
+
+        req_host, erhn = eff_request_host(request)
+        strict_non_domain = (
+            self._policy.strict_ns_domain & self._policy.DomainStrictNonDomain)
+
+        cookies = self._cookies_for_request(request)
+
+        attrs = self._cookie_attrs(cookies)
+        if attrs:
+            if not request.has_header("Cookie"):
+                request.add_unredirected_header("Cookie", "; ".join(attrs))
+
+        # if necessary, advertise that we know RFC 2965
+        if self._policy.rfc2965 and not self._policy.hide_cookie2:
+            for cookie in cookies:
+                if cookie.version != 1 and not request.has_header("Cookie2"):
+                    request.add_unredirected_header("Cookie2", '$Version="1"')
+                    break
+
+        self.clear_expired_cookies()
+
+    def _normalized_cookie_tuples(self, attrs_set):
+        """Return list of tuples containing normalised cookie information.
+
+        attrs_set is the list of lists of key,value pairs extracted from
+        the Set-Cookie or Set-Cookie2 headers.
+
+        Tuples are name, value, standard, rest, where name and value are the
+        cookie name and value, standard is a dictionary containing the standard
+        cookie-attributes (discard, secure, version, expires or max-age,
+        domain, path and port) and rest is a dictionary containing the rest of
+        the cookie-attributes.
+
+        """
+        cookie_tuples = []
+
+        boolean_attrs = "discard", "secure"
+        value_attrs = ("version",
+                       "expires", "max-age",
+                       "domain", "path", "port",
+                       "comment", "commenturl")
+
+        for cookie_attrs in attrs_set:
+            name, value = cookie_attrs[0]
+
+            # Build dictionary of standard cookie-attributes (standard) and
+            # dictionary of other cookie-attributes (rest).
+
+            # Note: expiry time is normalised to seconds since epoch. V0
+            # cookies should have the Expires cookie-attribute, and V1 cookies
+            # should have Max-Age, but since V1 includes RFC 2109 cookies (and
+            # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
+            # accept either (but prefer Max-Age).
+            max_age_set = False
+
+            bad_cookie = False
+
+            standard = {}
+            rest = {}
+            for k, v in cookie_attrs[1:]:
+                lc = k.lower()
+                # don't lose case distinction for unknown fields
+                if lc in value_attrs or lc in boolean_attrs:
+                    k = lc
+                if k in boolean_attrs and v is None:
+                    # boolean cookie-attribute is present, but has no value
+                    # (like "discard", rather than "port=80")
+                    v = True
+                if standard.has_key(k):
+                    # only first value is significant
+                    continue
+                if k == "domain":
+                    if v is None:
+                        debug("   missing value for domain attribute")
+                        bad_cookie = True
+                        break
+                    # RFC 2965 section 3.3.3
+                    v = v.lower()
+                if k == "expires":
+                    if max_age_set:
+                        # Prefer max-age to expires (like Mozilla)
+                        continue
+                    if v is None:
+                        debug("   missing or invalid value for expires "
+                              "attribute: treating as session cookie")
+                        continue
+                if k == "max-age":
+                    max_age_set = True
+                    try:
+                        v = int(v)
+                    except ValueError:
+                        debug("   missing or invalid (non-numeric) value for "
+                              "max-age attribute")
+                        bad_cookie = True
+                        break
+                    # convert RFC 2965 Max-Age to seconds since epoch
+                    # XXX Strictly you're supposed to follow RFC 2616
+                    # age-calculation rules. Remember that zero Max-Age
+                    # is a request to discard (old and new) cookie, though.
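+                    # (For example, "Max-Age=3600" received now becomes an
+                    # absolute expiry time of self._now + 3600.)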
+ k = "expires" + v = self._now + v + if (k in value_attrs) or (k in boolean_attrs): + if (v is None and + k not in ["port", "comment", "commenturl"]): + debug(" missing value for %s attribute" % k) + bad_cookie = True + break + standard[k] = v + else: + rest[k] = v + + if bad_cookie: + continue + + cookie_tuples.append((name, value, standard, rest)) + + return cookie_tuples + + def _cookie_from_cookie_tuple(self, tup, request): + # standard is dict of standard cookie-attributes, rest is dict of the + # rest of them + name, value, standard, rest = tup + + domain = standard.get("domain", Absent) + path = standard.get("path", Absent) + port = standard.get("port", Absent) + expires = standard.get("expires", Absent) + + # set the easy defaults + version = standard.get("version", None) + if version is not None: version = int(version) + secure = standard.get("secure", False) + # (discard is also set if expires is Absent) + discard = standard.get("discard", False) + comment = standard.get("comment", None) + comment_url = standard.get("commenturl", None) + + # set default path + if path is not Absent and path != "": + path_specified = True + path = escape_path(path) + else: + path_specified = False + path = request_path(request) + i = path.rfind("/") + if i != -1: + if version == 0: + # Netscape spec parts company from reality here + path = path[:i] + else: + path = path[:i+1] + if len(path) == 0: path = "/" + + # set default domain + domain_specified = domain is not Absent + # but first we have to remember whether it starts with a dot + domain_initial_dot = False + if domain_specified: + domain_initial_dot = bool(domain.startswith(".")) + if domain is Absent: + req_host, erhn = eff_request_host(request) + domain = erhn + elif not domain.startswith("."): + domain = "."+domain + + # set default port + port_specified = False + if port is not Absent: + if port is None: + # Port attr present, but has no value: default to request port. + # Cookie should then only be sent back on that port. + port = request_port(request) + else: + port_specified = True + port = re.sub(r"\s+", "", port) + else: + # No port attr present. Cookie can be sent back on any port. + port = None + + # set default expires and discard + if expires is Absent: + expires = None + discard = True + elif expires <= self._now: + # Expiry date in past is request to delete cookie. This can't be + # in DefaultCookiePolicy, because can't delete cookies there. 
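+            # (e.g. a server sends "Set-Cookie: foo=; Max-Age=0", or an
+            # Expires date in the past, to delete an existing cookie "foo".)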
+ try: + self.clear(domain, path, name) + except KeyError: + pass + debug("Expiring cookie, domain='%s', path='%s', name='%s'", + domain, path, name) + return None + + return Cookie(version, + name, value, + port, port_specified, + domain, domain_specified, domain_initial_dot, + path, path_specified, + secure, + expires, + discard, + comment, + comment_url, + rest) + + def _cookies_from_attrs_set(self, attrs_set, request): + cookie_tuples = self._normalized_cookie_tuples(attrs_set) + + cookies = [] + for tup in cookie_tuples: + cookie = self._cookie_from_cookie_tuple(tup, request) + if cookie: cookies.append(cookie) + return cookies + + def _process_rfc2109_cookies(self, cookies): + if self._policy.rfc2109_as_netscape is None: + rfc2109_as_netscape = not self._policy.rfc2965 + else: + rfc2109_as_netscape = self._policy.rfc2109_as_netscape + for cookie in cookies: + if cookie.version == 1: + cookie.rfc2109 = True + if rfc2109_as_netscape: + # treat 2109 cookies as Netscape cookies rather than + # as RFC2965 cookies + cookie.version = 0 + + def make_cookies(self, response, request): + """Return sequence of Cookie objects extracted from response object. + + See extract_cookies.__doc__ for the interfaces required of the + response and request arguments. + + """ + # get cookie-attributes for RFC 2965 and Netscape protocols + headers = response.info() + rfc2965_hdrs = headers.getheaders("Set-Cookie2") + ns_hdrs = headers.getheaders("Set-Cookie") + + rfc2965 = self._policy.rfc2965 + netscape = self._policy.netscape + + if ((not rfc2965_hdrs and not ns_hdrs) or + (not ns_hdrs and not rfc2965) or + (not rfc2965_hdrs and not netscape) or + (not netscape and not rfc2965)): + return [] # no relevant cookie headers: quick exit + + try: + cookies = self._cookies_from_attrs_set( + split_header_words(rfc2965_hdrs), request) + except: + reraise_unmasked_exceptions() + cookies = [] + + if ns_hdrs and netscape: + try: + # RFC 2109 and Netscape cookies + ns_cookies = self._cookies_from_attrs_set( + parse_ns_headers(ns_hdrs), request) + except: + reraise_unmasked_exceptions() + ns_cookies = [] + self._process_rfc2109_cookies(ns_cookies) + + # Look for Netscape cookies (from Set-Cookie headers) that match + # corresponding RFC 2965 cookies (from Set-Cookie2 headers). + # For each match, keep the RFC 2965 cookie and ignore the Netscape + # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are + # bundled in with the Netscape cookies for this purpose, which is + # reasonable behaviour. + if rfc2965: + lookup = {} + for cookie in cookies: + lookup[(cookie.domain, cookie.path, cookie.name)] = None + + def no_matching_rfc2965(ns_cookie, lookup=lookup): + key = ns_cookie.domain, ns_cookie.path, ns_cookie.name + return not lookup.has_key(key) + ns_cookies = filter(no_matching_rfc2965, ns_cookies) + + if ns_cookies: + cookies.extend(ns_cookies) + + return cookies + + def set_cookie_if_ok(self, cookie, request): + """Set a cookie if policy says it's OK to do so. + + cookie: mechanize.Cookie instance + request: see extract_cookies.__doc__ for the required interface + + """ + self._policy._now = self._now = int(time.time()) + + if self._policy.set_ok(cookie, request): + self.set_cookie(cookie) + + def set_cookie(self, cookie): + """Set a cookie, without checking whether or not it should be set. 
+
+        cookie: mechanize.Cookie instance
+        """
+        c = self._cookies
+        if not c.has_key(cookie.domain): c[cookie.domain] = {}
+        c2 = c[cookie.domain]
+        if not c2.has_key(cookie.path): c2[cookie.path] = {}
+        c3 = c2[cookie.path]
+        c3[cookie.name] = cookie
+
+    def extract_cookies(self, response, request):
+        """Extract cookies from response, where allowable given the request.
+
+        Look for allowable Set-Cookie: and Set-Cookie2: headers in the response
+        object passed as argument. Any of these headers that are found are
+        used to update the state of the object (subject to the policy.set_ok
+        method's approval).
+
+        The response object (usually the result of a call to
+        mechanize.urlopen, or similar) should support an info method, which
+        returns a mimetools.Message object (in fact, the 'mimetools.Message
+        object' may be any object that provides a getallmatchingheaders
+        method).
+
+        The request object (usually a urllib2.Request instance) must support
+        the methods get_full_url and get_host, as documented by urllib2, and
+        the port attribute (the port number). The request is used to set
+        default values for cookie-attributes as well as for checking that the
+        cookie is OK to be set.
+
+        """
+        debug("extract_cookies: %s", response.info())
+        self._policy._now = self._now = int(time.time())
+
+        for cookie in self.make_cookies(response, request):
+            if self._policy.set_ok(cookie, request):
+                debug(" setting cookie: %s", cookie)
+                self.set_cookie(cookie)
+
+    def clear(self, domain=None, path=None, name=None):
+        """Clear some cookies.
+
+        Invoking this method without arguments will clear all cookies. If
+        given a single argument, only cookies belonging to that domain will be
+        removed. If given two arguments, cookies belonging to the specified
+        path within that domain are removed. If given three arguments, then
+        the cookie with the specified name, path and domain is removed.
+
+        Raises KeyError if no matching cookie exists.
+
+        """
+        if name is not None:
+            if (domain is None) or (path is None):
+                raise ValueError(
+                    "domain and path must be given to remove a cookie by name")
+            del self._cookies[domain][path][name]
+        elif path is not None:
+            if domain is None:
+                raise ValueError(
+                    "domain must be given to remove cookies by path")
+            del self._cookies[domain][path]
+        elif domain is not None:
+            del self._cookies[domain]
+        else:
+            self._cookies = {}
+
+    def clear_session_cookies(self):
+        """Discard all session cookies.
+
+        Discards all cookies held by the object which had either no Max-Age or
+        Expires cookie-attribute or an explicit Discard cookie-attribute, or
+        which otherwise have ended up with a true discard attribute. For
+        interactive browsers, the end of a session usually corresponds to
+        closing the browser window.
+
+        Note that the save method won't save session cookies anyway, unless you
+        ask otherwise by passing a true ignore_discard argument.
+
+        """
+        for cookie in self:
+            if cookie.discard:
+                self.clear(cookie.domain, cookie.path, cookie.name)
+
+    def clear_expired_cookies(self):
+        """Discard all expired cookies.
+
+        You probably don't need to call this method: expired cookies are never
+        sent back to the server (provided you're using DefaultCookiePolicy),
+        this method is called by CookieJar itself every so often, and the save
+        method won't save expired cookies anyway (unless you ask otherwise by
+        passing a true ignore_expires argument).
+ + """ + now = time.time() + for cookie in self: + if cookie.is_expired(now): + self.clear(cookie.domain, cookie.path, cookie.name) + + def __getitem__(self, i): + if i == 0: + self._getitem_iterator = self.__iter__() + elif self._prev_getitem_index != i-1: raise IndexError( + "CookieJar.__getitem__ only supports sequential iteration") + self._prev_getitem_index = i + try: + return self._getitem_iterator.next() + except StopIteration: + raise IndexError() + + def __iter__(self): + return MappingIterator(self._cookies) + + def __len__(self): + """Return number of contained cookies.""" + i = 0 + for cookie in self: i = i + 1 + return i + + def __repr__(self): + r = [] + for cookie in self: r.append(repr(cookie)) + return "<%s[%s]>" % (self.__class__, ", ".join(r)) + + def __str__(self): + r = [] + for cookie in self: r.append(str(cookie)) + return "<%s[%s]>" % (self.__class__, ", ".join(r)) + + +class LoadError(Exception): pass + +class FileCookieJar(CookieJar): + """CookieJar that can be loaded from and saved to a file. + + Additional methods + + save(filename=None, ignore_discard=False, ignore_expires=False) + load(filename=None, ignore_discard=False, ignore_expires=False) + revert(filename=None, ignore_discard=False, ignore_expires=False) + + Additional public attributes + + filename: filename for loading and saving cookies + + Additional public readable attributes + + delayload: request that cookies are lazily loaded from disk; this is only + a hint since this only affects performance, not behaviour (unless the + cookies on disk are changing); a CookieJar object may ignore it (in fact, + only MSIECookieJar lazily loads cookies at the moment) + + """ + + def __init__(self, filename=None, delayload=False, policy=None): + """ + See FileCookieJar.__doc__ for argument documentation. + + Cookies are NOT loaded from the named file until either the load or + revert method is called. + + """ + CookieJar.__init__(self, policy) + if filename is not None and not isstringlike(filename): + raise ValueError("filename must be string-like") + self.filename = filename + self.delayload = bool(delayload) + + def save(self, filename=None, ignore_discard=False, ignore_expires=False): + """Save cookies to a file. + + filename: name of file in which to save cookies + ignore_discard: save even cookies set to be discarded + ignore_expires: save even cookies that have expired + + The file is overwritten if it already exists, thus wiping all its + cookies. Saved cookies can be restored later using the load or revert + methods. If filename is not specified, self.filename is used; if + self.filename is None, ValueError is raised. + + """ + raise NotImplementedError() + + def load(self, filename=None, ignore_discard=False, ignore_expires=False): + """Load cookies from a file. + + Old cookies are kept unless overwritten by newly loaded ones. + + Arguments are as for .save(). + + If filename is not specified, self.filename is used; if self.filename + is None, ValueError is raised. The named file must be in the format + understood by the class, or LoadError will be raised. This format will + be identical to that written by the save method, unless the load format + is not sufficiently well understood (as is the case for MSIECookieJar). 
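+
+        A sketch using one of the concrete subclasses (the filename is
+        made up):
+
+            jar = LWPCookieJar("cookies.lwp")
+            jar.save()                     # writes cookies.lwp
+            jar.load(ignore_discard=True)  # reads session cookies too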
+ + """ + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + f = open(filename) + try: + self._really_load(f, filename, ignore_discard, ignore_expires) + finally: + f.close() + + def revert(self, filename=None, + ignore_discard=False, ignore_expires=False): + """Clear all cookies and reload cookies from a saved file. + + Raises LoadError (or IOError) if reversion is not successful; the + object's state will not be altered if this happens. + + """ + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + old_state = copy.deepcopy(self._cookies) + self._cookies = {} + try: + self.load(filename, ignore_discard, ignore_expires) + except (LoadError, IOError): + self._cookies = old_state + raise diff --git a/src/calibre/utils/mechanize/_debug.py b/src/calibre/utils/mechanize/_debug.py new file mode 100644 index 0000000000..596b11477e --- /dev/null +++ b/src/calibre/utils/mechanize/_debug.py @@ -0,0 +1,28 @@ +import logging + +from urllib2 import BaseHandler +from _response import response_seek_wrapper + + +class HTTPResponseDebugProcessor(BaseHandler): + handler_order = 900 # before redirections, after everything else + + def http_response(self, request, response): + if not hasattr(response, "seek"): + response = response_seek_wrapper(response) + info = logging.getLogger("mechanize.http_responses").info + try: + info(response.read()) + finally: + response.seek(0) + info("*****************************************************") + return response + + https_response = http_response + +class HTTPRedirectDebugProcessor(BaseHandler): + def http_request(self, request): + if hasattr(request, "redirect_dict"): + info = logging.getLogger("mechanize.http_redirects").info + info("redirecting to %s", request.get_full_url()) + return request diff --git a/src/calibre/utils/mechanize/_gzip.py b/src/calibre/utils/mechanize/_gzip.py new file mode 100644 index 0000000000..46a98a3858 --- /dev/null +++ b/src/calibre/utils/mechanize/_gzip.py @@ -0,0 +1,103 @@ +import urllib2 +from cStringIO import StringIO +import _response + +# GzipConsumer was taken from Fredrik Lundh's effbot.org-0.1-20041009 library +class GzipConsumer: + + def __init__(self, consumer): + self.__consumer = consumer + self.__decoder = None + self.__data = "" + + def __getattr__(self, key): + return getattr(self.__consumer, key) + + def feed(self, data): + if self.__decoder is None: + # check if we have a full gzip header + data = self.__data + data + try: + i = 10 + flag = ord(data[3]) + if flag & 4: # extra + x = ord(data[i]) + 256*ord(data[i+1]) + i = i + 2 + x + if flag & 8: # filename + while ord(data[i]): + i = i + 1 + i = i + 1 + if flag & 16: # comment + while ord(data[i]): + i = i + 1 + i = i + 1 + if flag & 2: # crc + i = i + 2 + if len(data) < i: + raise IndexError("not enough data") + if data[:3] != "\x1f\x8b\x08": + raise IOError("invalid gzip data") + data = data[i:] + except IndexError: + self.__data = data + return # need more data + import zlib + self.__data = "" + self.__decoder = zlib.decompressobj(-zlib.MAX_WBITS) + data = self.__decoder.decompress(data) + if data: + self.__consumer.feed(data) + + def close(self): + if self.__decoder: + data = self.__decoder.flush() + if data: + self.__consumer.feed(data) + self.__consumer.close() + + +# -------------------------------------------------------------------- + +# the rest of this module is John Lee's stupid code, not +# Fredrik's 
nice code :-)
+
+class stupid_gzip_consumer:
    def __init__(self): self.data = []
+    def feed(self, data): self.data.append(data)
+
+class stupid_gzip_wrapper(_response.closeable_response):
+    def __init__(self, response):
+        self._response = response
+
+        c = stupid_gzip_consumer()
+        gzc = GzipConsumer(c)
+        gzc.feed(response.read())
+        self.__data = StringIO("".join(c.data))
+
+    def read(self, size=-1):
+        return self.__data.read(size)
+    def readline(self, size=-1):
+        return self.__data.readline(size)
+    def readlines(self, sizehint=-1):
+        return self.__data.readlines(sizehint)
+
+    def __getattr__(self, name):
+        # delegate unknown methods/attributes
+        return getattr(self._response, name)
+
+class HTTPGzipProcessor(urllib2.BaseHandler):
+    handler_order = 200  # response processing before HTTPEquivProcessor
+
+    def http_request(self, request):
+        request.add_header("Accept-Encoding", "gzip")
+        return request
+
+    def http_response(self, request, response):
+        # post-process response
+        enc_hdrs = response.info().getheaders("Content-encoding")
+        for enc_hdr in enc_hdrs:
+            if ("gzip" in enc_hdr) or ("compress" in enc_hdr):
+                return stupid_gzip_wrapper(response)
+        return response
+
+    https_response = http_response
diff --git a/src/calibre/utils/mechanize/_headersutil.py b/src/calibre/utils/mechanize/_headersutil.py
new file mode 100644
index 0000000000..d8fe47a0e7
--- /dev/null
+++ b/src/calibre/utils/mechanize/_headersutil.py
@@ -0,0 +1,226 @@
+"""Utility functions for HTTP header value parsing and construction.
+
+Copyright 1997-1998, Gisle Aas
+Copyright 2002-2006, John J. Lee <jjl@pobox.com>
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+import os, re
+from types import StringType
+from types import UnicodeType
+STRING_TYPES = StringType, UnicodeType
+
+from _util import http2time
+import _rfc3986
+
+def is_html(ct_headers, url, allow_xhtml=False):
+    """
+    ct_headers: Sequence of Content-Type headers
+    url: Response URL
+
+    """
+    if not ct_headers:
+        # guess
+        ext = os.path.splitext(_rfc3986.urlsplit(url)[2])[1]
+        html_exts = [".htm", ".html"]
+        if allow_xhtml:
+            html_exts += [".xhtml"]
+        return ext in html_exts
+    # use first header
+    ct = split_header_words(ct_headers)[0][0][0]
+    html_types = ["text/html"]
+    if allow_xhtml:
+        html_types += [
+            "text/xhtml", "text/xml",
+            "application/xml", "application/xhtml+xml",
+            ]
+    return ct in html_types
+
+def unmatched(match):
+    """Return unmatched part of re.Match object."""
+    start, end = match.span(0)
+    return match.string[:start]+match.string[end:]
+
+token_re = re.compile(r"^\s*([^=\s;,]+)")
+quoted_value_re = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
+value_re = re.compile(r"^\s*=\s*([^\s;,]*)")
+escape_re = re.compile(r"\\(.)")
+def split_header_words(header_values):
+    r"""Parse header values into a list of lists containing key,value pairs.
+
+    The function knows how to deal with ",", ";" and "=" as well as quoted
+    values after "=". A list of space separated tokens are parsed as if they
+    were separated by ";".
+
+    If the header_values passed as argument contains multiple values, then they
+    are treated as if they were a single value separated by comma ",".
+
+    This means that this function is useful for parsing header fields that
+    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
+    the requirement for tokens).
+
+      headers       = #header
+      header        = (token | parameter) *( [";"] (token | parameter))
+
+      token         = 1*<any CHAR except CTLs or separators>
+      separators    = "(" | ")" | "<" | ">" | "@"
+                    | "," | ";" | ":" | "\" | <">
+                    | "/" | "[" | "]" | "?" | "="
+                    | "{" | "}" | SP | HT
+
+      quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
+      qdtext        = <any TEXT except <">>
+      quoted-pair   = "\" CHAR
+
+      parameter     = attribute "=" value
+      attribute     = token
+      value         = token | quoted-string
+
+    Each header is represented by a list of key/value pairs. The value for a
+    simple token (not part of a parameter) is None. Syntactically incorrect
+    headers will not necessarily be parsed as you would want.
+
+    This is easier to describe with some examples:
+
+    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
+    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
+    >>> split_header_words(['text/html; charset="iso-8859-1"'])
+    [[('text/html', None), ('charset', 'iso-8859-1')]]
+    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
+    [[('Basic', None), ('realm', '"foobar"')]]
+
+    """
+    assert type(header_values) not in STRING_TYPES
+    result = []
+    for text in header_values:
+        orig_text = text
+        pairs = []
+        while text:
+            m = token_re.search(text)
+            if m:
+                text = unmatched(m)
+                name = m.group(1)
+                m = quoted_value_re.search(text)
+                if m:  # quoted value
+                    text = unmatched(m)
+                    value = m.group(1)
+                    value = escape_re.sub(r"\1", value)
+                else:
+                    m = value_re.search(text)
+                    if m:  # unquoted value
+                        text = unmatched(m)
+                        value = m.group(1)
+                        value = value.rstrip()
+                    else:
+                        # no value, a lone token
+                        value = None
+                pairs.append((name, value))
+            elif text.lstrip().startswith(","):
+                # concatenated headers, as per RFC 2616 section 4.2
+                text = text.lstrip()[1:]
+                if pairs: result.append(pairs)
+                pairs = []
+            else:
+                # skip junk
+                non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
+                assert nr_junk_chars > 0, (
+                    "split_header_words bug: '%s', '%s', %s" %
+                    (orig_text, text, pairs))
+                text = non_junk
+        if pairs: result.append(pairs)
+    return result
+
+join_escape_re = re.compile(r"([\"\\])")
+def join_header_words(lists):
+    """Do the inverse of the conversion done by split_header_words.
+
+    Takes a list of lists of (key, value) pairs and produces a single header
+    value. Attribute values are quoted if needed.
+
+    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
+    'text/plain; charset="iso-8859/1"'
+    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
+    'text/plain, charset="iso-8859/1"'
+
+    """
+    headers = []
+    for pairs in lists:
+        attr = []
+        for k, v in pairs:
+            if v is not None:
+                if not re.search(r"^\w+$", v):
+                    v = join_escape_re.sub(r"\\\1", v)  # escape " and \
+                    v = '"%s"' % v
+                if k is None:  # Netscape cookies may have no name
+                    k = v
+                else:
+                    k = "%s=%s" % (k, v)
+            attr.append(k)
+        if attr: headers.append("; ".join(attr))
+    return ", ".join(headers)
+
+def parse_ns_headers(ns_headers):
+    """Ad-hoc parser for Netscape protocol cookie-attributes.
+
+    The old Netscape cookie format for Set-Cookie can for instance contain
+    an unquoted "," in the expires field, so we have to use this ad-hoc
+    parser instead of split_header_words.
+
+    XXX This may not make the best possible effort to parse all the crap
+    that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient
+    parser is probably better, so could do worse than following that if
+    this ever gives any trouble.
+
+    Currently, this is also used for parsing RFC 2109 cookies.
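+
+    A minimal illustrative example (expires is omitted here, since its
+    parsed value is a seconds-since-epoch number produced by http2time):
+
+    >>> parse_ns_headers(["foo=bar; path=/; secure"])
+    [[('foo', 'bar'), ('path', '/'), ('secure', None), ('version', '0')]]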
+ + """ + known_attrs = ("expires", "domain", "path", "secure", + # RFC 2109 attrs (may turn up in Netscape cookies, too) + "port", "max-age") + + result = [] + for ns_header in ns_headers: + pairs = [] + version_set = False + params = re.split(r";\s*", ns_header) + for ii in range(len(params)): + param = params[ii] + param = param.rstrip() + if param == "": continue + if "=" not in param: + k, v = param, None + else: + k, v = re.split(r"\s*=\s*", param, 1) + k = k.lstrip() + if ii != 0: + lc = k.lower() + if lc in known_attrs: + k = lc + if k == "version": + # This is an RFC 2109 cookie. + version_set = True + if k == "expires": + # convert expires date to seconds since epoch + if v.startswith('"'): v = v[1:] + if v.endswith('"'): v = v[:-1] + v = http2time(v) # None if invalid + pairs.append((k, v)) + + if pairs: + if not version_set: + pairs.append(("version", "0")) + result.append(pairs) + + return result + + +def _test(): + import doctest, _headersutil + return doctest.testmod(_headersutil) + +if __name__ == "__main__": + _test() diff --git a/src/calibre/utils/mechanize/_html.py b/src/calibre/utils/mechanize/_html.py new file mode 100644 index 0000000000..2d562c98bf --- /dev/null +++ b/src/calibre/utils/mechanize/_html.py @@ -0,0 +1,607 @@ +"""HTML handling. + +Copyright 2003-2006 John J. Lee + +This code is free software; you can redistribute it and/or modify it under +the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt +included with the distribution). + +""" + +import re, copy, htmlentitydefs +import sgmllib, HTMLParser, ClientForm + +import _request +from _headersutil import split_header_words, is_html as _is_html +import _rfc3986 + +DEFAULT_ENCODING = "latin-1" + + +# the base classe is purely for backwards compatibility +class ParseError(ClientForm.ParseError): pass + + +class CachingGeneratorFunction(object): + """Caching wrapper around a no-arguments iterable.""" + + def __init__(self, iterable): + self._cache = [] + # wrap iterable to make it non-restartable (otherwise, repeated + # __call__ would give incorrect results) + self._iterator = iter(iterable) + + def __call__(self): + cache = self._cache + for item in cache: + yield item + for item in self._iterator: + cache.append(item) + yield item + + +class EncodingFinder: + def __init__(self, default_encoding): + self._default_encoding = default_encoding + def encoding(self, response): + # HTTPEquivProcessor may be in use, so both HTTP and HTTP-EQUIV + # headers may be in the response. HTTP-EQUIV headers come last, + # so try in order from first to last. 
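+        # For example, given a Content-Type value of
+        #     text/html; charset=utf-8
+        # split_header_words returns [[("text/html", None), ("charset", "utf-8")]]
+        # and the loop below picks out "utf-8".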
+ for ct in response.info().getheaders("content-type"): + for k, v in split_header_words([ct])[0]: + if k == "charset": + return v + return self._default_encoding + +class ResponseTypeFinder: + def __init__(self, allow_xhtml): + self._allow_xhtml = allow_xhtml + def is_html(self, response, encoding): + ct_hdrs = response.info().getheaders("content-type") + url = response.geturl() + # XXX encoding + return _is_html(ct_hdrs, url, self._allow_xhtml) + + +# idea for this argument-processing trick is from Peter Otten +class Args: + def __init__(self, args_map): + self.dictionary = dict(args_map) + def __getattr__(self, key): + try: + return self.dictionary[key] + except KeyError: + return getattr(self.__class__, key) + +def form_parser_args( + select_default=False, + form_parser_class=None, + request_class=None, + backwards_compat=False, + ): + return Args(locals()) + + +class Link: + def __init__(self, base_url, url, text, tag, attrs): + assert None not in [url, tag, attrs] + self.base_url = base_url + self.absolute_url = _rfc3986.urljoin(base_url, url) + self.url, self.text, self.tag, self.attrs = url, text, tag, attrs + def __cmp__(self, other): + try: + for name in "url", "text", "tag", "attrs": + if getattr(self, name) != getattr(other, name): + return -1 + except AttributeError: + return -1 + return 0 + def __repr__(self): + return "Link(base_url=%r, url=%r, text=%r, tag=%r, attrs=%r)" % ( + self.base_url, self.url, self.text, self.tag, self.attrs) + + +class LinksFactory: + + def __init__(self, + link_parser_class=None, + link_class=Link, + urltags=None, + ): + import _pullparser + if link_parser_class is None: + link_parser_class = _pullparser.TolerantPullParser + self.link_parser_class = link_parser_class + self.link_class = link_class + if urltags is None: + urltags = { + "a": "href", + "area": "href", + "frame": "src", + "iframe": "src", + } + self.urltags = urltags + self._response = None + self._encoding = None + + def set_response(self, response, base_url, encoding): + self._response = response + self._encoding = encoding + self._base_url = base_url + + def links(self): + """Return an iterator that provides links of the document.""" + response = self._response + encoding = self._encoding + base_url = self._base_url + p = self.link_parser_class(response, encoding=encoding) + + try: + for token in p.tags(*(self.urltags.keys()+["base"])): + if token.type == "endtag": + continue + if token.data == "base": + base_href = dict(token.attrs).get("href") + if base_href is not None: + base_url = base_href + continue + attrs = dict(token.attrs) + tag = token.data + name = attrs.get("name") + text = None + # XXX use attr_encoding for ref'd doc if that doc does not + # provide one by other means + #attr_encoding = attrs.get("charset") + url = attrs.get(self.urltags[tag]) # XXX is "" a valid URL? + if not url: + # Probably an link or . + # For our purposes a link is something with a URL, so + # ignore this. + continue + + url = _rfc3986.clean_url(url, encoding) + if tag == "a": + if token.type != "startendtag": + # hmm, this'd break if end tag is missing + text = p.get_compressed_text(("endtag", tag)) + # but this doesn't work for eg. + # Andy + #text = p.get_compressed_text() + + yield Link(base_url, url, text, tag, token.attrs) + except sgmllib.SGMLParseError, exc: + raise ParseError(exc) + +class FormsFactory: + + """Makes a sequence of objects satisfying ClientForm.HTMLForm interface. 
+
+    After calling .forms(), the .global_form attribute is a form object
+    containing all controls that are not descendants of any FORM element.
+
+    For constructor argument docs, see ClientForm.ParseResponse
+    argument docs.
+
+    """
+
+    def __init__(self,
+                 select_default=False,
+                 form_parser_class=None,
+                 request_class=None,
+                 backwards_compat=False,
+                 ):
+        import ClientForm
+        self.select_default = select_default
+        if form_parser_class is None:
+            form_parser_class = ClientForm.FormParser
+        self.form_parser_class = form_parser_class
+        if request_class is None:
+            request_class = _request.Request
+        self.request_class = request_class
+        self.backwards_compat = backwards_compat
+        self._response = None
+        self.encoding = None
+        self.global_form = None
+
+    def set_response(self, response, encoding):
+        self._response = response
+        self.encoding = encoding
+        self.global_form = None
+
+    def forms(self):
+        import ClientForm
+        encoding = self.encoding
+        try:
+            forms = ClientForm.ParseResponseEx(
+                self._response,
+                select_default=self.select_default,
+                form_parser_class=self.form_parser_class,
+                request_class=self.request_class,
+                encoding=encoding,
+                _urljoin=_rfc3986.urljoin,
+                _urlparse=_rfc3986.urlsplit,
+                _urlunparse=_rfc3986.urlunsplit,
+                )
+        except ClientForm.ParseError, exc:
+            raise ParseError(exc)
+        self.global_form = forms[0]
+        return forms[1:]
+
+class TitleFactory:
+    def __init__(self):
+        self._response = self._encoding = None
+
+    def set_response(self, response, encoding):
+        self._response = response
+        self._encoding = encoding
+
+    def title(self):
+        import _pullparser
+        p = _pullparser.TolerantPullParser(
+            self._response, encoding=self._encoding)
+        try:
+            try:
+                p.get_tag("title")
+            except _pullparser.NoMoreTokensError:
+                return None
+            else:
+                return p.get_text()
+        except sgmllib.SGMLParseError, exc:
+            raise ParseError(exc)
+
+
+def unescape(data, entities, encoding):
+    if data is None or "&" not in data:
+        return data
+
+    def replace_entities(match):
+        ent = match.group()
+        if ent[1] == "#":
+            return unescape_charref(ent[2:-1], encoding)
+
+        repl = entities.get(ent[1:-1])
+        if repl is not None:
+            repl = unichr(repl)
+            if type(repl) != type(""):
+                try:
+                    repl = repl.encode(encoding)
+                except UnicodeError:
+                    repl = ent
+        else:
+            repl = ent
+        return repl
+
+    return re.sub(r"&#?[A-Za-z0-9]+?;", replace_entities, data)
+
+def unescape_charref(data, encoding):
+    name, base = data, 10
+    if name.startswith("x"):
+        name, base = name[1:], 16
+    uc = unichr(int(name, base))
+    if encoding is None:
+        return uc
+    else:
+        try:
+            repl = uc.encode(encoding)
+        except UnicodeError:
+            repl = "&#%s;" % data
+        return repl
+
+
+# bizarre import gymnastics for bundled BeautifulSoup
+import _beautifulsoup
+import ClientForm
+RobustFormParser, NestingRobustFormParser = ClientForm._create_bs_classes(
+    _beautifulsoup.BeautifulSoup, _beautifulsoup.ICantBelieveItsBeautifulSoup
+    )
+# monkeypatch sgmllib to fix http://www.python.org/sf/803422 :-(
+import sgmllib
+sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]")
+
+class MechanizeBs(_beautifulsoup.BeautifulSoup):
+    _entitydefs = htmlentitydefs.name2codepoint
+    # don't want the magic Microsoft-char workaround
+    PARSER_MASSAGE = [(re.compile('(<[^<>]*)/>'),
+                       lambda(x):x.group(1) + ' />'),
+                      (re.compile('<!\s+([^<>]*)>'),
+                       lambda(x):'<!' + x.group(1) + '>')
+                      ]
+
+    def __init__(self, encoding, text=None, avoidParserProblems=True,
+                 initialTextIsEverything=True):
+        self._encoding = encoding
+        _beautifulsoup.BeautifulSoup.__init__(
+            self, text, avoidParserProblems, initialTextIsEverything)
+
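+    # The two handlers below decode character and entity references to text
+    # in the target encoding while the soup is parsed; for example
+    # (illustrative), unescape("&eacute;", self._entitydefs, "latin-1")
+    # returns "\xe9".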
def handle_charref(self, ref): + t = unescape("&#%s;"%ref, self._entitydefs, self._encoding) + self.handle_data(t) + def handle_entityref(self, ref): + t = unescape("&%s;"%ref, self._entitydefs, self._encoding) + self.handle_data(t) + def unescape_attrs(self, attrs): + escaped_attrs = [] + for key, val in attrs: + val = unescape(val, self._entitydefs, self._encoding) + escaped_attrs.append((key, val)) + return escaped_attrs + +class RobustLinksFactory: + + compress_re = re.compile(r"\s+") + + def __init__(self, + link_parser_class=None, + link_class=Link, + urltags=None, + ): + import _beautifulsoup + if link_parser_class is None: + link_parser_class = MechanizeBs + self.link_parser_class = link_parser_class + self.link_class = link_class + if urltags is None: + urltags = { + "a": "href", + "area": "href", + "frame": "src", + "iframe": "src", + } + self.urltags = urltags + self._bs = None + self._encoding = None + self._base_url = None + + def set_soup(self, soup, base_url, encoding): + self._bs = soup + self._base_url = base_url + self._encoding = encoding + + def links(self): + import _beautifulsoup + bs = self._bs + base_url = self._base_url + encoding = self._encoding + gen = bs.recursiveChildGenerator() + for ch in bs.recursiveChildGenerator(): + if (isinstance(ch, _beautifulsoup.Tag) and + ch.name in self.urltags.keys()+["base"]): + link = ch + attrs = bs.unescape_attrs(link.attrs) + attrs_dict = dict(attrs) + if link.name == "base": + base_href = attrs_dict.get("href") + if base_href is not None: + base_url = base_href + continue + url_attr = self.urltags[link.name] + url = attrs_dict.get(url_attr) + if not url: + continue + url = _rfc3986.clean_url(url, encoding) + text = link.firstText(lambda t: True) + if text is _beautifulsoup.Null: + # follow _pullparser's weird behaviour rigidly + if link.name == "a": + text = "" + else: + text = None + else: + text = self.compress_re.sub(" ", text.strip()) + yield Link(base_url, url, text, link.name, attrs) + + +class RobustFormsFactory(FormsFactory): + def __init__(self, *args, **kwds): + import ClientForm + args = form_parser_args(*args, **kwds) + if args.form_parser_class is None: + args.form_parser_class = RobustFormParser + FormsFactory.__init__(self, **args.dictionary) + + def set_response(self, response, encoding): + self._response = response + self.encoding = encoding + + +class RobustTitleFactory: + def __init__(self): + self._bs = self._encoding = None + + def set_soup(self, soup, encoding): + self._bs = soup + self._encoding = encoding + + def title(self): + import _beautifulsoup + title = self._bs.first("title") + if title == _beautifulsoup.Null: + return None + else: + return title.firstText(lambda t: True) + + +class Factory: + """Factory for forms, links, etc. + + This interface may expand in future. + + Public methods: + + set_request_class(request_class) + set_response(response) + forms() + links() + + Public attributes: + + Note that accessing these attributes may raise ParseError. + + encoding: string specifying the encoding of response if it contains a text + document (this value is left unspecified for documents that do not have + an encoding, e.g. 
an image file) + is_html: true if response contains an HTML document (XHTML may be + regarded as HTML too) + title: page title, or None if no title or not HTML + global_form: form object containing all controls that are not descendants + of any FORM element, or None if the forms_factory does not support + supplying a global form + + """ + + LAZY_ATTRS = ["encoding", "is_html", "title", "global_form"] + + def __init__(self, forms_factory, links_factory, title_factory, + encoding_finder=EncodingFinder(DEFAULT_ENCODING), + response_type_finder=ResponseTypeFinder(allow_xhtml=False), + ): + """ + + Pass keyword arguments only. + + default_encoding: character encoding to use if encoding cannot be + determined (or guessed) from the response. You should turn on + HTTP-EQUIV handling if you want the best chance of getting this right + without resorting to this default. The default value of this + parameter (currently latin-1) may change in future. + + """ + self._forms_factory = forms_factory + self._links_factory = links_factory + self._title_factory = title_factory + self._encoding_finder = encoding_finder + self._response_type_finder = response_type_finder + + self.set_response(None) + + def set_request_class(self, request_class): + """Set urllib2.Request class. + + ClientForm.HTMLForm instances returned by .forms() will return + instances of this class when .click()ed. + + """ + self._forms_factory.request_class = request_class + + def set_response(self, response): + """Set response. + + The response must either be None or implement the same interface as + objects returned by urllib2.urlopen(). + + """ + self._response = response + self._forms_genf = self._links_genf = None + self._get_title = None + for name in self.LAZY_ATTRS: + try: + delattr(self, name) + except AttributeError: + pass + + def __getattr__(self, name): + if name not in self.LAZY_ATTRS: + return getattr(self.__class__, name) + + if name == "encoding": + self.encoding = self._encoding_finder.encoding( + copy.copy(self._response)) + return self.encoding + elif name == "is_html": + self.is_html = self._response_type_finder.is_html( + copy.copy(self._response), self.encoding) + return self.is_html + elif name == "title": + if self.is_html: + self.title = self._title_factory.title() + else: + self.title = None + return self.title + elif name == "global_form": + self.forms() + return self.global_form + + def forms(self): + """Return iterable over ClientForm.HTMLForm-like objects. + + Raises mechanize.ParseError on failure. + """ + # this implementation sets .global_form as a side-effect, for benefit + # of __getattr__ impl + if self._forms_genf is None: + try: + self._forms_genf = CachingGeneratorFunction( + self._forms_factory.forms()) + except: # XXXX define exception! + self.set_response(self._response) + raise + self.global_form = getattr( + self._forms_factory, "global_form", None) + return self._forms_genf() + + def links(self): + """Return iterable over mechanize.Link-like objects. + + Raises mechanize.ParseError on failure. + """ + if self._links_genf is None: + try: + self._links_genf = CachingGeneratorFunction( + self._links_factory.links()) + except: # XXXX define exception! 
+ self.set_response(self._response) + raise + return self._links_genf() + +class DefaultFactory(Factory): + """Based on sgmllib.""" + def __init__(self, i_want_broken_xhtml_support=False): + Factory.__init__( + self, + forms_factory=FormsFactory(), + links_factory=LinksFactory(), + title_factory=TitleFactory(), + response_type_finder=ResponseTypeFinder( + allow_xhtml=i_want_broken_xhtml_support), + ) + + def set_response(self, response): + Factory.set_response(self, response) + if response is not None: + self._forms_factory.set_response( + copy.copy(response), self.encoding) + self._links_factory.set_response( + copy.copy(response), response.geturl(), self.encoding) + self._title_factory.set_response( + copy.copy(response), self.encoding) + +class RobustFactory(Factory): + """Based on BeautifulSoup, hopefully a bit more robust to bad HTML than is + DefaultFactory. + + """ + def __init__(self, i_want_broken_xhtml_support=False, + soup_class=None): + Factory.__init__( + self, + forms_factory=RobustFormsFactory(), + links_factory=RobustLinksFactory(), + title_factory=RobustTitleFactory(), + response_type_finder=ResponseTypeFinder( + allow_xhtml=i_want_broken_xhtml_support), + ) + if soup_class is None: + soup_class = MechanizeBs + self._soup_class = soup_class + + def set_response(self, response): + import _beautifulsoup + Factory.set_response(self, response) + if response is not None: + data = response.read() + soup = self._soup_class(self.encoding, data) + self._forms_factory.set_response( + copy.copy(response), self.encoding) + self._links_factory.set_soup( + soup, response.geturl(), self.encoding) + self._title_factory.set_soup(soup, self.encoding) diff --git a/src/calibre/utils/mechanize/_http.py b/src/calibre/utils/mechanize/_http.py new file mode 100644 index 0000000000..d73f3f44e5 --- /dev/null +++ b/src/calibre/utils/mechanize/_http.py @@ -0,0 +1,729 @@ +"""HTTP related handlers. + +Note that some other HTTP handlers live in more specific modules: _auth.py, +_gzip.py, etc. + + +Copyright 2002-2006 John J Lee + +This code is free software; you can redistribute it and/or modify it +under the terms of the BSD or ZPL 2.1 licenses (see the file +COPYING.txt included with the distribution). + +""" + +import copy, time, tempfile, htmlentitydefs, re, logging, socket, \ + urllib2, urllib, httplib, sgmllib +from urllib2 import URLError, HTTPError, BaseHandler +from cStringIO import StringIO + +from _request import Request +from _util import isstringlike +from _response import closeable_response, response_seek_wrapper +from _html import unescape, unescape_charref +from _headersutil import is_html +from _clientcookie import CookieJar, request_host +import _rfc3986 + +debug = logging.getLogger("mechanize").debug + +# monkeypatch urllib2.HTTPError to show URL +## def urllib2_str(self): +## return 'HTTP Error %s: %s (%s)' % ( +## self.code, self.msg, self.geturl()) +## urllib2.HTTPError.__str__ = urllib2_str + + +CHUNK = 1024 # size of chunks fed to HTML HEAD parser, in bytes +DEFAULT_ENCODING = 'latin-1' + + +# This adds "refresh" to the list of redirectables and provides a redirection +# algorithm that doesn't go into a loop in the presence of cookies +# (Python 2.4 has this new algorithm, 2.3 doesn't). 
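+# The loop detection implemented below, roughly sketched: each redirected
+# Request carries a redirect_dict mapping URL -> visit count, and
+# redirection is abandoned with an HTTPError once any single URL has been
+# seen max_repeats times, or max_redirections distinct URLs have been
+# visited in one chain.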
+class HTTPRedirectHandler(BaseHandler): + # maximum number of redirections to any single URL + # this is needed because of the state that cookies introduce + max_repeats = 4 + # maximum total number of redirections (regardless of URL) before + # assuming we're in a loop + max_redirections = 10 + + # Implementation notes: + + # To avoid the server sending us into an infinite loop, the request + # object needs to track what URLs we have already seen. Do this by + # adding a handler-specific attribute to the Request object. The value + # of the dict is used to count the number of times the same URL has + # been visited. This is needed because visiting the same URL twice + # does not necessarily imply a loop, thanks to state introduced by + # cookies. + + # Always unhandled redirection codes: + # 300 Multiple Choices: should not handle this here. + # 304 Not Modified: no need to handle here: only of interest to caches + # that do conditional GETs + # 305 Use Proxy: probably not worth dealing with here + # 306 Unused: what was this for in the previous versions of protocol?? + + def redirect_request(self, newurl, req, fp, code, msg, headers): + """Return a Request or None in response to a redirect. + + This is called by the http_error_30x methods when a redirection + response is received. If a redirection should take place, return a + new Request to allow http_error_30x to perform the redirect; + otherwise, return None to indicate that an HTTPError should be + raised. + + """ + if code in (301, 302, 303, "refresh") or \ + (code == 307 and not req.has_data()): + # Strictly (according to RFC 2616), 301 or 302 in response to + # a POST MUST NOT cause a redirection without confirmation + # from the user (of urllib2, in this case). In practice, + # essentially all clients do redirect in this case, so we do + # the same. + # XXX really refresh redirections should be visiting; tricky to + # fix, so this will wait until post-stable release + new = Request(newurl, + headers=req.headers, + origin_req_host=req.get_origin_req_host(), + unverifiable=True, + visit=False, + ) + new._origin_req = getattr(req, "_origin_req", req) + return new + else: + raise HTTPError(req.get_full_url(), code, msg, headers, fp) + + def http_error_302(self, req, fp, code, msg, headers): + # Some servers (incorrectly) return multiple Location headers + # (so probably same goes for URI). Use first header. + if headers.has_key('location'): + newurl = headers.getheaders('location')[0] + elif headers.has_key('uri'): + newurl = headers.getheaders('uri')[0] + else: + return + newurl = _rfc3986.clean_url(newurl, "latin-1") + newurl = _rfc3986.urljoin(req.get_full_url(), newurl) + + # XXX Probably want to forget about the state of the current + # request, although that might interact poorly with other + # handlers that also use handler-specific request attributes + new = self.redirect_request(newurl, req, fp, code, msg, headers) + if new is None: + return + + # loop detection + # .redirect_dict has a key url if url was previously visited. + if hasattr(req, 'redirect_dict'): + visited = new.redirect_dict = req.redirect_dict + if (visited.get(newurl, 0) >= self.max_repeats or + len(visited) >= self.max_redirections): + raise HTTPError(req.get_full_url(), code, + self.inf_msg + msg, headers, fp) + else: + visited = new.redirect_dict = req.redirect_dict = {} + visited[newurl] = visited.get(newurl, 0) + 1 + + # Don't close the fp until we are sure that we won't use it + # with HTTPError. 
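+        # (the read() below drains any remaining body data, so the
+        # connection is not left with unread data when it is closed)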
+ fp.read() + fp.close() + + return self.parent.open(new) + + http_error_301 = http_error_303 = http_error_307 = http_error_302 + http_error_refresh = http_error_302 + + inf_msg = "The HTTP server returned a redirect error that would " \ + "lead to an infinite loop.\n" \ + "The last 30x error message was:\n" + + +# XXX would self.reset() work, instead of raising this exception? +class EndOfHeadError(Exception): pass +class AbstractHeadParser: + # only these elements are allowed in or before HEAD of document + head_elems = ("html", "head", + "title", "base", + "script", "style", "meta", "link", "object") + _entitydefs = htmlentitydefs.name2codepoint + _encoding = DEFAULT_ENCODING + + def __init__(self): + self.http_equiv = [] + + def start_meta(self, attrs): + http_equiv = content = None + for key, value in attrs: + if key == "http-equiv": + http_equiv = self.unescape_attr_if_required(value) + elif key == "content": + content = self.unescape_attr_if_required(value) + if http_equiv is not None and content is not None: + self.http_equiv.append((http_equiv, content)) + + def end_head(self): + raise EndOfHeadError() + + def handle_entityref(self, name): + #debug("%s", name) + self.handle_data(unescape( + '&%s;' % name, self._entitydefs, self._encoding)) + + def handle_charref(self, name): + #debug("%s", name) + self.handle_data(unescape_charref(name, self._encoding)) + + def unescape_attr(self, name): + #debug("%s", name) + return unescape(name, self._entitydefs, self._encoding) + + def unescape_attrs(self, attrs): + #debug("%s", attrs) + escaped_attrs = {} + for key, val in attrs.items(): + escaped_attrs[key] = self.unescape_attr(val) + return escaped_attrs + + def unknown_entityref(self, ref): + self.handle_data("&%s;" % ref) + + def unknown_charref(self, ref): + self.handle_data("&#%s;" % ref) + + +try: + import HTMLParser +except ImportError: + pass +else: + class XHTMLCompatibleHeadParser(AbstractHeadParser, + HTMLParser.HTMLParser): + def __init__(self): + HTMLParser.HTMLParser.__init__(self) + AbstractHeadParser.__init__(self) + + def handle_starttag(self, tag, attrs): + if tag not in self.head_elems: + raise EndOfHeadError() + try: + method = getattr(self, 'start_' + tag) + except AttributeError: + try: + method = getattr(self, 'do_' + tag) + except AttributeError: + pass # unknown tag + else: + method(attrs) + else: + method(attrs) + + def handle_endtag(self, tag): + if tag not in self.head_elems: + raise EndOfHeadError() + try: + method = getattr(self, 'end_' + tag) + except AttributeError: + pass # unknown tag + else: + method() + + def unescape(self, name): + # Use the entitydefs passed into constructor, not + # HTMLParser.HTMLParser's entitydefs. 
+ return self.unescape_attr(name) + + def unescape_attr_if_required(self, name): + return name # HTMLParser.HTMLParser already did it + +class HeadParser(AbstractHeadParser, sgmllib.SGMLParser): + + def _not_called(self): + assert False + + def __init__(self): + sgmllib.SGMLParser.__init__(self) + AbstractHeadParser.__init__(self) + + def handle_starttag(self, tag, method, attrs): + if tag not in self.head_elems: + raise EndOfHeadError() + if tag == "meta": + method(attrs) + + def unknown_starttag(self, tag, attrs): + self.handle_starttag(tag, self._not_called, attrs) + + def handle_endtag(self, tag, method): + if tag in self.head_elems: + method() + else: + raise EndOfHeadError() + + def unescape_attr_if_required(self, name): + return self.unescape_attr(name) + +def parse_head(fileobj, parser): + """Return a list of key, value pairs.""" + while 1: + data = fileobj.read(CHUNK) + try: + parser.feed(data) + except EndOfHeadError: + break + if len(data) != CHUNK: + # this should only happen if there is no HTML body, or if + # CHUNK is big + break + return parser.http_equiv + +class HTTPEquivProcessor(BaseHandler): + """Append META HTTP-EQUIV headers to regular HTTP headers.""" + + handler_order = 300 # before handlers that look at HTTP headers + + def __init__(self, head_parser_class=HeadParser, + i_want_broken_xhtml_support=False, + ): + self.head_parser_class = head_parser_class + self._allow_xhtml = i_want_broken_xhtml_support + + def http_response(self, request, response): + if not hasattr(response, "seek"): + response = response_seek_wrapper(response) + http_message = response.info() + url = response.geturl() + ct_hdrs = http_message.getheaders("content-type") + if is_html(ct_hdrs, url, self._allow_xhtml): + try: + try: + html_headers = parse_head(response, self.head_parser_class()) + finally: + response.seek(0) + except (HTMLParser.HTMLParseError, + sgmllib.SGMLParseError): + pass + else: + for hdr, val in html_headers: + # add a header + http_message.dict[hdr.lower()] = val + text = hdr + ": " + val + for line in text.split("\n"): + http_message.headers.append(line + "\n") + return response + + https_response = http_response + +class HTTPCookieProcessor(BaseHandler): + """Handle HTTP cookies. 
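+
+    A minimal usage sketch (the URL is illustrative):
+
+      import urllib2
+      from _clientcookie import CookieJar
+      jar = CookieJar()
+      opener = urllib2.build_opener(HTTPCookieProcessor(jar))
+      response = opener.open("http://example.com/")
+      # cookies set by the response are now in jar and will be sent
+      # back on later requests made through this opener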
+ + Public attributes: + + cookiejar: CookieJar instance + + """ + def __init__(self, cookiejar=None): + if cookiejar is None: + cookiejar = CookieJar() + self.cookiejar = cookiejar + + def http_request(self, request): + self.cookiejar.add_cookie_header(request) + return request + + def http_response(self, request, response): + self.cookiejar.extract_cookies(response, request) + return response + + https_request = http_request + https_response = http_response + +try: + import robotparser +except ImportError: + pass +else: + class MechanizeRobotFileParser(robotparser.RobotFileParser): + + def __init__(self, url='', opener=None): + import _opener + robotparser.RobotFileParser.__init__(self, url) + self._opener = opener + + def set_opener(self, opener=None): + if opener is None: + opener = _opener.OpenerDirector() + self._opener = opener + + def read(self): + """Reads the robots.txt URL and feeds it to the parser.""" + if self._opener is None: + self.set_opener() + req = Request(self.url, unverifiable=True, visit=False) + try: + f = self._opener.open(req) + except HTTPError, f: + pass + except (IOError, socket.error, OSError), exc: + robotparser._debug("ignoring error opening %r: %s" % + (self.url, exc)) + return + lines = [] + line = f.readline() + while line: + lines.append(line.strip()) + line = f.readline() + status = f.code + if status == 401 or status == 403: + self.disallow_all = True + robotparser._debug("disallow all") + elif status >= 400: + self.allow_all = True + robotparser._debug("allow all") + elif status == 200 and lines: + robotparser._debug("parse lines") + self.parse(lines) + + class RobotExclusionError(urllib2.HTTPError): + def __init__(self, request, *args): + apply(urllib2.HTTPError.__init__, (self,)+args) + self.request = request + + class HTTPRobotRulesProcessor(BaseHandler): + # before redirections, after everything else + handler_order = 800 + + try: + from httplib import HTTPMessage + except: + from mimetools import Message + http_response_class = Message + else: + http_response_class = HTTPMessage + + def __init__(self, rfp_class=MechanizeRobotFileParser): + self.rfp_class = rfp_class + self.rfp = None + self._host = None + + def http_request(self, request): + scheme = request.get_type() + if scheme not in ["http", "https"]: + # robots exclusion only applies to HTTP + return request + + if request.get_selector() == "/robots.txt": + # /robots.txt is always OK to fetch + return request + + host = request.get_host() + + # robots.txt requests don't need to be allowed by robots.txt :-) + origin_req = getattr(request, "_origin_req", None) + if (origin_req is not None and + origin_req.get_selector() == "/robots.txt" and + origin_req.get_host() == host + ): + return request + + if host != self._host: + self.rfp = self.rfp_class() + try: + self.rfp.set_opener(self.parent) + except AttributeError: + debug("%r instance does not support set_opener" % + self.rfp.__class__) + self.rfp.set_url(scheme+"://"+host+"/robots.txt") + self.rfp.read() + self._host = host + + ua = request.get_header("User-agent", "") + if self.rfp.can_fetch(ua, request.get_full_url()): + return request + else: + # XXX This should really have raised URLError. Too late now... + msg = "request disallowed by robots.txt" + raise RobotExclusionError( + request, + request.get_full_url(), + 403, msg, + self.http_response_class(StringIO()), StringIO(msg)) + + https_request = http_request + +class HTTPRefererProcessor(BaseHandler): + """Add Referer header to requests. 
+ + This only makes sense if you use each RefererProcessor for a single + chain of requests only (so, for example, if you use a single + HTTPRefererProcessor to fetch a series of URLs extracted from a single + page, this will break). + + There's a proper implementation of this in mechanize.Browser. + + """ + def __init__(self): + self.referer = None + + def http_request(self, request): + if ((self.referer is not None) and + not request.has_header("Referer")): + request.add_unredirected_header("Referer", self.referer) + return request + + def http_response(self, request, response): + self.referer = response.geturl() + return response + + https_request = http_request + https_response = http_response + + +def clean_refresh_url(url): + # e.g. Firefox 1.5 does (something like) this + if ((url.startswith('"') and url.endswith('"')) or + (url.startswith("'") and url.endswith("'"))): + url = url[1:-1] + return _rfc3986.clean_url(url, "latin-1") # XXX encoding + +def parse_refresh_header(refresh): + """ + >>> parse_refresh_header("1; url=http://example.com/") + (1.0, 'http://example.com/') + >>> parse_refresh_header("1; url='http://example.com/'") + (1.0, 'http://example.com/') + >>> parse_refresh_header("1") + (1.0, None) + >>> parse_refresh_header("blah") + Traceback (most recent call last): + ValueError: invalid literal for float(): blah + + """ + + ii = refresh.find(";") + if ii != -1: + pause, newurl_spec = float(refresh[:ii]), refresh[ii+1:] + jj = newurl_spec.find("=") + key = None + if jj != -1: + key, newurl = newurl_spec[:jj], newurl_spec[jj+1:] + newurl = clean_refresh_url(newurl) + if key is None or key.strip().lower() != "url": + raise ValueError() + else: + pause, newurl = float(refresh), None + return pause, newurl + +class HTTPRefreshProcessor(BaseHandler): + """Perform HTTP Refresh redirections. + + Note that if a non-200 HTTP code has occurred (for example, a 30x + redirect), this processor will do nothing. + + By default, only zero-time Refresh headers are redirected. Use the + max_time attribute / constructor argument to allow Refresh with longer + pauses. Use the honor_time attribute / constructor argument to control + whether the requested pause is honoured (with a time.sleep()) or + skipped in favour of immediate redirection. + + Public attributes: + + max_time: see above + honor_time: see above + + """ + handler_order = 1000 + + def __init__(self, max_time=0, honor_time=True): + self.max_time = max_time + self.honor_time = honor_time + + def http_response(self, request, response): + code, msg, hdrs = response.code, response.msg, response.info() + + if code == 200 and hdrs.has_key("refresh"): + refresh = hdrs.getheaders("refresh")[0] + try: + pause, newurl = parse_refresh_header(refresh) + except ValueError: + debug("bad Refresh header: %r" % refresh) + return response + if newurl is None: + newurl = response.geturl() + if (self.max_time is None) or (pause <= self.max_time): + if pause > 1E-3 and self.honor_time: + time.sleep(pause) + hdrs["location"] = newurl + # hardcoded http is NOT a bug + response = self.parent.error( + "http", request, response, + "refresh", msg, hdrs) + + return response + + https_response = http_response + +class HTTPErrorProcessor(BaseHandler): + """Process HTTP error responses. + + The purpose of this handler is to to allow other response processors a + look-in by removing the call to parent.error() from + AbstractHTTPHandler. 
+ + For non-200 error codes, this just passes the job on to the + Handler._error_ methods, via the OpenerDirector.error + method. Eventually, urllib2.HTTPDefaultErrorHandler will raise an + HTTPError if no other handler handles the error. + + """ + handler_order = 1000 # after all other processors + + def http_response(self, request, response): + code, msg, hdrs = response.code, response.msg, response.info() + + if code != 200: + # hardcoded http is NOT a bug + response = self.parent.error( + "http", request, response, code, msg, hdrs) + + return response + + https_response = http_response + + +class HTTPDefaultErrorHandler(BaseHandler): + def http_error_default(self, req, fp, code, msg, hdrs): + # why these error methods took the code, msg, headers args in the first + # place rather than a response object, I don't know, but to avoid + # multiple wrapping, we're discarding them + + if isinstance(fp, urllib2.HTTPError): + response = fp + else: + response = urllib2.HTTPError( + req.get_full_url(), code, msg, hdrs, fp) + assert code == response.code + assert msg == response.msg + assert hdrs == response.hdrs + raise response + + +class AbstractHTTPHandler(BaseHandler): + + def __init__(self, debuglevel=0): + self._debuglevel = debuglevel + + def set_http_debuglevel(self, level): + self._debuglevel = level + + def do_request_(self, request): + host = request.get_host() + if not host: + raise URLError('no host given') + + if request.has_data(): # POST + data = request.get_data() + if not request.has_header('Content-type'): + request.add_unredirected_header( + 'Content-type', + 'application/x-www-form-urlencoded') + + scheme, sel = urllib.splittype(request.get_selector()) + sel_host, sel_path = urllib.splithost(sel) + if not request.has_header('Host'): + request.add_unredirected_header('Host', sel_host or host) + for name, value in self.parent.addheaders: + name = name.capitalize() + if not request.has_header(name): + request.add_unredirected_header(name, value) + + return request + + def do_open(self, http_class, req): + """Return an addinfourl object for the request, using http_class. + + http_class must implement the HTTPConnection API from httplib. + The addinfourl return value is a file-like object. It also + has methods and attributes including: + - info(): return a mimetools.Message object for the headers + - geturl(): return the original request URL + - code: HTTP status code + """ + host = req.get_host() + if not host: + raise URLError('no host given') + + h = http_class(host) # will parse host:port + h.set_debuglevel(self._debuglevel) + + headers = dict(req.headers) + headers.update(req.unredirected_hdrs) + # We want to make an HTTP/1.1 request, but the addinfourl + # class isn't prepared to deal with a persistent connection. + # It will try to read all remaining data from the socket, + # which will block while the server waits for the next request. + # So make sure the connection gets closed after the (only) + # request. + headers["Connection"] = "close" + headers = dict( + [(name.title(), val) for name, val in headers.items()]) + try: + h.request(req.get_method(), req.get_selector(), req.data, headers) + r = h.getresponse() + except socket.error, err: # XXX what error? + raise URLError(err) + + # Pick apart the HTTPResponse object to get the addinfourl + # object initialized properly. + + # Wrap the HTTPResponse object in socket's file object adapter + # for Windows. That adapter calls recv(), so delegate recv() + # to read(). 
This weird wrapping allows the returned object to + # have readline() and readlines() methods. + + # XXX It might be better to extract the read buffering code + # out of socket._fileobject() and into a base class. + + r.recv = r.read + fp = socket._fileobject(r) + + resp = closeable_response(fp, r.msg, req.get_full_url(), + r.status, r.reason) + return resp + + +class HTTPHandler(AbstractHTTPHandler): + def http_open(self, req): + return self.do_open(httplib.HTTPConnection, req) + + http_request = AbstractHTTPHandler.do_request_ + +if hasattr(httplib, 'HTTPS'): + + class HTTPSConnectionFactory: + def __init__(self, key_file, cert_file): + self._key_file = key_file + self._cert_file = cert_file + def __call__(self, hostport): + return httplib.HTTPSConnection( + hostport, + key_file=self._key_file, cert_file=self._cert_file) + + class HTTPSHandler(AbstractHTTPHandler): + def __init__(self, client_cert_manager=None): + AbstractHTTPHandler.__init__(self) + self.client_cert_manager = client_cert_manager + + def https_open(self, req): + if self.client_cert_manager is not None: + key_file, cert_file = self.client_cert_manager.find_key_cert( + req.get_full_url()) + conn_factory = HTTPSConnectionFactory(key_file, cert_file) + else: + conn_factory = httplib.HTTPSConnection + return self.do_open(conn_factory, req) + + https_request = AbstractHTTPHandler.do_request_ diff --git a/src/calibre/utils/mechanize/_lwpcookiejar.py b/src/calibre/utils/mechanize/_lwpcookiejar.py new file mode 100644 index 0000000000..f8d49cf2d4 --- /dev/null +++ b/src/calibre/utils/mechanize/_lwpcookiejar.py @@ -0,0 +1,185 @@ +"""Load / save to libwww-perl (LWP) format files. + +Actually, the format is slightly extended from that used by LWP's +(libwww-perl's) HTTP::Cookies, to avoid losing some RFC 2965 information +not recorded by LWP. + +It uses the version string "2.0", though really there isn't an LWP Cookies +2.0 format. This indicates that there is extra information in here +(domain_dot and port_spec) while still being compatible with libwww-perl, +I hope. + +Copyright 2002-2006 John J Lee +Copyright 1997-1999 Gisle Aas (original libwww-perl code) + +This code is free software; you can redistribute it and/or modify it +under the terms of the BSD or ZPL 2.1 licenses (see the file +COPYING.txt included with the distribution). + +""" + +import time, re, logging + +from _clientcookie import reraise_unmasked_exceptions, FileCookieJar, Cookie, \ + MISSING_FILENAME_TEXT, LoadError +from _headersutil import join_header_words, split_header_words +from _util import iso2time, time2isoz + +debug = logging.getLogger("mechanize").debug + + +def lwp_cookie_str(cookie): + """Return string representation of Cookie in an the LWP cookie file format. + + Actually, the format is extended a bit -- see module docstring. 
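+
+    For a typical session cookie the result looks something like this
+    (illustrative; the attribute order is fixed by the code below):
+
+        foo=bar; path="/"; domain="example.com"; path_spec; discard; version=0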
+
+    """
+    h = [(cookie.name, cookie.value),
+         ("path", cookie.path),
+         ("domain", cookie.domain)]
+    if cookie.port is not None: h.append(("port", cookie.port))
+    if cookie.path_specified: h.append(("path_spec", None))
+    if cookie.port_specified: h.append(("port_spec", None))
+    if cookie.domain_initial_dot: h.append(("domain_dot", None))
+    if cookie.secure: h.append(("secure", None))
+    if cookie.expires: h.append(("expires",
+                                 time2isoz(float(cookie.expires))))
+    if cookie.discard: h.append(("discard", None))
+    if cookie.comment: h.append(("comment", cookie.comment))
+    if cookie.comment_url: h.append(("commenturl", cookie.comment_url))
+    if cookie.rfc2109: h.append(("rfc2109", None))
+
+    keys = cookie.nonstandard_attr_keys()
+    keys.sort()
+    for k in keys:
+        h.append((k, str(cookie.get_nonstandard_attr(k))))
+
+    h.append(("version", str(cookie.version)))
+
+    return join_header_words([h])
+
+class LWPCookieJar(FileCookieJar):
+    """
+    The LWPCookieJar saves a sequence of "Set-Cookie3" lines.
+    "Set-Cookie3" is the format used by the libwww-perl library, not known
+    to be compatible with any browser, but which is easy to read and
+    doesn't lose information about RFC 2965 cookies.
+
+    Additional methods
+
+    as_lwp_str(ignore_discard=True, ignore_expired=True)
+
+    """
+
+    magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
+
+    def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
+        """Return cookies as a string of "\n"-separated "Set-Cookie3" headers.
+
+        ignore_discard and ignore_expires: see docstring for FileCookieJar.save
+
+        """
+        now = time.time()
+        r = []
+        for cookie in self:
+            if not ignore_discard and cookie.discard:
+                debug("   Not saving %s: marked for discard", cookie.name)
+                continue
+            if not ignore_expires and cookie.is_expired(now):
+                debug("   Not saving %s: expired", cookie.name)
+                continue
+            r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
+        return "\n".join(r+[""])
+
+    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+        if filename is None:
+            if self.filename is not None: filename = self.filename
+            else: raise ValueError(MISSING_FILENAME_TEXT)
+
+        f = open(filename, "w")
+        try:
+            debug("Saving LWP cookies file")
+            # There really isn't an LWP Cookies 2.0 format, but this indicates
+            # that there is extra information in here (domain_dot and
+            # port_spec) while still being compatible with libwww-perl, I hope.
+ f.write("#LWP-Cookies-2.0\n") + f.write(self.as_lwp_str(ignore_discard, ignore_expires)) + finally: + f.close() + + def _really_load(self, f, filename, ignore_discard, ignore_expires): + magic = f.readline() + if not re.search(self.magic_re, magic): + msg = "%s does not seem to contain cookies" % filename + raise LoadError(msg) + + now = time.time() + + header = "Set-Cookie3:" + boolean_attrs = ("port_spec", "path_spec", "domain_dot", + "secure", "discard", "rfc2109") + value_attrs = ("version", + "port", "path", "domain", + "expires", + "comment", "commenturl") + + try: + while 1: + line = f.readline() + if line == "": break + if not line.startswith(header): + continue + line = line[len(header):].strip() + + for data in split_header_words([line]): + name, value = data[0] + standard = {} + rest = {} + for k in boolean_attrs: + standard[k] = False + for k, v in data[1:]: + if k is not None: + lc = k.lower() + else: + lc = None + # don't lose case distinction for unknown fields + if (lc in value_attrs) or (lc in boolean_attrs): + k = lc + if k in boolean_attrs: + if v is None: v = True + standard[k] = v + elif k in value_attrs: + standard[k] = v + else: + rest[k] = v + + h = standard.get + expires = h("expires") + discard = h("discard") + if expires is not None: + expires = iso2time(expires) + if expires is None: + discard = True + domain = h("domain") + domain_specified = domain.startswith(".") + c = Cookie(h("version"), name, value, + h("port"), h("port_spec"), + domain, domain_specified, h("domain_dot"), + h("path"), h("path_spec"), + h("secure"), + expires, + discard, + h("comment"), + h("commenturl"), + rest, + h("rfc2109"), + ) + if not ignore_discard and c.discard: + continue + if not ignore_expires and c.is_expired(now): + continue + self.set_cookie(c) + except: + reraise_unmasked_exceptions((IOError,)) + raise LoadError("invalid Set-Cookie3 format file %s" % filename) + diff --git a/src/calibre/utils/mechanize/_mechanize.py b/src/calibre/utils/mechanize/_mechanize.py new file mode 100644 index 0000000000..a9b8d9e0b5 --- /dev/null +++ b/src/calibre/utils/mechanize/_mechanize.py @@ -0,0 +1,656 @@ +"""Stateful programmatic WWW navigation, after Perl's WWW::Mechanize. + +Copyright 2003-2006 John J. Lee +Copyright 2003 Andy Lester (original Perl code) + +This code is free software; you can redistribute it and/or modify it +under the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt +included with the distribution). + +""" + +import urllib2, sys, copy, re + +from _useragent import UserAgentBase +from _html import DefaultFactory +import _response +import _request +import _rfc3986 + +__version__ = (0, 1, 7, "b", None) # 0.1.7b + +class BrowserStateError(Exception): pass +class LinkNotFoundError(Exception): pass +class FormNotFoundError(Exception): pass + + +class History: + """ + + Though this will become public, the implied interface is not yet stable. + + """ + def __init__(self): + self._history = [] # LIFO + def add(self, request, response): + self._history.append((request, response)) + def back(self, n, _response): + response = _response # XXX move Browser._response into this class? 
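+        # pop n entries off the history, and keep popping past any entry
+        # whose response is None (a visit that never produced a response)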
+ while n > 0 or response is None: + try: + request, response = self._history.pop() + except IndexError: + raise BrowserStateError("already at start of history") + n -= 1 + return request, response + def clear(self): + del self._history[:] + def close(self): + for request, response in self._history: + if response is not None: + response.close() + del self._history[:] + + +class HTTPRefererProcessor(urllib2.BaseHandler): + def http_request(self, request): + # See RFC 2616 14.36. The only times we know the source of the + # request URI has a URI associated with it are redirect, and + # Browser.click() / Browser.submit() / Browser.follow_link(). + # Otherwise, it's the user's job to add any Referer header before + # .open()ing. + if hasattr(request, "redirect_dict"): + request = self.parent._add_referer_header( + request, origin_request=False) + return request + + https_request = http_request + + +class Browser(UserAgentBase): + """Browser-like class with support for history, forms and links. + + BrowserStateError is raised whenever the browser is in the wrong state to + complete the requested operation - eg., when .back() is called when the + browser history is empty, or when .follow_link() is called when the current + response does not contain HTML data. + + Public attributes: + + request: current request (mechanize.Request or urllib2.Request) + form: currently selected form (see .select_form()) + + """ + + handler_classes = copy.copy(UserAgentBase.handler_classes) + handler_classes["_referer"] = HTTPRefererProcessor + default_features = copy.copy(UserAgentBase.default_features) + default_features.append("_referer") + + def __init__(self, + factory=None, + history=None, + request_class=None, + ): + """ + + Only named arguments should be passed to this constructor. + + factory: object implementing the mechanize.Factory interface. + history: object implementing the mechanize.History interface. Note + this interface is still experimental and may change in future. + request_class: Request class to use. Defaults to mechanize.Request + by default for Pythons older than 2.4, urllib2.Request otherwise. + + The Factory and History objects passed in are 'owned' by the Browser, + so they should not be shared across Browsers. In particular, + factory.set_response() should not be called except by the owning + Browser itself. + + Note that the supplied factory's request_class is overridden by this + constructor, to ensure only one Request class is used. 
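+
+        A minimal usage sketch (the URL is illustrative and the page is
+        assumed to be HTML):
+
+          br = Browser()
+          br.open("http://example.com/")
+          print br.title()
+          for link in br.links():
+              print link.absolute_url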
+ + """ + self._handle_referer = True + + if history is None: + history = History() + self._history = history + + if request_class is None: + if not hasattr(urllib2.Request, "add_unredirected_header"): + request_class = _request.Request + else: + request_class = urllib2.Request # Python >= 2.4 + + if factory is None: + factory = DefaultFactory() + factory.set_request_class(request_class) + self._factory = factory + self.request_class = request_class + + self.request = None + self._set_response(None, False) + + # do this last to avoid __getattr__ problems + UserAgentBase.__init__(self) + + def close(self): + UserAgentBase.close(self) + if self._response is not None: + self._response.close() + if self._history is not None: + self._history.close() + self._history = None + + # make use after .close easy to spot + self.form = None + self.request = self._response = None + self.request = self.response = self.set_response = None + self.geturl = self.reload = self.back = None + self.clear_history = self.set_cookie = self.links = self.forms = None + self.viewing_html = self.encoding = self.title = None + self.select_form = self.click = self.submit = self.click_link = None + self.follow_link = self.find_link = None + + def set_handle_referer(self, handle): + """Set whether to add Referer header to each request. + + This base class does not implement this feature (so don't turn this on + if you're using this base class directly), but the subclass + mechanize.Browser does. + + """ + self._set_handler("_referer", handle) + self._handle_referer = bool(handle) + + def _add_referer_header(self, request, origin_request=True): + if self.request is None: + return request + scheme = request.get_type() + original_scheme = self.request.get_type() + if scheme not in ["http", "https"]: + return request + if not origin_request and not self.request.has_header("Referer"): + return request + + if (self._handle_referer and + original_scheme in ["http", "https"] and + not (original_scheme == "https" and scheme != "https")): + # strip URL fragment (RFC 2616 14.36) + parts = _rfc3986.urlsplit(self.request.get_full_url()) + parts = parts[:-1]+(None,) + referer = _rfc3986.urlunsplit(parts) + request.add_unredirected_header("Referer", referer) + return request + + def open_novisit(self, url, data=None): + """Open a URL without visiting it. + + The browser state (including .request, .response(), history, forms and + links) are all left unchanged by calling this function. + + The interface is the same as for .open(). + + This is useful for things like fetching images. + + See also .retrieve(). 
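+
+        For example (a sketch; the URL is illustrative), fetching an image
+        referenced by the current page without disturbing the browser state:
+
+          data = browser.open_novisit("http://example.com/logo.png").read()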
+ + """ + return self._mech_open(url, data, visit=False) + + def open(self, url, data=None): + return self._mech_open(url, data) + + def _mech_open(self, url, data=None, update_history=True, visit=None): + try: + url.get_full_url + except AttributeError: + # string URL -- convert to absolute URL if required + scheme, authority = _rfc3986.urlsplit(url)[:2] + if scheme is None: + # relative URL + if self._response is None: + raise BrowserStateError( + "can't fetch relative reference: " + "not viewing any document") + url = _rfc3986.urljoin(self._response.geturl(), url) + + request = self._request(url, data, visit) + visit = request.visit + if visit is None: + visit = True + + if visit: + self._visit_request(request, update_history) + + success = True + try: + response = UserAgentBase.open(self, request, data) + except urllib2.HTTPError, error: + success = False + if error.fp is None: # not a response + raise + response = error +## except (IOError, socket.error, OSError), error: +## # Yes, urllib2 really does raise all these :-(( +## # See test_urllib2.py for examples of socket.gaierror and OSError, +## # plus note that FTPHandler raises IOError. +## # XXX I don't seem to have an example of exactly socket.error being +## # raised, only socket.gaierror... +## # I don't want to start fixing these here, though, since this is a +## # subclass of OpenerDirector, and it would break old code. Even in +## # Python core, a fix would need some backwards-compat. hack to be +## # acceptable. +## raise + + if visit: + self._set_response(response, False) + response = copy.copy(self._response) + elif response is not None: + response = _response.upgrade_response(response) + + if not success: + raise response + return response + + def __str__(self): + text = [] + text.append("<%s " % self.__class__.__name__) + if self._response: + text.append("visiting %s" % self._response.geturl()) + else: + text.append("(not visiting a URL)") + if self.form: + text.append("\n selected form:\n %s\n" % str(self.form)) + text.append(">") + return "".join(text) + + def response(self): + """Return a copy of the current response. + + The returned object has the same interface as the object returned by + .open() (or urllib2.urlopen()). + + """ + return copy.copy(self._response) + + def set_response(self, response): + """Replace current response with (a copy of) response. + + response may be None. + + This is intended mostly for HTML-preprocessing. + """ + self._set_response(response, True) + + def _set_response(self, response, close_current): + # sanity check, necessary but far from sufficient + if not (response is None or + (hasattr(response, "info") and hasattr(response, "geturl") and + hasattr(response, "read") + ) + ): + raise ValueError("not a response object") + + self.form = None + if response is not None: + response = _response.upgrade_response(response) + if close_current and self._response is not None: + self._response.close() + self._response = response + self._factory.set_response(response) + + def visit_response(self, response, request=None): + """Visit the response, as if it had been .open()ed. + + Unlike .set_response(), this updates history rather than replacing the + current response. 
+ """ + if request is None: + request = _request.Request(response.geturl()) + self._visit_request(request, True) + self._set_response(response, False) + + def _visit_request(self, request, update_history): + if self._response is not None: + self._response.close() + if self.request is not None and update_history: + self._history.add(self.request, self._response) + self._response = None + # we want self.request to be assigned even if UserAgentBase.open + # fails + self.request = request + + def geturl(self): + """Get URL of current document.""" + if self._response is None: + raise BrowserStateError("not viewing any document") + return self._response.geturl() + + def reload(self): + """Reload current document, and return response object.""" + if self.request is None: + raise BrowserStateError("no URL has yet been .open()ed") + if self._response is not None: + self._response.close() + return self._mech_open(self.request, update_history=False) + + def back(self, n=1): + """Go back n steps in history, and return response object. + + n: go back this number of steps (default 1 step) + + """ + if self._response is not None: + self._response.close() + self.request, response = self._history.back(n, self._response) + self.set_response(response) + if not response.read_complete: + return self.reload() + return copy.copy(response) + + def clear_history(self): + self._history.clear() + + def set_cookie(self, cookie_string): + """Request to set a cookie. + + Note that it is NOT necessary to call this method under ordinary + circumstances: cookie handling is normally entirely automatic. The + intended use case is rather to simulate the setting of a cookie by + client script in a web page (e.g. JavaScript). In that case, use of + this method is necessary because mechanize currently does not support + JavaScript, VBScript, etc. + + The cookie is added in the same way as if it had arrived with the + current response, as a result of the current request. This means that, + for example, it is not appropriate to set the cookie based on the + current request, no cookie will be set. + + The cookie will be returned automatically with subsequent responses + made by the Browser instance whenever that's appropriate. + + cookie_string should be a valid value of the Set-Cookie header. + + For example: + + browser.set_cookie( + "sid=abcdef; expires=Wednesday, 09-Nov-06 23:12:40 GMT") + + Currently, this method does not allow for adding RFC 2986 cookies. + This limitation will be lifted if anybody requests it. + + """ + if self._response is None: + raise BrowserStateError("not viewing any document") + if self.request.get_type() not in ["http", "https"]: + raise BrowserStateError("can't set cookie for non-HTTP/HTTPS " + "transactions") + cookiejar = self._ua_handlers["_cookies"].cookiejar + response = self.response() # copy + headers = response.info() + headers["Set-cookie"] = cookie_string + cookiejar.extract_cookies(response, self.request) + + def links(self, **kwds): + """Return iterable over links (mechanize.Link objects).""" + if not self.viewing_html(): + raise BrowserStateError("not viewing HTML") + links = self._factory.links() + if kwds: + return self._filter_links(links, **kwds) + else: + return links + + def forms(self): + """Return iterable over forms. + + The returned form objects implement the ClientForm.HTMLForm interface. 
+
+ """
+ if not self.viewing_html():
+ raise BrowserStateError("not viewing HTML")
+ return self._factory.forms()
+
+ def global_form(self):
+ """Return the global form object, or None if the factory implementation
+ did not supply one.
+
+ The "global" form object contains all controls that are not descendants of
+ any FORM element.
+
+ The returned form object implements the ClientForm.HTMLForm interface.
+
+ This is a separate method since the global form is not regarded as part
+ of the sequence of forms in the document -- mostly for
+ backwards-compatibility.
+
+ """
+ if not self.viewing_html():
+ raise BrowserStateError("not viewing HTML")
+ return self._factory.global_form
+
+ def viewing_html(self):
+ """Return whether the current response contains HTML data."""
+ if self._response is None:
+ raise BrowserStateError("not viewing any document")
+ return self._factory.is_html
+
+ def encoding(self):
+ """Return the encoding of the current response document."""
+ if self._response is None:
+ raise BrowserStateError("not viewing any document")
+ return self._factory.encoding
+
+ def title(self):
+ """Return title, or None if there is no title element in the document.
+
+ Tags are stripped or textified as described in the docs for the
+ PullParser.get_text() method of the pullparser module.
+
+ """
+ if not self.viewing_html():
+ raise BrowserStateError("not viewing HTML")
+ return self._factory.title
+
+ def select_form(self, name=None, predicate=None, nr=None):
+ """Select an HTML form for input.
+
+ This is a bit like giving a form the "input focus" in a browser.
+
+ If a form is selected, the Browser object supports the HTMLForm
+ interface, so you can call methods like .set_value(), .set(), and
+ .click().
+
+ Another way to select a form is to assign to the .form attribute. The
+ form assigned should be one of the objects returned by the .forms()
+ method.
+
+ At least one of the name, predicate and nr arguments must be supplied.
+ If no matching form is found, mechanize.FormNotFoundError is raised.
+
+ If name is specified, then the form must have the indicated name.
+
+ If predicate is specified, then the form must match that function. The
+ predicate function is passed the HTMLForm as its single argument, and
+ should return a boolean value indicating whether the form matched.
+
+ nr, if supplied, is the sequence number of the form (where 0 is the
+ first). Note that form 0 is the first form that matches all the other
+ arguments (if supplied); it is not necessarily the first form in the
+ document.
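+
+ Illustrative sketches (the form name and id are made up):
+
+ br.select_form(name="login")
+ br.select_form(predicate=lambda f: f.attrs.get("id") == "search")
+ br.select_form(nr=0) # first form in the document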
+
+ """
+ if not self.viewing_html():
+ raise BrowserStateError("not viewing HTML")
+ if (name is None) and (predicate is None) and (nr is None):
+ raise ValueError(
+ "at least one argument must be supplied to specify form")
+
+ orig_nr = nr
+ for form in self.forms():
+ if name is not None and name != form.name:
+ continue
+ if predicate is not None and not predicate(form):
+ continue
+ if nr:
+ nr -= 1
+ continue
+ self.form = form
+ break # success
+ else:
+ # failure
+ description = []
+ if name is not None: description.append("name '%s'" % name)
+ if predicate is not None:
+ description.append("predicate %s" % predicate)
+ if orig_nr is not None: description.append("nr %d" % orig_nr)
+ description = ", ".join(description)
+ raise FormNotFoundError("no form matching "+description)
+
+ def click(self, *args, **kwds):
+ """See ClientForm.HTMLForm.click for documentation."""
+ if not self.viewing_html():
+ raise BrowserStateError("not viewing HTML")
+ request = self.form.click(*args, **kwds)
+ return self._add_referer_header(request)
+
+ def submit(self, *args, **kwds):
+ """Submit current form.
+
+ Arguments are as for ClientForm.HTMLForm.click().
+
+ Return value is same as for Browser.open().
+
+ """
+ return self.open(self.click(*args, **kwds))
+
+ def click_link(self, link=None, **kwds):
+ """Find a link and return a Request object for it.
+
+ Arguments are as for .find_link(), except that a link may be supplied
+ as the first argument.
+
+ """
+ if not self.viewing_html():
+ raise BrowserStateError("not viewing HTML")
+ if not link:
+ link = self.find_link(**kwds)
+ else:
+ if kwds:
+ raise ValueError(
+ "either pass a Link, or keyword arguments, not both")
+ request = self.request_class(link.absolute_url)
+ return self._add_referer_header(request)
+
+ def follow_link(self, link=None, **kwds):
+ """Find a link and .open() it.
+
+ Arguments are as for .click_link().
+
+ Return value is same as for Browser.open().
+
+ """
+ return self.open(self.click_link(link, **kwds))
+
+ def find_link(self, **kwds):
+ """Find a link in current page.
+
+ Links are returned as mechanize.Link objects.
+
+ # Return third link that .search()-matches the regexp "python"
+ # (by ".search()-matches", I mean that the regular expression method
+ # .search() is used, rather than .match()).
+ find_link(text_regex=re.compile("python"), nr=2)
+
+ # Return first http link in the current page that points to somewhere
+ # on python.org whose link text (after tags have been removed) is
+ # exactly "monty python".
+ find_link(text="monty python",
+ url_regex=re.compile("http.*python.org"))
+
+ # Return first link with exactly three HTML attributes.
+ find_link(predicate=lambda link: len(link.attrs) == 3)
+
+ Links include anchors (<a>), image maps (<area>), and frames (<frame>,