Py3 compat for https downloader

This commit is contained in:
Kovid Goyal 2014-02-23 18:24:31 +05:30
parent 1e64794997
commit ec2567b580

View File

@@ -6,11 +6,11 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
import ssl, socket, re, httplib import ssl, socket, re
from urlparse import urlsplit
from contextlib import closing from contextlib import closing
from calibre import get_proxies from calibre import get_proxies
from calibre.constants import ispy3
class HTTPError(ValueError): class HTTPError(ValueError):
@@ -21,119 +21,134 @@ class HTTPError(ValueError):
self.code = code self.code = code
self.url = url self.url = url
# Check certificate hostname {{{ if ispy3:
# Implementation taken from python 3 from urllib.parse import urlparse
class CertificateError(ValueError): import http.client as httplib
pass class HTTPSConnection(httplib.HTTPSConnection):
def _dnsname_match(dn, hostname, max_wildcards=1): def __init__(self, ssl_version, *args, **kwargs):
"""Matching according to RFC 6125, section 6.4.3 context = kwargs['context'] = ssl.SSLContext(ssl_version)
cf = kwargs.pop('cert_file')
context.load_verify_locations(cf)
context.verify_mode = ssl.CERT_REQUIRED
httplib.HTTPSConnection.__init__(self, *args, **kwargs)
else:
import httplib
from urlparse import urlsplit as urlparse
http://tools.ietf.org/html/rfc6125#section-6.4.3 # Check certificate hostname {{{
""" # Implementation taken from python 3
pats = [] class CertificateError(ValueError):
if not dn: pass
return False
parts = dn.split(r'.') def _dnsname_match(dn, hostname, max_wildcards=1):
leftmost, remainder = parts[0], parts[1:] """Matching according to RFC 6125, section 6.4.3
wildcards = leftmost.count('*') http://tools.ietf.org/html/rfc6125#section-6.4.3
if wildcards > max_wildcards: """
# Issue #17980: avoid denials of service by refusing more pats = []
# than one wildcard per fragment. A survey of established if not dn:
# policy among SSL implementations showed it to be a return False
# reasonable choice.
raise CertificateError(
"too many wildcards in certificate DNS name: " + repr(dn))
# speed up common case w/o wildcards parts = dn.split(r'.')
if not wildcards: leftmost, remainder = parts[0], parts[1:]
return dn.lower() == hostname.lower()
# RFC 6125, section 6.4.3, subitem 1. wildcards = leftmost.count('*')
# The client SHOULD NOT attempt to match a presented identifier in which if wildcards > max_wildcards:
# the wildcard character comprises a label other than the left-most label. # Issue #17980: avoid denials of service by refusing more
if leftmost == '*': # than one wildcard per fragment. A survey of established
# When '*' is a fragment by itself, it matches a non-empty dotless # policy among SSL implementations showed it to be a
# fragment. # reasonable choice.
pats.append('[^.]+') raise CertificateError(
elif leftmost.startswith('xn--') or hostname.startswith('xn--'): "too many wildcards in certificate DNS name: " + repr(dn))
# RFC 6125, section 6.4.3, subitem 3.
# The client SHOULD NOT attempt to match a presented identifier
# where the wildcard character is embedded within an A-label or
# U-label of an internationalized domain name.
pats.append(re.escape(leftmost))
else:
# Otherwise, '*' matches any dotless string, e.g. www*
pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
# add the remaining fragments, ignore any wildcards # speed up common case w/o wildcards
for frag in remainder: if not wildcards:
pats.append(re.escape(frag)) return dn.lower() == hostname.lower()
pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) # RFC 6125, section 6.4.3, subitem 1.
return pat.match(hostname) # The client SHOULD NOT attempt to match a presented identifier in which
# the wildcard character comprises a label other than the left-most label.
if leftmost == '*':
# When '*' is a fragment by itself, it matches a non-empty dotless
# fragment.
pats.append('[^.]+')
elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
# RFC 6125, section 6.4.3, subitem 3.
# The client SHOULD NOT attempt to match a presented identifier
# where the wildcard character is embedded within an A-label or
# U-label of an internationalized domain name.
pats.append(re.escape(leftmost))
else:
# Otherwise, '*' matches any dotless string, e.g. www*
pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
def match_hostname(cert, hostname): # add the remaining fragments, ignore any wildcards
"""Verify that *cert* (in decoded format as returned by for frag in remainder:
SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 pats.append(re.escape(frag))
rules are followed, but IP addresses are not accepted for *hostname*.
CertificateError is raised on failure. On success, the function pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
returns nothing. return pat.match(hostname)
"""
if not cert:
raise ValueError("empty or no certificate")
dnsnames = []
san = cert.get('subjectAltName', ())
for key, value in san:
if key == 'DNS':
if _dnsname_match(value, hostname):
return
dnsnames.append(value)
if not dnsnames:
# The subject is only checked when there is no dNSName entry
# in subjectAltName
for sub in cert.get('subject', ()):
for key, value in sub:
# XXX according to RFC 2818, the most specific Common Name
# must be used.
if key == 'commonName':
if _dnsname_match(value, hostname):
return
dnsnames.append(value)
if len(dnsnames) > 1:
raise CertificateError("hostname %r "
"doesn't match either of %s"
% (hostname, ', '.join(map(repr, dnsnames))))
elif len(dnsnames) == 1:
raise CertificateError("hostname %r "
"doesn't match %r"
% (hostname, dnsnames[0]))
else:
raise CertificateError("no appropriate commonName or "
"subjectAltName fields were found")
# }}}
class HTTPSConnection(httplib.HTTPSConnection): def match_hostname(cert, hostname):
"""Verify that *cert* (in decoded format as returned by
SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
rules are followed, but IP addresses are not accepted for *hostname*.
def __init__(self, ssl_version, *args, **kwargs): CertificateError is raised on failure. On success, the function
httplib.HTTPSConnection.__init__(self, *args, **kwargs) returns nothing.
self.calibre_ssl_version = ssl_version """
if not cert:
raise ValueError("empty or no certificate")
dnsnames = []
san = cert.get('subjectAltName', ())
for key, value in san:
if key == 'DNS':
if _dnsname_match(value, hostname):
return
dnsnames.append(value)
if not dnsnames:
# The subject is only checked when there is no dNSName entry
# in subjectAltName
for sub in cert.get('subject', ()):
for key, value in sub:
# XXX according to RFC 2818, the most specific Common Name
# must be used.
if key == 'commonName':
if _dnsname_match(value, hostname):
return
dnsnames.append(value)
if len(dnsnames) > 1:
raise CertificateError("hostname %r "
"doesn't match either of %s"
% (hostname, ', '.join(map(repr, dnsnames))))
elif len(dnsnames) == 1:
raise CertificateError("hostname %r "
"doesn't match %r"
% (hostname, dnsnames[0]))
else:
raise CertificateError("no appropriate commonName or "
"subjectAltName fields were found")
# }}}
def connect(self): class HTTPSConnection(httplib.HTTPSConnection):
"""Connect to a host on a given (SSL) port, properly verifying the SSL
certificate, both that it is valid and that its declared hostnames
match the hostname we are connecting to."""
sock = socket.create_connection((self.host, self.port), def __init__(self, ssl_version, *args, **kwargs):
self.timeout, self.source_address) httplib.HTTPSConnection.__init__(self, *args, **kwargs)
if self._tunnel_host: self.calibre_ssl_version = ssl_version
self.sock = sock
self._tunnel() def connect(self):
self.sock = ssl.wrap_socket(sock, cert_reqs=ssl.CERT_REQUIRED, ca_certs=self.cert_file, ssl_version=self.calibre_ssl_version) """Connect to a host on a given (SSL) port, properly verifying the SSL
getattr(ssl, 'match_hostname', match_hostname)(self.sock.getpeercert(), self.host) certificate, both that it is valid and that its declared hostnames
match the hostname we are connecting to."""
sock = socket.create_connection((self.host, self.port),
self.timeout, self.source_address)
if self._tunnel_host:
self.sock = sock
self._tunnel()
self.sock = ssl.wrap_socket(sock, cert_reqs=ssl.CERT_REQUIRED, ca_certs=self.cert_file, ssl_version=self.calibre_ssl_version)
getattr(ssl, 'match_hostname', match_hostname)(self.sock.getpeercert(), self.host)
def get_https_resource_securely( def get_https_resource_securely(
url, cacerts='calibre-ebook-root-CA.crt', timeout=60, max_redirects=5, ssl_version=None): url, cacerts='calibre-ebook-root-CA.crt', timeout=60, max_redirects=5, ssl_version=None):
@@ -146,7 +161,7 @@ def get_https_resource_securely(
if ssl_version is None: if ssl_version is None:
ssl_version = ssl.PROTOCOL_TLSv1 ssl_version = ssl.PROTOCOL_TLSv1
cacerts = P(cacerts, allow_user_override=False) cacerts = P(cacerts, allow_user_override=False)
p = urlsplit(url) p = urlparse(url)
if p.scheme != 'https': if p.scheme != 'https':
raise ValueError('URL scheme must be https, not %s' % p.scheme) raise ValueError('URL scheme must be https, not %s' % p.scheme)