From ec2567b5803d14689e05f664bdd885e0d07c6a93 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 23 Feb 2014 18:24:31 +0530 Subject: [PATCH] Py3 compat for https downloader --- src/calibre/utils/https.py | 219 ++++++++++++++++++++----------------- 1 file changed, 117 insertions(+), 102 deletions(-) diff --git a/src/calibre/utils/https.py b/src/calibre/utils/https.py index 9da5877b92..6b905c05f3 100644 --- a/src/calibre/utils/https.py +++ b/src/calibre/utils/https.py @@ -6,11 +6,11 @@ from __future__ import (unicode_literals, division, absolute_import, __license__ = 'GPL v3' __copyright__ = '2014, Kovid Goyal ' -import ssl, socket, re, httplib -from urlparse import urlsplit +import ssl, socket, re from contextlib import closing from calibre import get_proxies +from calibre.constants import ispy3 class HTTPError(ValueError): @@ -21,119 +21,134 @@ class HTTPError(ValueError): self.code = code self.url = url -# Check certificate hostname {{{ -# Implementation taken from python 3 -class CertificateError(ValueError): - pass +if ispy3: + from urllib.parse import urlparse + import http.client as httplib + class HTTPSConnection(httplib.HTTPSConnection): -def _dnsname_match(dn, hostname, max_wildcards=1): - """Matching according to RFC 6125, section 6.4.3 + def __init__(self, ssl_version, *args, **kwargs): + context = kwargs['context'] = ssl.SSLContext(ssl_version) + cf = kwargs.pop('cert_file') + context.load_verify_locations(cf) + context.verify_mode = ssl.CERT_REQUIRED + httplib.HTTPSConnection.__init__(self, *args, **kwargs) +else: + import httplib + from urlparse import urlsplit as urlparse - http://tools.ietf.org/html/rfc6125#section-6.4.3 - """ - pats = [] - if not dn: - return False + # Check certificate hostname {{{ + # Implementation taken from python 3 + class CertificateError(ValueError): + pass - parts = dn.split(r'.') - leftmost, remainder = parts[0], parts[1:] + def _dnsname_match(dn, hostname, max_wildcards=1): + """Matching according to RFC 6125, section 
6.4.3 - wildcards = leftmost.count('*') - if wildcards > max_wildcards: - # Issue #17980: avoid denials of service by refusing more - # than one wildcard per fragment. A survery of established - # policy among SSL implementations showed it to be a - # reasonable choice. - raise CertificateError( - "too many wildcards in certificate DNS name: " + repr(dn)) + http://tools.ietf.org/html/rfc6125#section-6.4.3 + """ + pats = [] + if not dn: + return False - # speed up common case w/o wildcards - if not wildcards: - return dn.lower() == hostname.lower() + parts = dn.split(r'.') + leftmost, remainder = parts[0], parts[1:] - # RFC 6125, section 6.4.3, subitem 1. - # The client SHOULD NOT attempt to match a presented identifier in which - # the wildcard character comprises a label other than the left-most label. - if leftmost == '*': - # When '*' is a fragment by itself, it matches a non-empty dotless - # fragment. - pats.append('[^.]+') - elif leftmost.startswith('xn--') or hostname.startswith('xn--'): - # RFC 6125, section 6.4.3, subitem 3. - # The client SHOULD NOT attempt to match a presented identifier - # where the wildcard character is embedded within an A-label or - # U-label of an internationalized domain name. - pats.append(re.escape(leftmost)) - else: - # Otherwise, '*' matches any dotless string, e.g. www* - pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) + wildcards = leftmost.count('*') + if wildcards > max_wildcards: + # Issue #17980: avoid denials of service by refusing more + # than one wildcard per fragment. A survey of established + # policy among SSL implementations showed it to be a + # reasonable choice. 
+ raise CertificateError( + "too many wildcards in certificate DNS name: " + repr(dn)) - # add the remaining fragments, ignore any wildcards - for frag in remainder: - pats.append(re.escape(frag)) + # speed up common case w/o wildcards + if not wildcards: + return dn.lower() == hostname.lower() - pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) - return pat.match(hostname) + # RFC 6125, section 6.4.3, subitem 1. + # The client SHOULD NOT attempt to match a presented identifier in which + # the wildcard character comprises a label other than the left-most label. + if leftmost == '*': + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. + pats.append('[^.]+') + elif leftmost.startswith('xn--') or hostname.startswith('xn--'): + # RFC 6125, section 6.4.3, subitem 3. + # The client SHOULD NOT attempt to match a presented identifier + # where the wildcard character is embedded within an A-label or + # U-label of an internationalized domain name. + pats.append(re.escape(leftmost)) + else: + # Otherwise, '*' matches any dotless string, e.g. www* + pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) -def match_hostname(cert, hostname): - """Verify that *cert* (in decoded format as returned by - SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 - rules are followed, but IP addresses are not accepted for *hostname*. + # add the remaining fragments, ignore any wildcards + for frag in remainder: + pats.append(re.escape(frag)) - CertificateError is raised on failure. On success, the function - returns nothing. 
- """ - if not cert: - raise ValueError("empty or no certificate") - dnsnames = [] - san = cert.get('subjectAltName', ()) - for key, value in san: - if key == 'DNS': - if _dnsname_match(value, hostname): - return - dnsnames.append(value) - if not dnsnames: - # The subject is only checked when there is no dNSName entry - # in subjectAltName - for sub in cert.get('subject', ()): - for key, value in sub: - # XXX according to RFC 2818, the most specific Common Name - # must be used. - if key == 'commonName': - if _dnsname_match(value, hostname): - return - dnsnames.append(value) - if len(dnsnames) > 1: - raise CertificateError("hostname %r " - "doesn't match either of %s" - % (hostname, ', '.join(map(repr, dnsnames)))) - elif len(dnsnames) == 1: - raise CertificateError("hostname %r " - "doesn't match %r" - % (hostname, dnsnames[0])) - else: - raise CertificateError("no appropriate commonName or " - "subjectAltName fields were found") -# }}} + pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + return pat.match(hostname) -class HTTPSConnection(httplib.HTTPSConnection): + def match_hostname(cert, hostname): + """Verify that *cert* (in decoded format as returned by + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 + rules are followed, but IP addresses are not accepted for *hostname*. - def __init__(self, ssl_version, *args, **kwargs): - httplib.HTTPSConnection.__init__(self, *args, **kwargs) - self.calibre_ssl_version = ssl_version + CertificateError is raised on failure. On success, the function + returns nothing. 
+ """ + if not cert: + raise ValueError("empty or no certificate") + dnsnames = [] + san = cert.get('subjectAltName', ()) + for key, value in san: + if key == 'DNS': + if _dnsname_match(value, hostname): + return + dnsnames.append(value) + if not dnsnames: + # The subject is only checked when there is no dNSName entry + # in subjectAltName + for sub in cert.get('subject', ()): + for key, value in sub: + # XXX according to RFC 2818, the most specific Common Name + # must be used. + if key == 'commonName': + if _dnsname_match(value, hostname): + return + dnsnames.append(value) + if len(dnsnames) > 1: + raise CertificateError("hostname %r " + "doesn't match either of %s" + % (hostname, ', '.join(map(repr, dnsnames)))) + elif len(dnsnames) == 1: + raise CertificateError("hostname %r " + "doesn't match %r" + % (hostname, dnsnames[0])) + else: + raise CertificateError("no appropriate commonName or " + "subjectAltName fields were found") + # }}} - def connect(self): - """Connect to a host on a given (SSL) port, properly verifying the SSL - certificate, both that it is valid and that its declared hostnames - match the hostname we are connecting to.""" + class HTTPSConnection(httplib.HTTPSConnection): - sock = socket.create_connection((self.host, self.port), - self.timeout, self.source_address) - if self._tunnel_host: - self.sock = sock - self._tunnel() - self.sock = ssl.wrap_socket(sock, cert_reqs=ssl.CERT_REQUIRED, ca_certs=self.cert_file, ssl_version=self.calibre_ssl_version) - getattr(ssl, 'match_hostname', match_hostname)(self.sock.getpeercert(), self.host) + def __init__(self, ssl_version, *args, **kwargs): + httplib.HTTPSConnection.__init__(self, *args, **kwargs) + self.calibre_ssl_version = ssl_version + + def connect(self): + """Connect to a host on a given (SSL) port, properly verifying the SSL + certificate, both that it is valid and that its declared hostnames + match the hostname we are connecting to.""" + + sock = socket.create_connection((self.host, 
self.port), + self.timeout, self.source_address) + if self._tunnel_host: + self.sock = sock + self._tunnel() + self.sock = ssl.wrap_socket(sock, cert_reqs=ssl.CERT_REQUIRED, ca_certs=self.cert_file, ssl_version=self.calibre_ssl_version) + getattr(ssl, 'match_hostname', match_hostname)(self.sock.getpeercert(), self.host) def get_https_resource_securely( url, cacerts='calibre-ebook-root-CA.crt', timeout=60, max_redirects=5, ssl_version=None): @@ -146,7 +161,7 @@ def get_https_resource_securely( if ssl_version is None: ssl_version = ssl.PROTOCOL_TLSv1 cacerts = P(cacerts, allow_user_override=False) - p = urlsplit(url) + p = urlparse(url) if p.scheme != 'https': raise ValueError('URL scheme must be https, not %s' % p.scheme)