From f969151e9f65fa2e8561ba9c17abcb4e96f06fb4 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 23 Feb 2014 13:13:53 +0530
Subject: [PATCH] Utility code to download https URLs securely

---
 resources/calibre-ebook-root-CA.crt |  32 +++++
 src/calibre/utils/https.py          | 181 ++++++++++++++++++++++++++++
 2 files changed, 213 insertions(+)
 create mode 100644 resources/calibre-ebook-root-CA.crt
 create mode 100644 src/calibre/utils/https.py

diff --git a/resources/calibre-ebook-root-CA.crt b/resources/calibre-ebook-root-CA.crt
new file mode 100644
index 0000000000..cd47d2829b
--- /dev/null
+++ b/resources/calibre-ebook-root-CA.crt
@@ -0,0 +1,32 @@
+-----BEGIN CERTIFICATE-----
+MIIFlzCCA3+gAwIBAgIJAI67A/kD1DLtMA0GCSqGSIb3DQEBBQUAMGIxCzAJBgNV
+BAYTAklOMRQwEgYDVQQIDAtNYWhhcmFzaHRyYTEPMA0GA1UEBwwGTXVtYmFpMRAw
+DgYDVQQKDAdjYWxpYnJlMRowGAYDVQQDDBFjYWxpYnJlLWVib29rLmNvbTAeFw0x
+NDAyMjMwNDAzNDFaFw0xNDAzMjUwNDAzNDFaMGIxCzAJBgNVBAYTAklOMRQwEgYD
+VQQIDAtNYWhhcmFzaHRyYTEPMA0GA1UEBwwGTXVtYmFpMRAwDgYDVQQKDAdjYWxp
+YnJlMRowGAYDVQQDDBFjYWxpYnJlLWVib29rLmNvbTCCAiIwDQYJKoZIhvcNAQEB
+BQADggIPADCCAgoCggIBALZW3gMUCsloaMcGhqjIeZLUYarC0ers47qlpgfjJnwt
+DYuOZjkqNkf7rBUE2XrK2FKKNsgYTDefArC3rmmkH7D3g7LO8yfY19L/xmFEt7zO
+6hOea7kVrtINdTabli2ZKr3MOYFYt2SWMf8qkxBpQgxsY11bPYhIPi++QXJvcvO6
+JW3GQOh/wm0eZT9f7V3Msm9UwSDbk3IONPEp4nmPx6ZwNa9zUAfTMH0nHV9PB0wd
+AXPHtKs/q9QTYt8GWXKzaalocOl/UJB4oBmgzaaZlqnNUOZ8cZNqwttRkYOep6er
+dxDUDHLRNykyX0fE8DN9zf3X3IKGw2f2U56IKnRUMnBToL0+JiGbF3bCb+rJsoZZ
+FKsntj1fF3EzSa/sEcyDf/rtt4wvgmk9FNAOew/D1GVYU/mbIV4wfdSqPISxNUpi
+ZHb9m8RVeNm7HpoUsWVgrbHNjb/Pw7PllVdNMXwA8pvi6JMxKqn3Cvb5JDBsxYe8
+M3e2KjzqzBjgnvbx9QqC91TubKz1ftDKdX4yBoJuUiIZJckX2niIxXsqA0QOnvBF
+6yN8TrK5F1zCQ74Z3RCTmGKqZWPuJC4VtF3k2Yyuwpg+fcUbRWFmld3XDJWlm1cb
+mO3YLIju4lM7WGNE6OWQxMXB3puzxD1E8hYovS4W3EiXlw2qjxTMYofl9Iqir54v
+AgMBAAGjUDBOMB0GA1UdDgQWBBRFarPkQ6DkrU6tIqmV5H6Wi5XGxDAfBgNVHSME
+GDAWgBRFarPkQ6DkrU6tIqmV5H6Wi5XGxDAMBgNVHRMEBTADAQH/MA0GCSqGSIb3
+DQEBBQUAA4ICAQBAlBhF+greu0vYEDzz04HQjgfamxWQ4nXete8++9et1mcRw16i
+RbEz/1ZeELz9KMwMpooVPIaYAWgqe+UNWuzHt0+jrH30NBcBv407G8eR/FWOU/cx
+y/YMk3nXsAARoOcsFN1YSS1dNL1osezfsRStET8/bOEqpWD0yvt8wRWwh1hOCPVD
+OpWTZx7+dZcK1Zh64Rm5mPYzbhWYxGGqNuZGFCuR9yI2bHHsFI69LryUKNf1cJ/N
+dfvHt4GDxfF5ie4PWNgTp52wuI3YxNpsHgz9SmSEey6uVlA13vTO1QFX8Ymbyn6K
+FRhr2LHY4iBdY+Gw47WnAqdo7uXpyM3wT6jI4gn7oENvCSUyM/JMSQqE1Etw0LBr
+NIlC/RxN5wjcDvVCL/uS3PL6IW7R0wxrCQwBU3f5wMOnDM/R4EWJdS96zyb7Xnh3
+PQGoj6/vllymI7tuwRhEuvFknRRihu3vilHgtGczVXTG73nFJftLzvN/OhqSSQG/
+3c2JDX+vAy5jwPT/M3nPkrs68M4P77da1/BDZ0/KgJb/JzYZyNpq1nhWo3nMn+Sx
+jq7y+h6ry8Omnlw7a/7CnNgvkLfP/uTfllL4erETFntHNh6LqCvpPNOqrvAP5keB
+EB8yoJraypfuiNELOw1zSRksMxe2ac4b/dhDNStBTPC0egfRSm3FA0XoOQ==
+-----END CERTIFICATE-----
diff --git a/src/calibre/utils/https.py b/src/calibre/utils/https.py
new file mode 100644
index 0000000000..6aab7eccc9
--- /dev/null
+++ b/src/calibre/utils/https.py
@@ -0,0 +1,181 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2014, Kovid Goyal '
+
+import ssl, socket, re, httplib
+from urlparse import urlsplit, urljoin
+from contextlib import closing
+
+# Check certificate hostname {{{
+# Implementation taken from python 3
+class CertificateError(ValueError):
+    pass
+
+def _dnsname_match(dn, hostname, max_wildcards=1):
+    """Matching according to RFC 6125, section 6.4.3
+
+    http://tools.ietf.org/html/rfc6125#section-6.4.3
+    """
+    pats = []
+    if not dn:
+        return False
+
+    parts = dn.split(r'.')
+    leftmost, remainder = parts[0], parts[1:]
+
+    wildcards = leftmost.count('*')
+    if wildcards > max_wildcards:
+        # Issue #17980: avoid denials of service by refusing more
+        # than one wildcard per fragment. A survey of established
+        # policy among SSL implementations showed it to be a
+        # reasonable choice.
+        raise CertificateError(
+            "too many wildcards in certificate DNS name: " + repr(dn))
+
+    # speed up common case w/o wildcards
+    if not wildcards:
+        return dn.lower() == hostname.lower()
+
+    # RFC 6125, section 6.4.3, subitem 1.
+    # The client SHOULD NOT attempt to match a presented identifier in which
+    # the wildcard character comprises a label other than the left-most label.
+    if leftmost == '*':
+        # When '*' is a fragment by itself, it matches a non-empty dotless
+        # fragment.
+        pats.append('[^.]+')
+    elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
+        # RFC 6125, section 6.4.3, subitem 3.
+        # The client SHOULD NOT attempt to match a presented identifier
+        # where the wildcard character is embedded within an A-label or
+        # U-label of an internationalized domain name.
+        pats.append(re.escape(leftmost))
+    else:
+        # Otherwise, '*' matches any dotless string, e.g. www*
+        pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
+
+    # add the remaining fragments, ignore any wildcards
+    for frag in remainder:
+        pats.append(re.escape(frag))
+
+    pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
+    return pat.match(hostname)
+
+def match_hostname(cert, hostname):
+    """Verify that *cert* (in decoded format as returned by
+    SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
+    rules are followed, but IP addresses are not accepted for *hostname*.
+
+    CertificateError is raised on failure. On success, the function
+    returns nothing.
+    """
+    if not cert:
+        raise ValueError("empty or no certificate")
+    dnsnames = []
+    san = cert.get('subjectAltName', ())
+    for key, value in san:
+        if key == 'DNS':
+            if _dnsname_match(value, hostname):
+                return
+            dnsnames.append(value)
+    if not dnsnames:
+        # The subject is only checked when there is no dNSName entry
+        # in subjectAltName
+        for sub in cert.get('subject', ()):
+            for key, value in sub:
+                # XXX according to RFC 2818, the most specific Common Name
+                # must be used.
+                if key == 'commonName':
+                    if _dnsname_match(value, hostname):
+                        return
+                    dnsnames.append(value)
+    if len(dnsnames) > 1:
+        raise CertificateError("hostname %r "
+            "doesn't match either of %s"
+            % (hostname, ', '.join(map(repr, dnsnames))))
+    elif len(dnsnames) == 1:
+        raise CertificateError("hostname %r "
+            "doesn't match %r"
+            % (hostname, dnsnames[0]))
+    else:
+        raise CertificateError("no appropriate commonName or "
+            "subjectAltName fields were found")
+# }}}
+
+class HTTPSConnection(httplib.HTTPSConnection):
+
+    def connect(self):
+        """Connect to a host on a given (SSL) port, properly verifying the SSL
+        certificate, both that it is valid and that its declared hostnames
+        match the hostname we are connecting to. If anything fails, the
+        socket is closed and the error is re-raised."""
+
+        sock = socket.create_connection((self.host, self.port),
+                                        self.timeout, self.source_address)
+        try:
+            if self._tunnel_host:
+                self.sock = sock
+                self._tunnel()
+            self.sock = ssl.wrap_socket(sock, cert_reqs=ssl.CERT_REQUIRED, ca_certs=self.cert_file)
+            # When tunnelling through a proxy, the certificate presented is
+            # that of the tunnel target, which need not be self.host
+            hostname = self._tunnel_host or self.host
+            getattr(ssl, 'match_hostname', match_hostname)(self.sock.getpeercert(), hostname)
+        except Exception:
+            # Never leave a leaked or unverified socket attached to this
+            # connection, a later request() would reuse it without checks
+            wrapped, self.sock = self.sock, None
+            if wrapped is not None and wrapped is not sock:
+                wrapped.close()
+            sock.close()
+            raise
+
+def get_https_resource_securely(url, cacerts='calibre-ebook-root-CA.crt', timeout=60, max_redirects=5):
+    '''
+    Download the resource pointed to by url using https securely (verify server
+    certificate). Ensures that redirects, if any, are also downloaded
+    securely. Needs a CA certificates bundle (in PEM format) to verify the
+    server's certificates.
+
+    :param url: The https URL to download
+    :param cacerts: Name of the CA certificates bundle, resolved via P()
+    :param timeout: Timeout passed on to the connection
+    :param max_redirects: Number of redirects that may be followed
+    :return: The body of the final response
+    :raises ValueError: For a non https URL, too many redirects, a redirect
+        without a Location header or a response code other than OK
+    '''
+    cert_file = P(cacerts, allow_user_override=False)
+    p = urlsplit(url)
+    if p.scheme != 'https':
+        raise ValueError('URL scheme must be https, not %s' % p.scheme)
+    c = HTTPSConnection(p.hostname, p.port, cert_file=cert_file, timeout=timeout)
+    with closing(c):
+        path = p.path or '/'
+        if p.query:
+            path += '?' + p.query
+        c.request('GET', path)
+        response = c.getresponse()
+        if response.status in (httplib.MOVED_PERMANENTLY, httplib.FOUND, httplib.SEE_OTHER, httplib.TEMPORARY_REDIRECT):
+            if max_redirects <= 0:
+                raise ValueError('Too many redirects, giving up')
+            newurl = response.getheader('Location', None)
+            if newurl is None:
+                raise ValueError('%s returned a redirect response with no Location header' % url)
+            # Location may be a relative reference, resolve it against the
+            # URL that was just requested
+            newurl = urljoin(url, newurl)
+            # Pass on cacerts rather than cert_file, the recursive call applies
+            # P() itself and P() presumably expects a name, not a resolved path
+            return get_https_resource_securely(newurl, cacerts=cacerts, timeout=timeout, max_redirects=max_redirects-1)
+        if response.status != httplib.OK:
+            raise ValueError('%s returned an unsupported http response code: %d (%s)' % (
+                url, response.status, httplib.responses.get(response.status, None)))
+        return response.read()
+
+if __name__ == '__main__':
+    # print (len(get_url_secure('https://status.calibre-ebook.com/dist/win32')))
+    print (get_https_resource_securely('https://status.calibre-ebook.com/latest'))