From 5fbc95dea0377a853e42e0925ee976c91e338a09 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 2 Jun 2015 09:37:49 +0530 Subject: [PATCH] News download: Add support for turning off SSL certificate verification --- src/calibre/__init__.py | 7 ++++--- src/calibre/srv/errors.py | 3 +++ src/calibre/utils/browser.py | 35 ++++++++++++++++++++++++++++++++--- 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index ad30fe9b87..8602b75299 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -377,20 +377,21 @@ def random_user_agent(choose=None): choose = random.randint(0, len(choices)-1) return choices[choose] -def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None, use_robust_parser=False): +def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None, use_robust_parser=False, verify_ssl_certificates=True): ''' Create a mechanize browser for web scraping. The browser handles cookies, refresh requests and ignores robots.txt. Also uses proxy if available. :param honor_time: If True honors pause time in refresh requests :param max_time: Maximum time in seconds to wait during a refresh request + :param verify_ssl_certificates: If false SSL certificates errors are ignored ''' from calibre.utils.browser import Browser if use_robust_parser: import mechanize - opener = Browser(factory=mechanize.RobustFactory()) + opener = Browser(factory=mechanize.RobustFactory(), verify_ssl=verify_ssl_certificates) else: - opener = Browser() + opener = Browser(verify_ssl=verify_ssl_certificates) opener.set_handle_refresh(True, max_time=max_time, honor_time=honor_time) opener.set_handle_robots(False) if user_agent is None: diff --git a/src/calibre/srv/errors.py b/src/calibre/srv/errors.py index 621ddbb330..8af998c5d7 100644 --- a/src/calibre/srv/errors.py +++ b/src/calibre/srv/errors.py @@ -11,6 +11,9 @@ import httplib class JobQueueFull(Exception): pass +class RouteError(ValueError): + pass + class HTTPSimpleResponse(Exception): def __init__(self, http_code, http_message='', close_connection=False, location=None): diff --git a/src/calibre/utils/browser.py b/src/calibre/utils/browser.py index ea1b1d9fca..65d64fae6a 100644 --- a/src/calibre/utils/browser.py +++ b/src/calibre/utils/browser.py @@ -5,23 +5,51 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import copy +import copy, httplib, ssl from cookielib import CookieJar -from mechanize import Browser as B +from mechanize import Browser as B, HTTPSHandler + +class ModernHTTPSHandler(HTTPSHandler): + + ssl_context = None + + def https_open(self, req): + if self.client_cert_manager is not None: + key_file, cert_file = self.client_cert_manager.find_key_cert( + req.get_full_url()) + if cert_file: + self.ssl_context.load_cert_chain(cert_file, key_file) + def conn_factory(hostport): + return httplib.HTTPSConnection(hostport, context=self.ssl_context) + return self.do_open(conn_factory, req) + class Browser(B): ''' A cloneable mechanize browser. Useful for multithreading. The idea is that each thread has a browser clone. Every clone uses the same thread safe cookie jar. All clones share the same browser configuration. + + Also adds support for fine-tuning SSL verification via an SSL context object. ''' + handler_classes = B.handler_classes.copy() + handler_classes['https'] = ModernHTTPSHandler + def __init__(self, *args, **kwargs): self._clone_actions = {} + sc = kwargs.pop('ssl_context', None) + if sc is None: + sc = ssl.create_default_context() if kwargs.pop('verify_ssl', True) else ssl._create_unverified_context(cert_reqs=ssl.CERT_NONE) B.__init__(self, *args, **kwargs) self.set_cookiejar(CookieJar()) + self._ua_handlers['https'].ssl_context = sc + + @property + def https_handler(self): + return self._ua_handlers['https'] def set_handle_refresh(self, *args, **kwargs): B.set_handle_refresh(self, *args, **kwargs) @@ -89,6 +117,7 @@ class Browser(B): def clone_browser(self): clone = Browser() + clone.https_handler.ssl_context = self.https_handler.ssl_context clone.addheaders = copy.deepcopy(self.addheaders) for func, args, kwargs in self._clone_actions.values(): func = getattr(clone, func) @@ -100,7 +129,7 @@ if __name__ == '__main__': from pprint import pprint orig = browser() clone = orig.clone_browser() - pprint( orig._ua_handlers) + pprint(orig._ua_handlers) pprint(clone._ua_handlers) assert orig._ua_handlers.keys() == clone._ua_handlers.keys() assert orig._ua_handlers['_cookies'].cookiejar is \