mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
News download: Add support for turning off SSL certificate verification
This commit is contained in:
parent
e03e50730d
commit
5fbc95dea0
@ -377,20 +377,21 @@ def random_user_agent(choose=None):
|
||||
choose = random.randint(0, len(choices)-1)
|
||||
return choices[choose]
|
||||
|
||||
def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None, use_robust_parser=False):
|
||||
def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None, use_robust_parser=False, verify_ssl_certificates=True):
|
||||
'''
|
||||
Create a mechanize browser for web scraping. The browser handles cookies,
|
||||
refresh requests and ignores robots.txt. Also uses proxy if available.
|
||||
|
||||
:param honor_time: If True honors pause time in refresh requests
|
||||
:param max_time: Maximum time in seconds to wait during a refresh request
|
||||
:param verify_ssl_certificates: If False, SSL certificate errors are ignored
|
||||
'''
|
||||
from calibre.utils.browser import Browser
|
||||
if use_robust_parser:
|
||||
import mechanize
|
||||
opener = Browser(factory=mechanize.RobustFactory())
|
||||
opener = Browser(factory=mechanize.RobustFactory(), verify_ssl=verify_ssl_certificates)
|
||||
else:
|
||||
opener = Browser()
|
||||
opener = Browser(verify_ssl=verify_ssl_certificates)
|
||||
opener.set_handle_refresh(True, max_time=max_time, honor_time=honor_time)
|
||||
opener.set_handle_robots(False)
|
||||
if user_agent is None:
|
||||
|
@ -11,6 +11,9 @@ import httplib
|
||||
class JobQueueFull(Exception):
    '''
    Plain :class:`Exception` subclass that adds no behavior of its own.

    NOTE(review): the name suggests it signals a full job queue; the raise
    sites are not visible here -- confirm against callers.
    '''
|
||||
|
||||
class RouteError(ValueError):
    '''
    Plain :class:`ValueError` subclass that adds no behavior of its own.

    NOTE(review): presumably raised for invalid route definitions; the raise
    sites are not visible here -- confirm against callers.
    '''
|
||||
|
||||
class HTTPSimpleResponse(Exception):
|
||||
|
||||
def __init__(self, http_code, http_message='', close_connection=False, location=None):
|
||||
|
@ -5,23 +5,51 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import copy
|
||||
import copy, httplib, ssl
|
||||
from cookielib import CookieJar
|
||||
|
||||
from mechanize import Browser as B
|
||||
from mechanize import Browser as B, HTTPSHandler
|
||||
|
||||
class ModernHTTPSHandler(HTTPSHandler):
    '''
    HTTPS handler that opens its connections with an explicit SSL context.

    The context is read from the ``ssl_context`` attribute, which is None
    until the owner of the handler assigns one.
    '''

    # SSL context passed to every HTTPSConnection created by this handler.
    ssl_context = None

    def https_open(self, req):
        '''
        Open ``req`` over HTTPS using ``self.ssl_context``.

        If a client certificate manager is set and it returns a certificate
        file for the request URL, that certificate chain is loaded into the
        context before the connection is made.

        :param req: The request to open
        :return: Whatever ``self.do_open()`` returns for the request
        '''
        if self.client_cert_manager is not None:
            key_file, cert_file = self.client_cert_manager.find_key_cert(
                req.get_full_url())
            if cert_file:
                if self.ssl_context is None:
                    # No context has been assigned yet, so there is nothing
                    # to load the certificate into. Create a default one
                    # instead of failing with an AttributeError on None.
                    import ssl
                    self.ssl_context = ssl.create_default_context()
                self.ssl_context.load_cert_chain(cert_file, key_file)

        def conn_factory(hostport):
            return httplib.HTTPSConnection(hostport, context=self.ssl_context)

        return self.do_open(conn_factory, req)
|
||||
|
||||
|
||||
class Browser(B):
|
||||
'''
|
||||
A cloneable mechanize browser. Useful for multithreading. The idea is that
|
||||
each thread has a browser clone. Every clone uses the same thread safe
|
||||
cookie jar. All clones share the same browser configuration.
|
||||
|
||||
Also adds support for fine-tuning SSL verification via an SSL context object.
|
||||
'''
|
||||
|
||||
handler_classes = B.handler_classes.copy()
|
||||
handler_classes['https'] = ModernHTTPSHandler
|
||||
|
||||
def __init__(self, *args, **kwargs):
    '''
    Create the browser and attach an SSL context to its https handler.

    Two keyword arguments are consumed here; everything else is passed
    through unchanged to the mechanize Browser constructor.

    :param ssl_context: SSL context object to use for HTTPS connections.
                        If None (the default) one is created based on
                        ``verify_ssl``.
    :param verify_ssl: Used only when no ``ssl_context`` is given. If True
                       (the default) a default verifying context is
                       created, otherwise an unverified one.
    '''
    self._clone_actions = {}
    # Always remove both custom keywords. Popping verify_ssl only when no
    # context was supplied would leak it into B.__init__ whenever a caller
    # passes both.
    sc = kwargs.pop('ssl_context', None)
    verify_ssl = kwargs.pop('verify_ssl', True)
    if sc is None:
        if verify_ssl:
            sc = ssl.create_default_context()
        else:
            sc = ssl._create_unverified_context(cert_reqs=ssl.CERT_NONE)

    B.__init__(self, *args, **kwargs)
    self.set_cookiejar(CookieJar())
    self._ua_handlers['https'].ssl_context = sc
|
||||
|
||||
@property
def https_handler(self):
    ''' The handler stored under the ``'https'`` key of ``self._ua_handlers`` '''
    handlers = self._ua_handlers
    return handlers['https']
|
||||
|
||||
def set_handle_refresh(self, *args, **kwargs):
|
||||
B.set_handle_refresh(self, *args, **kwargs)
|
||||
@ -89,6 +117,7 @@ class Browser(B):
|
||||
|
||||
def clone_browser(self):
|
||||
clone = Browser()
|
||||
clone.https_handler.ssl_context = self.https_handler.ssl_context
|
||||
clone.addheaders = copy.deepcopy(self.addheaders)
|
||||
for func, args, kwargs in self._clone_actions.values():
|
||||
func = getattr(clone, func)
|
||||
|
Loading…
x
Reference in New Issue
Block a user