mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Allow using the webengine backend in recipes
This commit is contained in:
parent
5c8332e438
commit
5b00e588b2
@ -43,7 +43,6 @@ class science(BasicNewsRecipe):
|
|||||||
classes('pb-ad')
|
classes('pb-ad')
|
||||||
]
|
]
|
||||||
browser_type = 'qt'
|
browser_type = 'qt'
|
||||||
simultaneous_downloads = 1 # server returns invalid data on HTTP2 connections when multiple requests are queued on the same connection
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for p in soup.findAll(attrs={'role':'paragraph'}):
|
for p in soup.findAll(attrs={'role':'paragraph'}):
|
||||||
|
@ -216,7 +216,7 @@ class FetchBackend(QObject):
|
|||||||
self.timeout_timer.start()
|
self.timeout_timer.start()
|
||||||
return
|
return
|
||||||
if len(self.workers) < 5:
|
if len(self.workers) < 5:
|
||||||
self.workers.append(self.create_worker)
|
self.workers.append(self.create_worker())
|
||||||
self.workers[-1].start_download(self.output_dir, req, data)
|
self.workers[-1].start_download(self.output_dir, req, data)
|
||||||
self.timeout_timer.start()
|
self.timeout_timer.start()
|
||||||
return
|
return
|
||||||
|
@ -428,9 +428,10 @@ class BasicNewsRecipe(Recipe):
|
|||||||
recipe_specific_options = None
|
recipe_specific_options = None
|
||||||
|
|
||||||
#: The simulated browser engine to use when downloading from servers. The default is to use the Python mechanize
|
#: The simulated browser engine to use when downloading from servers. The default is to use the Python mechanize
|
||||||
#: browser engine. An alternate is "qt" which will use the network engine from the Qt toolkit.
|
#: browser engine, which supports logging in. However, if you don't need logging in, consider changing this
|
||||||
#: The mechanize engine supports logging in, the Qt engine does not. However, the Qt engine supports HTTP/2 and
|
#: to either 'webengine' which uses an actual Chromium browser to do the network requests or 'qt' which
|
||||||
#: similar technologies and also is harder for bot interception services to fingerprint.
|
#: uses the Qt Networking backend. Both 'webengine' and 'qt' support HTTP/2, which mechanize does not, and
|
||||||
|
#: are thus harder to fingerprint for bot protection services.
|
||||||
browser_type = 'mechanize'
|
browser_type = 'mechanize'
|
||||||
|
|
||||||
#: Set to False if you do not want to use gzipped transfers with the mechanize browser.
|
#: Set to False if you do not want to use gzipped transfers with the mechanize browser.
|
||||||
@ -571,9 +572,10 @@ class BasicNewsRecipe(Recipe):
|
|||||||
ua = getattr(self, 'last_used_user_agent', None) or self.calibre_most_common_ua or random_user_agent(allow_ie=False)
|
ua = getattr(self, 'last_used_user_agent', None) or self.calibre_most_common_ua or random_user_agent(allow_ie=False)
|
||||||
kwargs['user_agent'] = self.last_used_user_agent = ua
|
kwargs['user_agent'] = self.last_used_user_agent = ua
|
||||||
self.log('Using user agent:', kwargs['user_agent'])
|
self.log('Using user agent:', kwargs['user_agent'])
|
||||||
if self.browser_type == 'qt':
|
if self.browser_type != 'mechanize':
|
||||||
from calibre.scraper.qt import Browser
|
from calibre.scraper.qt import Browser, WebEngineBrowser
|
||||||
return Browser(user_agent=kwargs['user_agent'], verify_ssl_certificates=kwargs.get('verify_ssl_certificates', False))
|
return {'qt': Browser, 'webengine': WebEngineBrowser}[self.browser_type](
|
||||||
|
user_agent=kwargs['user_agent'], verify_ssl_certificates=kwargs.get('verify_ssl_certificates', False))
|
||||||
br = browser(*args, **kwargs)
|
br = browser(*args, **kwargs)
|
||||||
br.addheaders += [('Accept', '*/*')]
|
br.addheaders += [('Accept', '*/*')]
|
||||||
if self.handle_gzip:
|
if self.handle_gzip:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user