mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fetch news: Allow using the chromium network stack to make HTTP requests
This commit is contained in:
parent
f40950d1ff
commit
66f8ae20fc
@ -106,6 +106,9 @@ class Browser:
|
|||||||
self.user_agent = val
|
self.user_agent = val
|
||||||
self._send_command({'action': 'set_user_agent', 'user_agent': val})
|
self._send_command({'action': 'set_user_agent', 'user_agent': val})
|
||||||
|
|
||||||
|
def clone_browser(self):
|
||||||
|
return self
|
||||||
|
|
||||||
def _send_command(self, cmd):
|
def _send_command(self, cmd):
|
||||||
self.worker.stdin.write(json.dumps(cmd).encode())
|
self.worker.stdin.write(json.dumps(cmd).encode())
|
||||||
self.worker.stdin.write(b'\n')
|
self.worker.stdin.write(b'\n')
|
||||||
|
@ -427,7 +427,15 @@ class BasicNewsRecipe(Recipe):
|
|||||||
#: If no default is specified, the option will not be in the dict at all, when unspecified by the user.
|
#: If no default is specified, the option will not be in the dict at all, when unspecified by the user.
|
||||||
recipe_specific_options = None
|
recipe_specific_options = None
|
||||||
|
|
||||||
#: Set to False if you do not want to use gzipped transfers. Note that some old servers flake out with gzip
|
#: The simulated browser engine to use when downloading from servers. The default is to use the Python mechanize
|
||||||
|
#: browser engine. An alternate is "chromium" which will use the network engine from the Chromium web browser instead.
|
||||||
|
#: The mechanize engine supports logging in; the Chromium engine does not. However, the Chromium engine supports HTTP/2 and
|
||||||
|
#: similar technologies and also is harder for bot interception services to fingerprint. To customize the Chromium based
|
||||||
|
#: browser, such as adding headers or cookies, override the get_chromium_browser() method in your recipe.
|
||||||
|
browser_type = 'mechanize'
|
||||||
|
|
||||||
|
#: Set to False if you do not want to use gzipped transfers with the mechanize browser.
|
||||||
|
#: Note that some old servers flake out with gzip.
|
||||||
handle_gzip = True
|
handle_gzip = True
|
||||||
|
|
||||||
# See the built-in recipes for examples of these settings.
|
# See the built-in recipes for examples of these settings.
|
||||||
@ -550,6 +558,8 @@ class BasicNewsRecipe(Recipe):
|
|||||||
return br
|
return br
|
||||||
|
|
||||||
'''
|
'''
|
||||||
|
if self.browser_type == 'chromium':
|
||||||
|
return self.get_chromium_browser()
|
||||||
if 'user_agent' not in kwargs:
|
if 'user_agent' not in kwargs:
|
||||||
# More and more news sites are serving JPEG XR images to IE
|
# More and more news sites are serving JPEG XR images to IE
|
||||||
ua = getattr(self, 'last_used_user_agent', None) or self.calibre_most_common_ua or random_user_agent(allow_ie=False)
|
ua = getattr(self, 'last_used_user_agent', None) or self.calibre_most_common_ua or random_user_agent(allow_ie=False)
|
||||||
@ -561,6 +571,25 @@ class BasicNewsRecipe(Recipe):
|
|||||||
br.set_handle_gzip(True)
|
br.set_handle_gzip(True)
|
||||||
return br
|
return br
|
||||||
|
|
||||||
|
def get_chromium_browser(self, *a, **kw):
|
||||||
|
'''
|
||||||
|
Get a "browser" that uses the Chromium network stack for support of HTTP/2 and HTTP/3 and a TLS fingerprint identical
|
||||||
|
to that of a normal browser. Customizing the browser is simple::
|
||||||
|
|
||||||
|
br = super().get_chromium_browser()
|
||||||
|
# Adding headers that are added to every network request
|
||||||
|
br.addheaders += [
|
||||||
|
('My-Header', 'Some value'),
|
||||||
|
('Another-Header', 'another value'),
|
||||||
|
]
|
||||||
|
# Changing the user agent
|
||||||
|
br.set_user_agent('some user agent')
|
||||||
|
# Adding cookies
|
||||||
|
br.set_simple_cookie('cookie-name', 'cookie-value')
|
||||||
|
'''
|
||||||
|
from calibre.scraper.fetch import Browser
|
||||||
|
return Browser()
|
||||||
|
|
||||||
def clone_browser(self, br):
|
def clone_browser(self, br):
|
||||||
'''
|
'''
|
||||||
Clone the browser br. Cloned browsers are used for multi-threaded
|
Clone the browser br. Cloned browsers are used for multi-threaded
|
||||||
@ -580,7 +609,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def cloned_browser(self):
|
def cloned_browser(self):
|
||||||
if hasattr(self.get_browser, 'is_base_class_implementation'):
|
if hasattr(self.get_browser, 'is_base_class_implementation') and self.browser_type == 'mechanize':
|
||||||
# We are using the default get_browser, which means no need to
|
# We are using the default get_browser, which means no need to
|
||||||
# clone
|
# clone
|
||||||
br = BasicNewsRecipe.get_browser(self)
|
br = BasicNewsRecipe.get_browser(self)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user