mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
News download: Fix threading issues in skip_ad_pages() method.
This commit is contained in:
parent
407be8da53
commit
f3e85aa26d
@ -325,7 +325,8 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
'''
|
'''
|
||||||
def get_the_soup(docEncoding, url_or_raw, raw=False) :
|
def get_the_soup(docEncoding, url_or_raw, raw=False) :
|
||||||
if re.match(r'\w+://', url_or_raw):
|
if re.match(r'\w+://', url_or_raw):
|
||||||
f = self.browser.open(url_or_raw)
|
br = self.clone_browser(self.browser)
|
||||||
|
f = br.open_novisit(url_or_raw)
|
||||||
_raw = f.read()
|
_raw = f.read()
|
||||||
f.close()
|
f.close()
|
||||||
if not _raw:
|
if not _raw:
|
||||||
|
@ -364,7 +364,8 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
'''
|
'''
|
||||||
def get_the_soup(docEncoding, url_or_raw, raw=False) :
|
def get_the_soup(docEncoding, url_or_raw, raw=False) :
|
||||||
if re.match(r'\w+://', url_or_raw):
|
if re.match(r'\w+://', url_or_raw):
|
||||||
f = self.browser.open(url_or_raw)
|
br = self.clone_browser(self.browser)
|
||||||
|
f = br.open_novisit(url_or_raw)
|
||||||
_raw = f.read()
|
_raw = f.read()
|
||||||
f.close()
|
f.close()
|
||||||
if not _raw:
|
if not _raw:
|
||||||
|
@ -437,6 +437,16 @@ class BasicNewsRecipe(Recipe):
|
|||||||
# Uh-oh recipe using something exotic, call get_browser
|
# Uh-oh recipe using something exotic, call get_browser
|
||||||
return self.get_browser()
|
return self.get_browser()
|
||||||
|
|
||||||
|
@property
def cloned_browser(self):
    '''
    Return a browser object that is separate from ``self.browser``.

    If this recipe still uses the stock :meth:`BasicNewsRecipe.get_browser`,
    that method is simply called again (presumably yielding a fresh browser,
    so no cloning is needed). Otherwise the recipe has customized
    ``get_browser`` and ``self.browser`` is duplicated with
    ``self.clone_browser``.
    '''
    # Compare the underlying function objects. ``__func__`` is available on
    # bound/unbound methods in Python 2.6+ and on bound methods in Python 3;
    # the getattr fallback covers plain functions (Python 3 class attribute
    # access) and any non-method callable assigned as ``get_browser``, where
    # the old ``im_func`` lookup would raise AttributeError.
    current = getattr(self.get_browser, '__func__', self.get_browser)
    default = getattr(BasicNewsRecipe.get_browser, '__func__',
                      BasicNewsRecipe.get_browser)
    if current is default:
        # We are using the default get_browser, which means no need to
        # clone
        return BasicNewsRecipe.get_browser(self)
    return self.clone_browser(self.browser)
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
'''
|
'''
|
||||||
Override in a subclass to customize extraction of the :term:`URL` that points
|
Override in a subclass to customize extraction of the :term:`URL` that points
|
||||||
@ -534,7 +544,10 @@ class BasicNewsRecipe(Recipe):
|
|||||||
`url_or_raw`: Either a URL or the downloaded index page as a string
|
`url_or_raw`: Either a URL or the downloaded index page as a string
|
||||||
'''
|
'''
|
||||||
if re.match(r'\w+://', url_or_raw):
|
if re.match(r'\w+://', url_or_raw):
|
||||||
open_func = getattr(self.browser, 'open_novisit', self.browser.open)
|
# We may be called in a thread (in the skip_ad_pages method), so
|
||||||
|
# clone the browser to be safe
|
||||||
|
br = self.cloned_browser
|
||||||
|
open_func = getattr(br, 'open_novisit', br.open)
|
||||||
with closing(open_func(url_or_raw)) as f:
|
with closing(open_func(url_or_raw)) as f:
|
||||||
_raw = f.read()
|
_raw = f.read()
|
||||||
if not _raw:
|
if not _raw:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user