mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Allow using get_obfuscated_article without using temp files
This commit is contained in:
parent
41e9ac0840
commit
347b911b8d
@ -852,6 +852,11 @@ class BasicNewsRecipe(Recipe):
|
|||||||
every article URL. It should return the path to a file on the filesystem
|
every article URL. It should return the path to a file on the filesystem
|
||||||
that contains the article HTML. That file is processed by the recursive
|
that contains the article HTML. That file is processed by the recursive
|
||||||
HTML fetching engine, so it can contain links to pages/images on the web.
|
HTML fetching engine, so it can contain links to pages/images on the web.
|
||||||
|
Alternately, you can return a dictionary of the form:
|
||||||
|
{'data': <HTML data>, 'url': <the resolved URL of the article>}. This avoids
|
||||||
|
needing to create temporary files. The `url` key in the dictionary is useful if
|
||||||
|
the effective URL of the article is different from the URL passed into this method,
|
||||||
|
for example, because of redirects. It can be omitted if the URL is unchanged.
|
||||||
|
|
||||||
This method is typically useful for sites that try to make it difficult to
|
This method is typically useful for sites that try to make it difficult to
|
||||||
access article content automatically.
|
access article content automatically.
|
||||||
@ -1163,7 +1168,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
return templ.generate(f, feeds, self.description_limiter,
|
return templ.generate(f, feeds, self.description_limiter,
|
||||||
extra_css=css).render(doctype='xhtml')
|
extra_css=css).render(doctype='xhtml')
|
||||||
|
|
||||||
def _fetch_article(self, url, dir_, f, a, num_of_feeds):
|
def _fetch_article(self, url, dir_, f, a, num_of_feeds, preloaded=None):
|
||||||
br = self.browser
|
br = self.browser
|
||||||
if hasattr(self.get_browser, 'is_base_class_implementation'):
|
if hasattr(self.get_browser, 'is_base_class_implementation'):
|
||||||
# We are using the default get_browser, which means no need to
|
# We are using the default get_browser, which means no need to
|
||||||
@ -1180,6 +1185,8 @@ class BasicNewsRecipe(Recipe):
|
|||||||
fetcher.current_dir = dir_
|
fetcher.current_dir = dir_
|
||||||
fetcher.show_progress = False
|
fetcher.show_progress = False
|
||||||
fetcher.image_url_processor = self.image_url_processor
|
fetcher.image_url_processor = self.image_url_processor
|
||||||
|
if preloaded is not None:
|
||||||
|
fetcher.preloaded_urls[url] = preloaded
|
||||||
res, path, failures = fetcher.start_fetch(url), fetcher.downloaded_paths, fetcher.failed_links
|
res, path, failures = fetcher.start_fetch(url), fetcher.downloaded_paths, fetcher.failed_links
|
||||||
if not res or not os.path.exists(res):
|
if not res or not os.path.exists(res):
|
||||||
msg = _('Could not fetch article.') + ' '
|
msg = _('Could not fetch article.') + ' '
|
||||||
@ -1195,9 +1202,17 @@ class BasicNewsRecipe(Recipe):
|
|||||||
return self._fetch_article(url, dir, f, a, num_of_feeds)
|
return self._fetch_article(url, dir, f, a, num_of_feeds)
|
||||||
|
|
||||||
def fetch_obfuscated_article(self, url, dir, f, a, num_of_feeds):
|
def fetch_obfuscated_article(self, url, dir, f, a, num_of_feeds):
|
||||||
path = os.path.abspath(self.get_obfuscated_article(url))
|
x = self.get_obfuscated_article(url)
|
||||||
url = ('file:'+path) if iswindows else ('file://'+path)
|
if isinstance(x, dict):
|
||||||
return self._fetch_article(url, dir, f, a, num_of_feeds)
|
data = x['data']
|
||||||
|
if isinstance(data, str):
|
||||||
|
data = data.encode(self.encoding or 'utf-8')
|
||||||
|
url = x.get('url', url)
|
||||||
|
else:
|
||||||
|
with open(x, 'rb') as of:
|
||||||
|
data = of.read()
|
||||||
|
os.remove(x)
|
||||||
|
return self._fetch_article(url, dir, f, a, num_of_feeds, preloaded=data)
|
||||||
|
|
||||||
def fetch_embedded_article(self, article, dir, f, a, num_of_feeds):
|
def fetch_embedded_article(self, article, dir, f, a, num_of_feeds):
|
||||||
templ = templates.EmbeddedContent()
|
templ = templates.EmbeddedContent()
|
||||||
|
@ -185,6 +185,7 @@ class RecursiveFetcher:
|
|||||||
self.show_progress = True
|
self.show_progress = True
|
||||||
self.failed_links = []
|
self.failed_links = []
|
||||||
self.job_info = job_info
|
self.job_info = job_info
|
||||||
|
self.preloaded_urls = {}
|
||||||
|
|
||||||
def get_soup(self, src, url=None):
|
def get_soup(self, src, url=None):
|
||||||
nmassage = []
|
nmassage = []
|
||||||
@ -245,6 +246,11 @@ class RecursiveFetcher:
|
|||||||
|
|
||||||
def fetch_url(self, url):
|
def fetch_url(self, url):
|
||||||
data = None
|
data = None
|
||||||
|
q = self.preloaded_urls.pop(url, None)
|
||||||
|
if q is not None:
|
||||||
|
ans = response(q)
|
||||||
|
ans.newurl = url
|
||||||
|
return ans
|
||||||
self.log.debug('Fetching', url)
|
self.log.debug('Fetching', url)
|
||||||
st = time.monotonic()
|
st = time.monotonic()
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user