mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add API to the recipe class to preprocess image data easily
This commit is contained in:
parent
3a624eda8e
commit
5d3e4085f6
@ -472,6 +472,14 @@ class BasicNewsRecipe(Recipe):
|
|||||||
'''
|
'''
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
def preprocess_image(self, img_data, image_url):
|
||||||
|
'''
|
||||||
|
Perform some processing on downloaded image data. This is called on the raw
|
||||||
|
data before any resizing is done. Must return the processed raw data. Return
|
||||||
|
None to skip the image.
|
||||||
|
'''
|
||||||
|
return img_data
|
||||||
|
|
||||||
def get_browser(self, *args, **kwargs):
|
def get_browser(self, *args, **kwargs):
|
||||||
'''
|
'''
|
||||||
Return a browser instance used to fetch documents from the web. By default
|
Return a browser instance used to fetch documents from the web. By default
|
||||||
@ -929,6 +937,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
setattr(self.web2disk_options, extra, getattr(self, extra))
|
setattr(self.web2disk_options, extra, getattr(self, extra))
|
||||||
|
|
||||||
self.web2disk_options.postprocess_html = self._postprocess_html
|
self.web2disk_options.postprocess_html = self._postprocess_html
|
||||||
|
self.web2disk_options.preprocess_image = self.preprocess_image
|
||||||
self.web2disk_options.encoding = self.encoding
|
self.web2disk_options.encoding = self.encoding
|
||||||
self.web2disk_options.preprocess_raw_html = self.preprocess_raw_html_
|
self.web2disk_options.preprocess_raw_html = self.preprocess_raw_html_
|
||||||
|
|
||||||
|
@ -154,6 +154,7 @@ class RecursiveFetcher(object):
|
|||||||
lambda raw, url: raw)
|
lambda raw, url: raw)
|
||||||
self.prepreprocess_html_ext = getattr(options, 'skip_ad_pages', lambda soup: None)
|
self.prepreprocess_html_ext = getattr(options, 'skip_ad_pages', lambda soup: None)
|
||||||
self.postprocess_html_ext = getattr(options, 'postprocess_html', None)
|
self.postprocess_html_ext = getattr(options, 'postprocess_html', None)
|
||||||
|
self.preprocess_image_ext = getattr(options, 'preprocess_image', None)
|
||||||
self._is_link_wanted = getattr(options, 'is_link_wanted',
|
self._is_link_wanted = getattr(options, 'is_link_wanted',
|
||||||
default_is_link_wanted)
|
default_is_link_wanted)
|
||||||
self.compress_news_images_max_size = getattr(options, 'compress_news_images_max_size', None)
|
self.compress_news_images_max_size = getattr(options, 'compress_news_images_max_size', None)
|
||||||
@ -396,6 +397,9 @@ class RecursiveFetcher(object):
|
|||||||
fname = ascii_filename('img'+str(c))
|
fname = ascii_filename('img'+str(c))
|
||||||
if isinstance(fname, unicode):
|
if isinstance(fname, unicode):
|
||||||
fname = fname.encode('ascii', 'replace')
|
fname = fname.encode('ascii', 'replace')
|
||||||
|
data = self.preprocess_image_ext(data, iurl) if self.preprocess_image_ext is not None else data
|
||||||
|
if data is None:
|
||||||
|
continue
|
||||||
itype = what(None, data)
|
itype = what(None, data)
|
||||||
if itype == 'svg' or (itype is None and b'<svg' in data[:1024]):
|
if itype == 'svg' or (itype is None and b'<svg' in data[:1024]):
|
||||||
# SVG image
|
# SVG image
|
||||||
|
Loading…
x
Reference in New Issue
Block a user