Add API to the recipe class to preprocess image data easily

This commit is contained in:
Kovid Goyal 2017-04-18 09:00:01 +05:30
parent 3a624eda8e
commit 5d3e4085f6
2 changed files with 14 additions and 1 deletion

View File

@ -472,6 +472,14 @@ class BasicNewsRecipe(Recipe):
'''
return url
def preprocess_image(self, img_data, image_url):
    '''
    Perform some processing on downloaded image data. This is called on the
    raw data before any resizing is done. Must return the processed raw
    data. Return None to skip the image.

    The default implementation is a pass-through: it returns ``img_data``
    unchanged. Override it in a recipe to alter or reject images.

    :param img_data: The raw data of the downloaded image.
    :param image_url: The URL associated with the image, as supplied by the
        fetcher. It is not used by the default implementation.
    :return: The (possibly modified) raw image data, or None to have the
        image skipped.
    '''
    # Hook for subclasses; by default the data is handed back untouched.
    return img_data
def get_browser(self, *args, **kwargs):
'''
Return a browser instance used to fetch documents from the web. By default
@ -929,6 +937,7 @@ class BasicNewsRecipe(Recipe):
setattr(self.web2disk_options, extra, getattr(self, extra))
self.web2disk_options.postprocess_html = self._postprocess_html
self.web2disk_options.preprocess_image = self.preprocess_image
self.web2disk_options.encoding = self.encoding
self.web2disk_options.preprocess_raw_html = self.preprocess_raw_html_

View File

@ -153,7 +153,8 @@ class RecursiveFetcher(object):
self.preprocess_raw_html = getattr(options, 'preprocess_raw_html',
lambda raw, url: raw)
self.prepreprocess_html_ext = getattr(options, 'skip_ad_pages', lambda soup: None)
self.postprocess_html_ext= getattr(options, 'postprocess_html', None)
self.postprocess_html_ext = getattr(options, 'postprocess_html', None)
self.preprocess_image_ext = getattr(options, 'preprocess_image', None)
self._is_link_wanted = getattr(options, 'is_link_wanted',
default_is_link_wanted)
self.compress_news_images_max_size = getattr(options, 'compress_news_images_max_size', None)
@ -396,6 +397,9 @@ class RecursiveFetcher(object):
fname = ascii_filename('img'+str(c))
if isinstance(fname, unicode):
fname = fname.encode('ascii', 'replace')
data = self.preprocess_image_ext(data, iurl) if self.preprocess_image_ext is not None else data
if data is None:
continue
itype = what(None, data)
if itype == 'svg' or (itype is None and b'<svg' in data[:1024]):
# SVG image