diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 4acd7fd2ed..ead1e3b56b 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -472,6 +472,14 @@ class BasicNewsRecipe(Recipe): ''' return url + def preprocess_image(self, img_data, image_url): + ''' + Perform some processing on downloaded image data; image_url is the URL of the image. This is called on the raw + data before any resizing is done. Must return the processed raw data. Return + None to skip the image. + ''' + return img_data + def get_browser(self, *args, **kwargs): ''' Return a browser instance used to fetch documents from the web. By default @@ -929,6 +937,7 @@ class BasicNewsRecipe(Recipe): setattr(self.web2disk_options, extra, getattr(self, extra)) self.web2disk_options.postprocess_html = self._postprocess_html + self.web2disk_options.preprocess_image = self.preprocess_image self.web2disk_options.encoding = self.encoding self.web2disk_options.preprocess_raw_html = self.preprocess_raw_html_ diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py index 214088841f..c3d3559e2e 100644 --- a/src/calibre/web/fetch/simple.py +++ b/src/calibre/web/fetch/simple.py @@ -153,7 +153,8 @@ class RecursiveFetcher(object): self.preprocess_raw_html = getattr(options, 'preprocess_raw_html', lambda raw, url: raw) self.prepreprocess_html_ext = getattr(options, 'skip_ad_pages', lambda soup: None) - self.postprocess_html_ext= getattr(options, 'postprocess_html', None) + self.postprocess_html_ext = getattr(options, 'postprocess_html', None) + self.preprocess_image_ext = getattr(options, 'preprocess_image', None) self._is_link_wanted = getattr(options, 'is_link_wanted', default_is_link_wanted) self.compress_news_images_max_size = getattr(options, 'compress_news_images_max_size', None) @@ -396,6 +397,9 @@ class RecursiveFetcher(object): fname = ascii_filename('img'+str(c)) if isinstance(fname, unicode): fname = fname.encode('ascii', 'replace') + data = 
self.preprocess_image_ext(data, iurl) if self.preprocess_image_ext is not None else data + if data is None: + continue itype = what(None, data) if itype == 'svg' or (itype is None and b'