Add API to the recipe class to preprocess image data easily

2025-11-25 07:45:01 -05:00 · 2017-04-18 09:00:01 +05:30 · 2017-04-18 09:00:01 +05:30 · 5d3e4085f6
commit 5d3e4085f6
parent 3a624eda8e
2 changed files with 14 additions and 1 deletions
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -472,6 +472,14 @@ class BasicNewsRecipe(Recipe):
        '''
        return url
    def preprocess_image(self, img_data, image_url):
        '''
        Hook for altering the raw data of a downloaded image. It is invoked
        on the raw data, before any resizing takes place.

        The default implementation hands `img_data` back unchanged; override
        this method to transform the data.

        :param img_data: Raw data of the downloaded image.
        :param image_url: URL of the image the data belongs to.
        :return: The processed raw image data, or None to have the image
                 skipped.
        '''
        return img_data
    def get_browser(self, *args, **kwargs):
        '''
        Return a browser instance used to fetch documents from the web. By default
@@ -929,6 +937,7 @@ class BasicNewsRecipe(Recipe):
            setattr(self.web2disk_options, extra, getattr(self, extra))
        self.web2disk_options.postprocess_html = self._postprocess_html
        self.web2disk_options.preprocess_image = self.preprocess_image
        self.web2disk_options.encoding = self.encoding
        self.web2disk_options.preprocess_raw_html = self.preprocess_raw_html_
--- a/src/calibre/web/fetch/simple.py
+++ b/src/calibre/web/fetch/simple.py
@@ -154,6 +154,7 @@ class RecursiveFetcher(object):
                lambda raw, url: raw)
        self.prepreprocess_html_ext = getattr(options, 'skip_ad_pages', lambda soup: None)
        self.postprocess_html_ext = getattr(options, 'postprocess_html', None)
        self.preprocess_image_ext = getattr(options, 'preprocess_image', None)
        self._is_link_wanted     = getattr(options, 'is_link_wanted',
                default_is_link_wanted)
        self.compress_news_images_max_size = getattr(options, 'compress_news_images_max_size', None)
@@ -396,6 +397,9 @@ class RecursiveFetcher(object):
            fname = ascii_filename('img'+str(c))
            if isinstance(fname, unicode):
                fname = fname.encode('ascii', 'replace')
            data = self.preprocess_image_ext(data, iurl) if self.preprocess_image_ext is not None else data
            if data is None:
                continue
            itype = what(None, data)
            if itype == 'svg' or (itype is None and b'<svg' in data[:1024]):
                # SVG image