News download: Add an option recipe authors can set to have calibre automatically reduce the size of downloaded images by lowering their quality

2025-07-09 03:04:10 -04:00 · 2013-03-17 11:58:33 +05:30 · 2013-03-17 11:58:33 +05:30 · 62211f4006
commit 62211f4006
parent 3fd23ceadd
2 changed files with 91 additions and 3 deletions
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@ -338,6 +338,41 @@ class BasicNewsRecipe(Recipe):
    #: :meth:`javascript_login` method, to do the actual logging in.
    use_javascript_to_login = False

+    # The following parameters control how the recipe attempts to minimize
+    # jpeg image sizes
+
+    #: Set this to False to ignore all scaling and compression parameters and
+    #: pass images through unmodified. If True and the other compression
+    #: parameters are left at their default values, jpeg images will be scaled to fit
+    #: in the screen dimensions set by the output profile and compressed to size at
+    #: most (w * h)/16 where w x h are the scaled image dimensions.
+    compress_news_images = False
+
+    #: The factor used when auto compressing jpeg images. If set to None,
+    #: auto compression is disabled. Otherwise, the images will be reduced in size to
+    #: (w * h)/compress_news_images_auto_size bytes if possible by reducing
+    #: the quality level, where w x h are the image dimensions in pixels.
+    #: The minimum jpeg quality will be 5/100 so it is possible this constraint
+    #: will not be met.  This parameter can be overridden by the parameter
+    #: compress_news_images_max_size which provides a fixed maximum size for images.
+    compress_news_images_auto_size = 16
+
+    #: Set jpeg quality so images do not exceed the size given (in KBytes).
+    #: If set, this parameter overrides auto compression via compress_news_images_auto_size.
+    #: The minimum jpeg quality will be 5/100 so it is possible this constraint
+    #: will not be met.
+    compress_news_images_max_size = None
+
+    #: Rescale images to fit in the device screen dimensions set by the output profile.
+    #: Ignored if no output profile is set.
+    scale_news_images_to_device = True
+
+    #: Maximum dimensions (w, h) to scale images to. If scale_news_images_to_device is True,
+    #: this is overwritten with the device screen dimensions from the output profile. If no
+    #: output profile is set, it keeps whatever value it has been assigned
+    #: (default None).
+    scale_news_images = None
+
    # See the built-in profiles for examples of these settings.

    def short_title(self):
@ -849,11 +884,19 @@ class BasicNewsRecipe(Recipe):
        for reg in self.filter_regexps:
            web2disk_cmdline.extend(['--filter-regexp', reg])

+        if options.output_profile.short_name == 'default':
+            self.scale_news_images_to_device = False
+        elif self.scale_news_images_to_device:
+            self.scale_news_images = options.output_profile.screen_size
+
        self.web2disk_options = web2disk_option_parser().parse_args(web2disk_cmdline)[0]
        for extra in ('keep_only_tags', 'remove_tags', 'preprocess_regexps',
                      'skip_ad_pages', 'preprocess_html', 'remove_tags_after',
-                      'remove_tags_before', 'is_link_wanted'):
+                      'remove_tags_before', 'is_link_wanted',
+                      'compress_news_images', 'compress_news_images_max_size',
+                      'compress_news_images_auto_size', 'scale_news_images'):
            setattr(self.web2disk_options, extra, getattr(self, extra))
+
        self.web2disk_options.postprocess_html = self._postprocess_html
        self.web2disk_options.encoding = self.encoding
        self.web2disk_options.preprocess_raw_html = self.preprocess_raw_html_
--- a/src/calibre/web/fetch/simple.py
+++ b/src/calibre/web/fetch/simple.py
@ -12,7 +12,7 @@ from urllib import url2pathname, quote
 from httplib import responses
 from base64 import b64decode

-from calibre import browser, relpath, unicode_path
+from calibre import browser, relpath, unicode_path, fit_image
 from calibre.constants import filesystem_encoding, iswindows
 from calibre.utils.filenames import ascii_filename
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
@ -20,7 +20,7 @@ from calibre.ebooks.chardet import xml_to_unicode
 from calibre.utils.config import OptionParser
 from calibre.utils.logging import Log
 from calibre.utils.magick import Image
-from calibre.utils.magick.draw import identify_data
+from calibre.utils.magick.draw import identify_data, thumbnail

 class FetchError(Exception):
    pass
@ -142,6 +142,10 @@ class RecursiveFetcher(object):
        self.postprocess_html_ext= getattr(options, 'postprocess_html', None)
        self._is_link_wanted     = getattr(options, 'is_link_wanted',
                default_is_link_wanted)
+        self.compress_news_images_max_size = getattr(options, 'compress_news_images_max_size', None)
+        self.compress_news_images = getattr(options, 'compress_news_images', False)
+        self.compress_news_images_auto_size = getattr(options, 'compress_news_images_auto_size', 16)
+        self.scale_news_images = getattr(options, 'scale_news_images', None)
        self.download_stylesheets = not options.no_stylesheets
        self.show_progress = True
        self.failed_links = []
@ -338,7 +342,42 @@ class RecursiveFetcher(object):
                            x.write(data)
                        ns.replaceWith(src.replace(m.group(1), stylepath))

+    def rescale_image(self, data):
+        '''
+        Reduce the size of an encoded image, first by scaling it to fit
+        ``self.scale_news_images`` and then by re-exporting it as JPEG at
+        progressively lower quality until it fits the size budget.
+
+        The size budget in bytes is ``compress_news_images_max_size * 1024``
+        when that option is set, otherwise
+        ``(w * h) / compress_news_images_auto_size`` where w x h are the
+        (possibly scaled) image dimensions. If both options are None, no
+        recompression is attempted and the (possibly scaled) data is returned.
+
+        The lowest quality tried is 5/100, so the budget may not be met.
+
+        :param data: Encoded image data (the visible caller only passes
+                     jpg/jpeg data).
+        :return: Encoded image data: the recompressed image when it is
+                 smaller than both the original and the scaled data,
+                 otherwise whichever of the original/scaled data the checks
+                 at the end of this method select.
+        '''
+        orig_w, orig_h, ifmt = identify_data(data)
+        orig_data = data  # save it in case compression fails
+        if self.scale_news_images is not None:
+            wmax, hmax = self.scale_news_images
+            # NOTE(review): fit_image presumably reports in its first item
+            # whether the image has to be shrunk to fit -- confirm.
+            scale, new_w, new_h = fit_image(orig_w, orig_h, wmax, hmax)
+            if scale:
+                # The last item returned by thumbnail() is used as the
+                # encoded image data.
+                data = thumbnail(data, new_w, new_h, compression_quality=95)[-1]
+                # The auto size budget below is based on the scaled dimensions
+                orig_w = new_w
+                orig_h = new_h
+        if self.compress_news_images_max_size is None:
+            if self.compress_news_images_auto_size is None:  # not compressing
+                return data
+            else:
+                maxsizeb = (orig_w * orig_h)/self.compress_news_images_auto_size
+        else:
+            # compress_news_images_max_size is given in KBytes
+            maxsizeb = self.compress_news_images_max_size * 1024
+        scaled_data = data  # save it in case compression fails
+        if len(scaled_data) <= maxsizeb:  # no compression required
+            return scaled_data
+
+        img = Image()
+        quality = 95
+        img.load(data)
+        # Step the quality down 5 points at a time. The guard is "> 5" so the
+        # last export happens at quality 5, the documented minimum; with
+        # ">= 5" a final pass would run at quality 0.
+        while len(data) >= maxsizeb and quality > 5:
+            quality -= 5
+            img.set_compression_quality(quality)
+            data = img.export('jpg')
+
+        if len(data) >= len(scaled_data):  # compression failed
+            return orig_data if len(orig_data) <= len(scaled_data) else scaled_data
+
+        if len(data) >= len(orig_data):  # no improvement
+            return orig_data
+
+        return data

    def process_images(self, soup, baseurl):
        diskpath = unicode_path(os.path.join(self.current_dir, 'images'))
@ -390,6 +429,12 @@ class RecursiveFetcher(object):
                        im = Image()
                        im.load(data)
                        data = im.export(itype)
+                    if self.compress_news_images and itype in {'jpg','jpeg'}:
+                        try:
+                            data = self.rescale_image(data)
+                        except:
+                            self.log.exception('failed to compress image '+iurl)
+                            identify_data(data)
                    else:
                        identify_data(data)
                    imgpath = os.path.join(diskpath, fname+'.'+itype)