From 62211f40069e8ab510c3a0cab6e5178d8851e75b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 17 Mar 2013 11:58:33 +0530 Subject: [PATCH] News download: Add an option recipe authors can set to have calibre automatically reduce the size of downloaded images by lowering their quality --- src/calibre/web/feeds/news.py | 45 +++++++++++++++++++++++++++++- src/calibre/web/fetch/simple.py | 49 +++++++++++++++++++++++++++++++-- 2 files changed, 91 insertions(+), 3 deletions(-) diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 5bf09d8a3b..e9348f6ae7 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -338,6 +338,41 @@ class BasicNewsRecipe(Recipe): #: :meth:`javascript_login` method, to do the actual logging in. use_javascript_to_login = False + # The following parameters control how the recipe attempts to minimize + # jpeg image sizes + + #: Set this to False to ignore all scaling and compression parameters and + #: pass images through unmodified. If True and the other compression + #: parameters are left at their default values, jpeg images will be scaled to fit + #: in the screen dimensions set by the output profile and compressed to size at + #: most (w * h)/16 where w x h are the scaled image dimensions. + compress_news_images = False + + #: The factor used when auto compressing jpeg images. If set to None, + #: auto compression is disabled. Otherwise, the images will be reduced in size to + #: (w * h)/compress_news_images_auto_size bytes if possible by reducing + #: the quality level, where w x h are the image dimensions in pixels. + #: The minimum jpeg quality will be 5/100 so it is possible this constraint + #: will not be met. This parameter can be overridden by the parameter + #: compress_news_images_max_size which provides a fixed maximum size for images. + compress_news_images_auto_size = 16 + + #: Set jpeg quality so images do not exceed the size given (in KBytes). 
+ #: If set, this parameter overrides auto compression via compress_news_images_auto_size. + #: The minimum jpeg quality will be 5/100 so it is possible this constraint + #: will not be met. + compress_news_images_max_size = None + + #: Rescale images to fit in the device screen dimensions set by the output profile. + #: Ignored if no output profile is set. + scale_news_images_to_device = True + + #: Maximum dimensions (w,h) to scale images to. If scale_news_images_to_device is True + #: this is set to the device screen dimensions set by the output profile unless + #: there is no profile set, in which case it is left at whatever value it has been + #: assigned (default None). + scale_news_images = None + # See the built-in profiles for examples of these settings. def short_title(self): @@ -849,11 +884,19 @@ class BasicNewsRecipe(Recipe): for reg in self.filter_regexps: web2disk_cmdline.extend(['--filter-regexp', reg]) + if options.output_profile.short_name == 'default': + self.scale_news_images_to_device = False + elif self.scale_news_images_to_device: + self.scale_news_images = options.output_profile.screen_size + self.web2disk_options = web2disk_option_parser().parse_args(web2disk_cmdline)[0] for extra in ('keep_only_tags', 'remove_tags', 'preprocess_regexps', 'skip_ad_pages', 'preprocess_html', 'remove_tags_after', - 'remove_tags_before', 'is_link_wanted'): + 'remove_tags_before', 'is_link_wanted', + 'compress_news_images', 'compress_news_images_max_size', + 'compress_news_images_auto_size', 'scale_news_images'): setattr(self.web2disk_options, extra, getattr(self, extra)) + self.web2disk_options.postprocess_html = self._postprocess_html self.web2disk_options.encoding = self.encoding self.web2disk_options.preprocess_raw_html = self.preprocess_raw_html_ diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py index e7ad119dae..7cc8bd9309 100644 --- a/src/calibre/web/fetch/simple.py +++ b/src/calibre/web/fetch/simple.py @@ -12,7 +12,7 @@ from 
urllib import url2pathname, quote from httplib import responses from base64 import b64decode -from calibre import browser, relpath, unicode_path +from calibre import browser, relpath, unicode_path, fit_image from calibre.constants import filesystem_encoding, iswindows from calibre.utils.filenames import ascii_filename from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag @@ -20,7 +20,7 @@ from calibre.ebooks.chardet import xml_to_unicode from calibre.utils.config import OptionParser from calibre.utils.logging import Log from calibre.utils.magick import Image -from calibre.utils.magick.draw import identify_data +from calibre.utils.magick.draw import identify_data, thumbnail class FetchError(Exception): pass @@ -142,6 +142,10 @@ class RecursiveFetcher(object): self.postprocess_html_ext= getattr(options, 'postprocess_html', None) self._is_link_wanted = getattr(options, 'is_link_wanted', default_is_link_wanted) + self.compress_news_images_max_size = getattr(options, 'compress_news_images_max_size', None) + self.compress_news_images = getattr(options, 'compress_news_images', False) + self.compress_news_images_auto_size = getattr(options, 'compress_news_images_auto_size', 16) + self.scale_news_images = getattr(options, 'scale_news_images', None) self.download_stylesheets = not options.no_stylesheets self.show_progress = True self.failed_links = [] @@ -338,7 +342,42 @@ class RecursiveFetcher(object): x.write(data) ns.replaceWith(src.replace(m.group(1), stylepath))
+    def rescale_image(self, data):
+        '''
+        Scale JPEG ``data`` to fit ``self.scale_news_images``, then lower its
+        quality from 90 to 5 in steps of 5 until it fits the size limit. Falls
+        back to the scaled or original bytes if recompression does not help.
+        '''
+        orig_w, orig_h = identify_data(data)[:2]
+        orig_data = data  # save it in case compression fails
+        if self.scale_news_images is not None:
+            wmax, hmax = self.scale_news_images
+            scale, new_w, new_h = fit_image(orig_w, orig_h, wmax, hmax)
+            if scale:
+                data = thumbnail(data, new_w, new_h, compression_quality=95)[-1]
+                orig_w, orig_h = new_w, new_h
+        if self.compress_news_images_max_size is not None:
+            maxsizeb = self.compress_news_images_max_size * 1024  # given in KBytes
+        elif self.compress_news_images_auto_size is not None:
+            maxsizeb = (orig_w * orig_h)/self.compress_news_images_auto_size
+        else:  # not compressing
+            return data
+        scaled_data = data  # save it in case compression fails
+        if len(scaled_data) <= maxsizeb:  # no compression required
+            return scaled_data
+
+        img = Image()
+        img.load(scaled_data)
+        for quality in range(90, 0, -5):  # 90, 85, ..., 5: never below 5
+            img.set_compression_quality(quality)
+            data = img.export('jpg')
+            if len(data) < maxsizeb:
+                break
+
+        if len(data) >= len(scaled_data):  # compression failed
+            return orig_data if len(orig_data) <= len(scaled_data) else scaled_data
+        return orig_data if len(data) >= len(orig_data) else data  # no improvement
 def process_images(self, soup, baseurl): diskpath = unicode_path(os.path.join(self.current_dir, 'images')) @@ -390,6 +429,12 @@ class RecursiveFetcher(object): im = Image() im.load(data) data = im.export(itype) + if self.compress_news_images and itype in {'jpg','jpeg'}: + try: + data = self.rescale_image(data) + except: + self.log.exception('failed to compress image '+iurl) + identify_data(data) else: identify_data(data) imgpath = os.path.join(diskpath, fname+'.'+itype)