News download: Add an option recipe authors can set to have calibre automatically reduce the size of downloaded images by lowering their quality

This commit is contained in:
Kovid Goyal 2013-03-17 11:58:33 +05:30
parent 3fd23ceadd
commit 62211f4006
2 changed files with 91 additions and 3 deletions

View File

@ -338,6 +338,41 @@ class BasicNewsRecipe(Recipe):
#: :meth:`javascript_login` method, to do the actual logging in.
use_javascript_to_login = False
# The following parameters control how the recipe attempts to minimize
# jpeg image sizes. They are only applied to downloaded images whose type
# is jpg/jpeg; all other image types are left untouched.
#: Master switch for jpeg scaling and compression. Set this to False to
#: ignore all scaling and compression parameters and pass images through
#: unmodified. If True and the other compression parameters are left at
#: their default values, jpeg images will be scaled to fit in the screen
#: dimensions set by the output profile and compressed to at most
#: (w * h)/16 bytes, where w x h are the scaled image dimensions in pixels.
compress_news_images = False
#: The factor used when auto compressing jpeg images. If set to None,
#: auto compression is disabled (scaling, if configured, is still applied).
#: Otherwise, the images will be reduced in size to
#: (w * h)/compress_news_images_auto_size bytes if possible by reducing
#: the quality level, where w x h are the image dimensions in pixels.
#: The minimum jpeg quality will be 5/100 so it is possible this constraint
#: will not be met. This parameter is overridden by
#: compress_news_images_max_size, which provides a fixed maximum size
#: for images.
compress_news_images_auto_size = 16
#: Set jpeg quality so images do not exceed the size given (in KBytes).
#: If set (i.e. not None), this parameter overrides auto compression via
#: compress_news_images_auto_size.
#: The minimum jpeg quality will be 5/100 so it is possible this constraint
#: will not be met.
compress_news_images_max_size = None
#: Rescale images to fit in the device screen dimensions set by the output
#: profile. Ignored (forced to False) when the output profile is the
#: ``default`` one, i.e. when no specific output profile is set.
#: Only has an effect when compress_news_images is True.
scale_news_images_to_device = True
#: Maximum dimensions (w,h) to scale images to. If scale_news_images_to_device
#: is True this is set to the device screen dimensions given by the output
#: profile, unless no specific profile is set, in which case it is left at
#: whatever value it has been assigned (default None, meaning no scaling).
#: Only has an effect when compress_news_images is True.
scale_news_images = None
# See the built-in profiles for examples of these settings.
def short_title(self):
@ -849,11 +884,19 @@ class BasicNewsRecipe(Recipe):
for reg in self.filter_regexps:
web2disk_cmdline.extend(['--filter-regexp', reg])
if options.output_profile.short_name == 'default':
self.scale_news_images_to_device = False
elif self.scale_news_images_to_device:
self.scale_news_images = options.output_profile.screen_size
self.web2disk_options = web2disk_option_parser().parse_args(web2disk_cmdline)[0]
for extra in ('keep_only_tags', 'remove_tags', 'preprocess_regexps',
'skip_ad_pages', 'preprocess_html', 'remove_tags_after',
'remove_tags_before', 'is_link_wanted'):
'remove_tags_before', 'is_link_wanted',
'compress_news_images', 'compress_news_images_max_size',
'compress_news_images_auto_size', 'scale_news_images'):
setattr(self.web2disk_options, extra, getattr(self, extra))
self.web2disk_options.postprocess_html = self._postprocess_html
self.web2disk_options.encoding = self.encoding
self.web2disk_options.preprocess_raw_html = self.preprocess_raw_html_

View File

@ -12,7 +12,7 @@ from urllib import url2pathname, quote
from httplib import responses
from base64 import b64decode
from calibre import browser, relpath, unicode_path
from calibre import browser, relpath, unicode_path, fit_image
from calibre.constants import filesystem_encoding, iswindows
from calibre.utils.filenames import ascii_filename
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
@ -20,7 +20,7 @@ from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.config import OptionParser
from calibre.utils.logging import Log
from calibre.utils.magick import Image
from calibre.utils.magick.draw import identify_data
from calibre.utils.magick.draw import identify_data, thumbnail
class FetchError(Exception):
pass
@ -142,6 +142,10 @@ class RecursiveFetcher(object):
self.postprocess_html_ext= getattr(options, 'postprocess_html', None)
self._is_link_wanted = getattr(options, 'is_link_wanted',
default_is_link_wanted)
self.compress_news_images_max_size = getattr(options, 'compress_news_images_max_size', None)
self.compress_news_images = getattr(options, 'compress_news_images', False)
self.compress_news_images_auto_size = getattr(options, 'compress_news_images_auto_size', 16)
self.scale_news_images = getattr(options, 'scale_news_images', None)
self.download_stylesheets = not options.no_stylesheets
self.show_progress = True
self.failed_links = []
@ -338,7 +342,42 @@ class RecursiveFetcher(object):
x.write(data)
ns.replaceWith(src.replace(m.group(1), stylepath))
def rescale_image(self, data):
    '''
    Scale and/or recompress raw JPEG ``data`` to reduce its size.

    The image is first scaled down to fit ``self.scale_news_images``
    (a ``(max_width, max_height)`` pair) when that is not None. It is then
    re-encoded at progressively lower JPEG quality until it fits the size
    budget, which is:

      * ``self.compress_news_images_max_size * 1024`` bytes if that
        attribute is not None, otherwise
      * ``(w * h) / self.compress_news_images_auto_size`` bytes, where
        ``w x h`` are the (possibly scaled) image dimensions, otherwise
      * no budget at all (both attributes are None): the possibly scaled
        image is returned without recompression.

    Quality is lowered in steps of 5 starting from 95 and never goes below
    5, so the budget is best-effort and may not be met.

    :param data: The raw bytes of a JPEG image.
    :return: The raw bytes of the smallest suitable variant: the
             recompressed image, the scaled image or the original data.
    '''
    orig_w, orig_h, _ifmt = identify_data(data)
    orig_data = data  # save it in case compression fails
    if self.scale_news_images is not None:
        wmax, hmax = self.scale_news_images
        scale, new_w, new_h = fit_image(orig_w, orig_h, wmax, hmax)
        if scale:
            data = thumbnail(data, new_w, new_h, compression_quality=95)[-1]
            # The size budget below is based on the scaled dimensions
            orig_w = new_w
            orig_h = new_h

    if self.compress_news_images_max_size is not None:
        # An explicit limit (in KBytes) overrides auto compression
        maxsizeb = self.compress_news_images_max_size * 1024
    elif self.compress_news_images_auto_size is not None:
        maxsizeb = (orig_w * orig_h)/self.compress_news_images_auto_size
    else:
        # Not compressing, only (optionally) scaled
        return data

    scaled_data = data  # save it in case compression fails
    if len(scaled_data) <= maxsizeb:  # no compression required
        return scaled_data

    img = Image()
    quality = 95
    img.load(data)
    # Stop as soon as the image fits the budget (same "at most" test as the
    # early return above) and never go below quality 5: a quality of 0 is
    # below the documented floor and is treated by ImageMagick as
    # "unspecified", which can produce a larger file instead of a smaller one.
    while len(data) > maxsizeb and quality > 5:
        quality -= 5
        img.set_compression_quality(quality)
        data = img.export('jpg')

    if len(data) >= len(scaled_data):  # compression failed
        return orig_data if len(orig_data) <= len(scaled_data) else scaled_data

    if len(data) >= len(orig_data):  # no improvement
        return orig_data

    return data
def process_images(self, soup, baseurl):
diskpath = unicode_path(os.path.join(self.current_dir, 'images'))
@ -390,6 +429,12 @@ class RecursiveFetcher(object):
im = Image()
im.load(data)
data = im.export(itype)
if self.compress_news_images and itype in {'jpg','jpeg'}:
try:
data = self.rescale_image(data)
except:
self.log.exception('failed to compress image '+iurl)
identify_data(data)
else:
identify_data(data)
imgpath = os.path.join(diskpath, fname+'.'+itype)