mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
News download: Add an option recipe authors can set to have calibre automatically reduce the size of downloaded images by lowering their quality
This commit is contained in:
parent
3fd23ceadd
commit
62211f4006
@ -338,6 +338,41 @@ class BasicNewsRecipe(Recipe):
|
||||
#: :meth:`javascript_login` method, to do the actual logging in.
|
||||
use_javascript_to_login = False
|
||||
|
||||
# The following parameters control how the recipe attempts to minimize
|
||||
# jpeg image sizes
|
||||
|
||||
#: Set this to False to ignore all scaling and compression parameters and
|
||||
#: pass images through unmodified. If True and the other compression
|
||||
#: parameters are left at their default values, jpeg images will be scaled to fit
|
||||
#: in the screen dimensions set by the output profile and compressed to size at
|
||||
#: most (w * h)/16 bytes, where w x h are the scaled image dimensions.
|
||||
compress_news_images = False
|
||||
|
||||
#: The factor used when auto compressing jpeg images. If set to None,
|
||||
#: auto compression is disabled. Otherwise, the images will be reduced in size to
|
||||
#: (w * h)/compress_news_images_auto_size bytes if possible by reducing
|
||||
#: the quality level, where w x h are the image dimensions in pixels.
|
||||
#: The minimum jpeg quality will be 5/100 so it is possible this constraint
|
||||
#: will not be met. This parameter can be overridden by the parameter
|
||||
#: compress_news_images_max_size which provides a fixed maximum size for images.
|
||||
compress_news_images_auto_size = 16
|
||||
|
||||
#: Set jpeg quality so images do not exceed the size given (in KBytes).
|
||||
#: If set, this parameter overrides auto compression via compress_news_images_auto_size.
|
||||
#: The minimum jpeg quality will be 5/100 so it is possible this constraint
|
||||
#: will not be met.
|
||||
compress_news_images_max_size = None
|
||||
|
||||
#: Rescale images to fit in the device screen dimensions set by the output profile.
|
||||
#: Ignored if no output profile is set.
|
||||
scale_news_images_to_device = True
|
||||
|
||||
#: Maximum dimensions (w,h) to scale images to. If scale_news_images_to_device is True
|
||||
#: this is set to the device screen dimensions set by the output profile unless
|
||||
#: there is no profile set, in which case it is left at whatever value it has been
|
||||
#: assigned (default None).
|
||||
scale_news_images = None
|
||||
|
||||
# See the built-in profiles for examples of these settings.
|
||||
|
||||
def short_title(self):
|
||||
@ -849,11 +884,19 @@ class BasicNewsRecipe(Recipe):
|
||||
for reg in self.filter_regexps:
|
||||
web2disk_cmdline.extend(['--filter-regexp', reg])
|
||||
|
||||
if options.output_profile.short_name == 'default':
|
||||
self.scale_news_images_to_device = False
|
||||
elif self.scale_news_images_to_device:
|
||||
self.scale_news_images = options.output_profile.screen_size
|
||||
|
||||
self.web2disk_options = web2disk_option_parser().parse_args(web2disk_cmdline)[0]
|
||||
for extra in ('keep_only_tags', 'remove_tags', 'preprocess_regexps',
|
||||
'skip_ad_pages', 'preprocess_html', 'remove_tags_after',
|
||||
'remove_tags_before', 'is_link_wanted'):
|
||||
'remove_tags_before', 'is_link_wanted',
|
||||
'compress_news_images', 'compress_news_images_max_size',
|
||||
'compress_news_images_auto_size', 'scale_news_images'):
|
||||
setattr(self.web2disk_options, extra, getattr(self, extra))
|
||||
|
||||
self.web2disk_options.postprocess_html = self._postprocess_html
|
||||
self.web2disk_options.encoding = self.encoding
|
||||
self.web2disk_options.preprocess_raw_html = self.preprocess_raw_html_
|
||||
|
@ -12,7 +12,7 @@ from urllib import url2pathname, quote
|
||||
from httplib import responses
|
||||
from base64 import b64decode
|
||||
|
||||
from calibre import browser, relpath, unicode_path
|
||||
from calibre import browser, relpath, unicode_path, fit_image
|
||||
from calibre.constants import filesystem_encoding, iswindows
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
@ -20,7 +20,7 @@ from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.utils.logging import Log
|
||||
from calibre.utils.magick import Image
|
||||
from calibre.utils.magick.draw import identify_data
|
||||
from calibre.utils.magick.draw import identify_data, thumbnail
|
||||
|
||||
class FetchError(Exception):
|
||||
pass
|
||||
@ -142,6 +142,10 @@ class RecursiveFetcher(object):
|
||||
self.postprocess_html_ext= getattr(options, 'postprocess_html', None)
|
||||
self._is_link_wanted = getattr(options, 'is_link_wanted',
|
||||
default_is_link_wanted)
|
||||
self.compress_news_images_max_size = getattr(options, 'compress_news_images_max_size', None)
|
||||
self.compress_news_images = getattr(options, 'compress_news_images', False)
|
||||
self.compress_news_images_auto_size = getattr(options, 'compress_news_images_auto_size', 16)
|
||||
self.scale_news_images = getattr(options, 'scale_news_images', None)
|
||||
self.download_stylesheets = not options.no_stylesheets
|
||||
self.show_progress = True
|
||||
self.failed_links = []
|
||||
@ -338,7 +342,42 @@ class RecursiveFetcher(object):
|
||||
x.write(data)
|
||||
ns.replaceWith(src.replace(m.group(1), stylepath))
|
||||
|
||||
def rescale_image(self, data):
    """Scale and/or recompress image data to reduce its size.

    The image is first scaled to fit within ``self.scale_news_images``
    (a ``(max_width, max_height)`` pair) when that attribute is not None.
    It is then re-exported as JPEG at progressively lower quality until it
    fits the size budget or the minimum quality of 5 is reached.

    The size budget in bytes is ``self.compress_news_images_max_size * 1024``
    when that attribute is set; otherwise it is
    ``(width * height) / self.compress_news_images_auto_size``, computed
    from the (possibly scaled) pixel dimensions. If both attributes are
    None no recompression is attempted.

    :param data: Encoded image data (a byte string).
    :return: The smallest suitable encoding: the recompressed data, the
             scaled-only data, or the original data when recompression
             does not reduce the size.
    """
    orig_data = data  # kept so it can be returned if compression does not help
    width, height, _ifmt = identify_data(data)

    if self.scale_news_images is not None:
        wmax, hmax = self.scale_news_images
        scale, new_w, new_h = fit_image(width, height, wmax, hmax)
        if scale:
            data = thumbnail(data, new_w, new_h, compression_quality=95)[-1]
            # The auto size budget below is based on the scaled dimensions.
            width, height = new_w, new_h

    # A fixed maximum size takes precedence over the automatic budget.
    if self.compress_news_images_max_size is not None:
        maxsizeb = self.compress_news_images_max_size * 1024
    elif self.compress_news_images_auto_size is not None:
        maxsizeb = (width * height)/self.compress_news_images_auto_size
    else:
        return data  # not compressing

    scaled_data = data  # kept so it can be returned if compression fails
    if len(scaled_data) <= maxsizeb:  # no compression required
        return scaled_data

    img = Image()
    img.load(data)
    quality = 95
    # Step the quality down in increments of 5. The guard is ``> 5`` (not
    # ``>= 5``) so that the last attempt is made at quality 5, the documented
    # minimum, and never at quality 0.
    while len(data) >= maxsizeb and quality > 5:
        quality -= 5
        img.set_compression_quality(quality)
        data = img.export('jpg')

    if len(data) >= len(scaled_data):  # compression failed
        return orig_data if len(orig_data) <= len(scaled_data) else scaled_data

    if len(data) >= len(orig_data):  # no improvement
        return orig_data

    return data
|
||||
|
||||
def process_images(self, soup, baseurl):
|
||||
diskpath = unicode_path(os.path.join(self.current_dir, 'images'))
|
||||
@ -390,6 +429,12 @@ class RecursiveFetcher(object):
|
||||
im = Image()
|
||||
im.load(data)
|
||||
data = im.export(itype)
|
||||
if self.compress_news_images and itype in {'jpg','jpeg'}:
|
||||
try:
|
||||
data = self.rescale_image(data)
|
||||
except:
|
||||
self.log.exception('failed to compress image '+iurl)
|
||||
identify_data(data)
|
||||
else:
|
||||
identify_data(data)
|
||||
imgpath = os.path.join(diskpath, fname+'.'+itype)
|
||||
|
Loading…
x
Reference in New Issue
Block a user