mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
MOBI Input: Don't auto-convert images in PNG/GIF formats to JPEG
This commit is contained in:
parent
0e6e5afe8d
commit
3df15e222a
@ -10,7 +10,7 @@ import shutil, os, re, struct, textwrap, io
|
|||||||
|
|
||||||
from lxml import html, etree
|
from lxml import html, etree
|
||||||
|
|
||||||
from calibre import (xml_entity_to_unicode, entity_to_unicode)
|
from calibre import xml_entity_to_unicode, entity_to_unicode, guess_type
|
||||||
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
|
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
|
||||||
from calibre.ebooks import DRMError, unit_convert
|
from calibre.ebooks import DRMError, unit_convert
|
||||||
from calibre.ebooks.chardet import strip_encoding_declarations
|
from calibre.ebooks.chardet import strip_encoding_declarations
|
||||||
@ -178,7 +178,7 @@ class MobiReader(object):
|
|||||||
self.processed_html = strip_encoding_declarations(self.processed_html)
|
self.processed_html = strip_encoding_declarations(self.processed_html)
|
||||||
self.processed_html = re.sub(r'&(\S+?);', xml_entity_to_unicode,
|
self.processed_html = re.sub(r'&(\S+?);', xml_entity_to_unicode,
|
||||||
self.processed_html)
|
self.processed_html)
|
||||||
self.extract_images(processed_records, output_dir)
|
image_name_map = self.extract_images(processed_records, output_dir)
|
||||||
self.replace_page_breaks()
|
self.replace_page_breaks()
|
||||||
self.cleanup_html()
|
self.cleanup_html()
|
||||||
|
|
||||||
@ -272,7 +272,7 @@ class MobiReader(object):
|
|||||||
head.insert(0, title)
|
head.insert(0, title)
|
||||||
head.text = '\n\t'
|
head.text = '\n\t'
|
||||||
|
|
||||||
self.upshift_markup(root)
|
self.upshift_markup(root, image_name_map)
|
||||||
guides = root.xpath('//guide')
|
guides = root.xpath('//guide')
|
||||||
guide = guides[0] if guides else None
|
guide = guides[0] if guides else None
|
||||||
metadata_elems = root.xpath('//metadata')
|
metadata_elems = root.xpath('//metadata')
|
||||||
@ -389,8 +389,9 @@ class MobiReader(object):
|
|||||||
raw += unit
|
raw += unit
|
||||||
return raw
|
return raw
|
||||||
|
|
||||||
def upshift_markup(self, root):
|
def upshift_markup(self, root, image_name_map=None):
|
||||||
self.log.debug('Converting style information to CSS...')
|
self.log.debug('Converting style information to CSS...')
|
||||||
|
image_name_map = image_name_map or {}
|
||||||
size_map = {
|
size_map = {
|
||||||
'xx-small': '0.5',
|
'xx-small': '0.5',
|
||||||
'x-small': '1',
|
'x-small': '1',
|
||||||
@ -510,10 +511,11 @@ class MobiReader(object):
|
|||||||
recindex = attrib.pop(attr, None) or recindex
|
recindex = attrib.pop(attr, None) or recindex
|
||||||
if recindex is not None:
|
if recindex is not None:
|
||||||
try:
|
try:
|
||||||
recindex = '%05d'%int(recindex)
|
recindex = int(recindex)
|
||||||
except:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
attrib['src'] = 'images/%s.jpg' % recindex
|
else:
|
||||||
|
attrib['src'] = 'images/' + image_name_map.get(recindex, '%05d.jpg' % recindex)
|
||||||
for attr in ('width', 'height'):
|
for attr in ('width', 'height'):
|
||||||
if attr in attrib:
|
if attr in attrib:
|
||||||
val = attrib[attr]
|
val = attrib[attr]
|
||||||
@ -674,7 +676,7 @@ class MobiReader(object):
|
|||||||
for i in getattr(self, 'image_names', []):
|
for i in getattr(self, 'image_names', []):
|
||||||
path = os.path.join(bp, 'images', i)
|
path = os.path.join(bp, 'images', i)
|
||||||
added.add(path)
|
added.add(path)
|
||||||
manifest.append((path, 'image/jpeg'))
|
manifest.append((path, guess_type(path)[0] or 'image/jpeg'))
|
||||||
if cover_copied is not None:
|
if cover_copied is not None:
|
||||||
manifest.append((cover_copied, 'image/jpeg'))
|
manifest.append((cover_copied, 'image/jpeg'))
|
||||||
|
|
||||||
@ -870,6 +872,7 @@ class MobiReader(object):
|
|||||||
os.makedirs(output_dir)
|
os.makedirs(output_dir)
|
||||||
image_index = 0
|
image_index = 0
|
||||||
self.image_names = []
|
self.image_names = []
|
||||||
|
image_name_map = {}
|
||||||
start = getattr(self.book_header, 'first_image_index', -1)
|
start = getattr(self.book_header, 'first_image_index', -1)
|
||||||
if start > self.num_sections or start < 0:
|
if start > self.num_sections or start < 0:
|
||||||
# BAEN PRC files have bad headers
|
# BAEN PRC files have bad headers
|
||||||
@ -882,18 +885,30 @@ class MobiReader(object):
|
|||||||
image_index += 1
|
image_index += 1
|
||||||
if data[:4] in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
|
if data[:4] in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
|
||||||
b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}:
|
b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}:
|
||||||
# This record is a known non image type, not need to try to
|
# This record is a known non image type, no need to try to
|
||||||
# load the image
|
# load the image
|
||||||
continue
|
continue
|
||||||
|
|
||||||
path = os.path.join(output_dir, '%05d.jpg' % image_index)
|
|
||||||
try:
|
try:
|
||||||
if what(None, data) not in {'jpg', 'jpeg', 'gif', 'png', 'bmp'}:
|
imgfmt = what(None, data)
|
||||||
continue
|
|
||||||
save_cover_data_to(data, path, minify_to=(10000, 10000))
|
|
||||||
except Exception:
|
except Exception:
|
||||||
continue
|
continue
|
||||||
|
if imgfmt not in {'jpg', 'jpeg', 'gif', 'png', 'bmp'}:
|
||||||
|
continue
|
||||||
|
if imgfmt == 'jpeg':
|
||||||
|
imgfmt = 'jpg'
|
||||||
|
path = os.path.join(output_dir, '%05d.%s' % (image_index, imgfmt))
|
||||||
|
image_name_map[image_index] = os.path.basename(path)
|
||||||
|
if imgfmt in ('gif', 'png'):
|
||||||
|
with open(path, 'wb') as f:
|
||||||
|
f.write(data)
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
save_cover_data_to(data, path, minify_to=(10000, 10000))
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
self.image_names.append(os.path.basename(path))
|
self.image_names.append(os.path.basename(path))
|
||||||
|
return image_name_map
|
||||||
|
|
||||||
|
|
||||||
def test_mbp_regex():
|
def test_mbp_regex():
|
||||||
|
Loading…
x
Reference in New Issue
Block a user