mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
AZW3: When converting from AZW3 files, use the high quality version of the image if the source AZW3 file contains both low and high quality images
This commit is contained in:
parent
734384b384
commit
2befb1e2e9
55
src/calibre/ebooks/mobi/reader/containers.py
Normal file
55
src/calibre/ebooks/mobi/reader/containers.py
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
from struct import unpack_from, error
|
||||||
|
|
||||||
|
from calibre.utils.magick.draw import identify_data
|
||||||
|
from calibre.utils.imghdr import what
|
||||||
|
|
||||||
|
def find_imgtype(data):
    """Return a name for the image format of the raw bytes in ``data``.

    ``what`` is consulted first. Only when it returns None is
    ``identify_data`` tried, and the third element of its result is used.
    If that fallback raises for any reason the string 'unknown' is returned,
    so this function itself never propagates an identification failure.
    """
    detected = what(None, data)
    if detected is not None:
        return detected
    try:
        return identify_data(data)[2]
    except Exception:
        # Best effort: callers test for the 'unknown' sentinel instead of
        # handling an exception.
        return 'unknown'
|
||||||
|
|
||||||
|
class Container(object):
    """State for one container record and the image resources that follow it.

    The constructor scans the EXTH block embedded in ``data`` for a record of
    type 539 and flags the container as an image container when that record's
    payload is exactly ``application/image``.

    Attributes:
        is_image_container: True only if record 539 carried the payload
            ``application/image``.
        resource_index: running count of resources seen for this container;
            incremented by every call to load_image().
    """

    def __init__(self, data):
        self.resource_index = 0
        self.is_image_container = False

        # The EXTH magic is expected at offset 48, followed by two big-endian
        # 32 bit integers; anything shorter or unmarked is left unflagged.
        if len(data) <= 60 or data[48:52] != b'EXTH':
            return

        # Only the first integer (the block length) is needed here.
        exth_length = unpack_from(b'>LL', data, 52)[0]
        limit = 60 + exth_length - 8
        offset = 60
        while offset < limit:
            try:
                rec_type, rec_size = unpack_from(b'>LL', data, offset)
            except error:
                # Ran off the end of the buffer: stop scanning.
                break
            # rec_size counts the 8 byte record header as well as the payload.
            payload_start = offset + 8
            payload_len = rec_size - 8
            if payload_len < 0:
                # Malformed record size: give up on the remaining records.
                break
            if rec_type == 539:
                payload = data[payload_start:payload_start + payload_len]
                self.is_image_container = payload == b'application/image'
                break
            offset = payload_start + payload_len

    def load_image(self, data):
        """Return ``(image_bytes, image_type)`` for a resource record.

        The resource counter is advanced on every call. ``(None, None)`` is
        returned when this is not an image container or when the image type
        cannot be determined.
        """
        self.resource_index += 1
        if not self.is_image_container:
            return None, None
        # Skip the first 12 bytes -- presumably the resource record's own
        # header; TODO confirm against the format description.
        payload = data[12:]
        kind = find_imgtype(payload)
        if kind == 'unknown':
            return None, None
        return payload, kind
|
||||||
|
|
||||||
|
|
@ -119,9 +119,10 @@ class MobiReader(object):
|
|||||||
try:
|
try:
|
||||||
self.book_header = BookHeader(self.sections[k8i][0],
|
self.book_header = BookHeader(self.sections[k8i][0],
|
||||||
self.ident, user_encoding, self.log)
|
self.ident, user_encoding, self.log)
|
||||||
|
self.book_header.kf8_first_image_index = self.book_header.first_image_index + k8i
|
||||||
|
self.book_header.mobi6_records = bh.records
|
||||||
|
|
||||||
# Only the first_image_index from the MOBI 6 header is
|
# Need the first_image_index from the mobi 6 header as well
|
||||||
# useful
|
|
||||||
for x in ('first_image_index',):
|
for x in ('first_image_index',):
|
||||||
setattr(self.book_header, x, getattr(bh, x))
|
setattr(self.book_header, x, getattr(bh, x))
|
||||||
|
|
||||||
|
@ -18,12 +18,12 @@ from calibre.ebooks.mobi.reader.headers import NULL_INDEX
|
|||||||
from calibre.ebooks.mobi.reader.index import read_index
|
from calibre.ebooks.mobi.reader.index import read_index
|
||||||
from calibre.ebooks.mobi.reader.ncx import read_ncx, build_toc
|
from calibre.ebooks.mobi.reader.ncx import read_ncx, build_toc
|
||||||
from calibre.ebooks.mobi.reader.markup import expand_mobi8_markup
|
from calibre.ebooks.mobi.reader.markup import expand_mobi8_markup
|
||||||
|
from calibre.ebooks.mobi.reader.containers import Container, find_imgtype
|
||||||
from calibre.ebooks.metadata.opf2 import Guide, OPFCreator
|
from calibre.ebooks.metadata.opf2 import Guide, OPFCreator
|
||||||
from calibre.ebooks.metadata.toc import TOC
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
from calibre.ebooks.mobi.utils import read_font_record
|
from calibre.ebooks.mobi.utils import read_font_record
|
||||||
from calibre.ebooks.oeb.parse_utils import parse_html
|
from calibre.ebooks.oeb.parse_utils import parse_html
|
||||||
from calibre.ebooks.oeb.base import XPath, XHTML, xml2text
|
from calibre.ebooks.oeb.base import XPath, XHTML, xml2text
|
||||||
from calibre.utils.imghdr import what
|
|
||||||
|
|
||||||
Part = namedtuple('Part',
|
Part = namedtuple('Part',
|
||||||
'num type filename start end aid')
|
'num type filename start end aid')
|
||||||
@ -59,6 +59,12 @@ def reverse_tag_iter(block):
|
|||||||
yield block[plt:pgt+1]
|
yield block[plt:pgt+1]
|
||||||
end = plt
|
end = plt
|
||||||
|
|
||||||
|
def get_first_resource_index(first_image_index, num_of_text_records, first_text_record_number):
    """Return the index of the first resource record.

    ``first_image_index`` is returned unchanged unless it is -1 or
    NULL_INDEX. In that case the result is
    ``num_of_text_records + first_text_record_number``.
    """
    if first_image_index in (-1, NULL_INDEX):
        return num_of_text_records + first_text_record_number
    return first_image_index
|
||||||
|
|
||||||
class Mobi8Reader(object):
|
class Mobi8Reader(object):
|
||||||
|
|
||||||
def __init__(self, mobi6_reader, log, for_tweak=False):
|
def __init__(self, mobi6_reader, log, for_tweak=False):
|
||||||
@ -69,11 +75,16 @@ class Mobi8Reader(object):
|
|||||||
|
|
||||||
def __call__(self):
|
def __call__(self):
|
||||||
self.mobi6_reader.check_for_drm()
|
self.mobi6_reader.check_for_drm()
|
||||||
offset = 1
|
bh = self.mobi6_reader.book_header
|
||||||
res_end = len(self.mobi6_reader.sections)
|
|
||||||
if self.mobi6_reader.kf8_type == 'joint':
|
if self.mobi6_reader.kf8_type == 'joint':
|
||||||
offset = self.mobi6_reader.kf8_boundary + 2
|
offset = self.mobi6_reader.kf8_boundary + 2
|
||||||
res_end = self.mobi6_reader.kf8_boundary
|
self.resource_offsets = [
|
||||||
|
(get_first_resource_index(bh.first_image_index, bh.mobi6_records, 1), offset - 2),
|
||||||
|
(get_first_resource_index(bh.kf8_first_image_index, bh.records, offset), len(self.mobi6_reader.sections)),
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
offset = 1
|
||||||
|
self.resource_offsets = [(get_first_resource_index(bh.first_image_index, bh.records, offset), len(self.mobi6_reader.sections))]
|
||||||
|
|
||||||
self.processed_records = self.mobi6_reader.extract_text(offset=offset)
|
self.processed_records = self.mobi6_reader.extract_text(offset=offset)
|
||||||
self.raw_ml = self.mobi6_reader.mobi_html
|
self.raw_ml = self.mobi6_reader.mobi_html
|
||||||
@ -81,18 +92,14 @@ class Mobi8Reader(object):
|
|||||||
f.write(self.raw_ml)
|
f.write(self.raw_ml)
|
||||||
|
|
||||||
self.kf8_sections = self.mobi6_reader.sections[offset-1:]
|
self.kf8_sections = self.mobi6_reader.sections[offset-1:]
|
||||||
first_resource_index = self.header.first_image_index
|
|
||||||
if first_resource_index in {-1, NULL_INDEX}:
|
|
||||||
first_resource_index = self.header.records + 1
|
|
||||||
self.resource_sections = \
|
|
||||||
self.mobi6_reader.sections[first_resource_index:res_end]
|
|
||||||
self.cover_offset = getattr(self.header.exth, 'cover_offset', None)
|
self.cover_offset = getattr(self.header.exth, 'cover_offset', None)
|
||||||
|
|
||||||
self.read_indices()
|
self.read_indices()
|
||||||
self.build_parts()
|
self.build_parts()
|
||||||
guide = self.create_guide()
|
guide = self.create_guide()
|
||||||
ncx = self.create_ncx()
|
ncx = self.create_ncx()
|
||||||
resource_map = self.extract_resources()
|
resource_map = self.extract_resources(self.mobi6_reader.sections)
|
||||||
spine = self.expand_text(resource_map)
|
spine = self.expand_text(resource_map)
|
||||||
return self.write_opf(guide, ncx, spine, resource_map)
|
return self.write_opf(guide, ncx, spine, resource_map)
|
||||||
|
|
||||||
@ -385,19 +392,21 @@ class Mobi8Reader(object):
|
|||||||
# Build the TOC object
|
# Build the TOC object
|
||||||
return build_toc(index_entries)
|
return build_toc(index_entries)
|
||||||
|
|
||||||
def extract_resources(self):
|
def extract_resources(self, sections):
|
||||||
from calibre.ebooks.mobi.writer2.resources import PLACEHOLDER_GIF
|
from calibre.ebooks.mobi.writer2.resources import PLACEHOLDER_GIF
|
||||||
resource_map = []
|
resource_map = []
|
||||||
|
container = None
|
||||||
for x in ('fonts', 'images'):
|
for x in ('fonts', 'images'):
|
||||||
os.mkdir(x)
|
os.mkdir(x)
|
||||||
|
|
||||||
for i, sec in enumerate(self.resource_sections):
|
for start, end in self.resource_offsets:
|
||||||
|
for i, sec in enumerate(sections[start:end]):
|
||||||
fname_idx = i+1
|
fname_idx = i+1
|
||||||
data = sec[0]
|
data = sec[0]
|
||||||
typ = data[:4]
|
typ = data[:4]
|
||||||
href = None
|
href = None
|
||||||
if typ in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'BOUN',
|
if typ in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'BOUN',
|
||||||
b'FDST', b'DATP', b'AUDI', b'VIDE', b'RESC', b'CMET'}:
|
b'FDST', b'DATP', b'AUDI', b'VIDE', b'RESC', b'CMET', b'PAGE'}:
|
||||||
pass # Ignore these records
|
pass # Ignore these records
|
||||||
elif typ == b'FONT':
|
elif typ == b'FONT':
|
||||||
font = read_font_record(data)
|
font = read_font_record(data)
|
||||||
@ -412,15 +421,22 @@ class Mobi8Reader(object):
|
|||||||
font['raw_data'])
|
font['raw_data'])
|
||||||
if font['encrypted']:
|
if font['encrypted']:
|
||||||
self.encrypted_fonts.append(href)
|
self.encrypted_fonts.append(href)
|
||||||
else:
|
elif typ == b'CONT':
|
||||||
|
if data == b'CONTBOUNDARY':
|
||||||
|
container = None
|
||||||
|
continue
|
||||||
|
container = Container(data)
|
||||||
|
elif typ == b'CRES':
|
||||||
|
data, imgtype = container.load_image(data)
|
||||||
|
if data is not None:
|
||||||
|
href = 'images/%05d.%s'%(container.resource_index, imgtype)
|
||||||
|
with open(href.replace('/', os.sep), 'wb') as f:
|
||||||
|
f.write(data)
|
||||||
|
elif typ == b'\xa0\xa0\xa0\xa0' and len(data) == 4:
|
||||||
|
container.resource_index += 1
|
||||||
|
elif container is None:
|
||||||
if not (len(data) == len(PLACEHOLDER_GIF) and data == PLACEHOLDER_GIF):
|
if not (len(data) == len(PLACEHOLDER_GIF) and data == PLACEHOLDER_GIF):
|
||||||
imgtype = what(None, data)
|
imgtype = find_imgtype(data)
|
||||||
if imgtype is None:
|
|
||||||
from calibre.utils.magick.draw import identify_data
|
|
||||||
try:
|
|
||||||
imgtype = identify_data(data)[2]
|
|
||||||
except Exception:
|
|
||||||
imgtype = 'unknown'
|
|
||||||
href = 'images/%05d.%s'%(fname_idx, imgtype)
|
href = 'images/%05d.%s'%(fname_idx, imgtype)
|
||||||
with open(href.replace('/', os.sep), 'wb') as f:
|
with open(href.replace('/', os.sep), 'wb') as f:
|
||||||
f.write(data)
|
f.write(data)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user