From 51b245e1bb9aae12a6cc18677600a24a5f774b41 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 4 Jul 2016 17:39:27 +0530 Subject: [PATCH] Start work on polishing of epub 3 covers --- src/calibre/ebooks/metadata/opf3.py | 2 + src/calibre/ebooks/oeb/polish/container.py | 39 +++++++- src/calibre/ebooks/oeb/polish/cover.py | 90 ++++++++++++------- .../ebooks/oeb/polish/tests/structure.py | 73 ++++++++++++++- 4 files changed, 168 insertions(+), 36 deletions(-) diff --git a/src/calibre/ebooks/metadata/opf3.py b/src/calibre/ebooks/metadata/opf3.py index 28ae691d11..57384ecc56 100644 --- a/src/calibre/ebooks/metadata/opf3.py +++ b/src/calibre/ebooks/metadata/opf3.py @@ -148,6 +148,8 @@ def expand_prefix(raw, prefixes): return regex(r'(\S+)\s*:\s*(\S+)').sub(lambda m:(prefixes.get(m.group(1), m.group(1)) + ':' + m.group(2)), raw or '') def ensure_prefix(root, prefixes, prefix, value=None): + if prefixes is None: + prefixes = read_prefixes(root) prefixes[prefix] = value or reserved_prefixes[prefix] prefixes = {k:v for k, v in prefixes.iteritems() if reserved_prefixes.get(k) != v} if prefixes: diff --git a/src/calibre/ebooks/oeb/polish/container.py b/src/calibre/ebooks/oeb/polish/container.py index 7e4f329824..1fcb4d70e5 100644 --- a/src/calibre/ebooks/oeb/polish/container.py +++ b/src/calibre/ebooks/oeb/polish/container.py @@ -24,6 +24,7 @@ from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.conversion.plugins.epub_input import ( ADOBE_OBFUSCATION, IDPF_OBFUSCATION, decrypt_font_data) from calibre.ebooks.conversion.preprocess import HTMLPreProcessor, CSSPreProcessor as cssp +from calibre.ebooks.metadata.opf3 import read_prefixes, expand_prefix, ensure_prefix, CALIBRE_PREFIX from calibre.ebooks.metadata.utils import parse_opf_version from calibre.ebooks.mobi import MobiError from calibre.ebooks.mobi.reader.headers import MetadataHeader @@ -612,11 +613,15 @@ class Container(ContainerBase): # {{{ def manifest_items_with_property(self, property_name): ' All manifest items that have the specified property ' - q = property_name.lower() + prefixes = read_prefixes(self.opf) + q = expand_prefix(property_name, prefixes).lower() for item in self.opf_xpath('//opf:manifest/opf:item[@href and @properties]'): props = (item.get('properties') or '').lower().split() - if q in props: - yield self.href_to_name(item.get('href'), self.opf_name) + for p in props: + pq = expand_prefix(p, prefixes).lower() + if pq == q: + yield self.href_to_name(item.get('href'), self.opf_name) + break def manifest_items_of_type(self, predicate): ''' The names of all manifest items whose media-type matches predicate. @@ -631,6 +636,34 @@ class Container(ContainerBase): # {{{ for name in names: yield name + def apply_unique_properties(self, name, *properties): + ''' Ensure that the specified properties are set on only the manifest item + identified by name. You can pass None as the name to remove the + property from all items. ''' + properties = frozenset(properties) + for p in properties: + if p.startswith('calibre:'): + ensure_prefix(self.opf, None, 'calibre', CALIBRE_PREFIX) + break + + for item in self.opf_xpath('//opf:manifest/opf:item'): + iname = self.href_to_name(item.get('href'), self.opf_name) + props = (item.get('properties') or '').split() + lprops = {p.lower() for p in props} + for prop in properties: + if prop.lower() in lprops: + if name != iname: + props = [p for p in props if p.lower() != prop] + if props: + item.set('properties', ' '.join(props)) + else: + del item.attrib['properties'] + else: + if name == iname: + props.append(prop) + item.set('properties', ' '.join(props)) + self.dirty(self.opf_name) + @property def guide_type_map(self): ' Mapping of guide type to canonical name ' diff --git a/src/calibre/ebooks/oeb/polish/cover.py b/src/calibre/ebooks/oeb/polish/cover.py index 4dc215b2bc..afdf1a92fa 100644 --- a/src/calibre/ebooks/oeb/polish/cover.py +++ b/src/calibre/ebooks/oeb/polish/cover.py @@ -148,6 +148,14 @@ def find_cover_image2(container, strict=False): def find_cover_image3(container): for name in container.manifest_items_with_property('cover-image'): return name + manifest_id_map = container.manifest_id_map + mm = container.mime_map + for meta in container.opf_xpath('//opf:meta[@name="cover" and @content]'): + item_id = meta.get('content') + name = manifest_id_map.get(item_id, None) + media_type = mm.get(name, None) + if is_raster_image(media_type): + return name def find_cover_image(container, strict=False): 'Find a raster image marked as a cover in the OPF' @@ -165,11 +173,13 @@ def get_guides(container): guides = container.opf_xpath('//opf:guide') return guides + def mark_as_cover_epub(container, name): mmap = {v:k for k, v in container.manifest_id_map.iteritems()} if name not in mmap: raise ValueError('Cannot mark %s as cover as it is not in manifest' % name) mid = mmap[name] + ver = container.opf_version_parsed # Remove all entries from the opf that identify a raster image as cover for meta in container.opf_xpath('//opf:meta[@name="cover" and @content]'): @@ -177,22 +187,25 @@ def mark_as_cover_epub(container, name): for ref in container.opf_xpath('//opf:guide/opf:reference[@href and @type]'): if ref.get('type').lower() not in COVER_TYPES: continue - name = container.href_to_name(ref.get('href'), container.opf_name) - mt = container.mime_map.get(name, None) + rname = container.href_to_name(ref.get('href'), container.opf_name) + mt = container.mime_map.get(rname, None) if is_raster_image(mt): container.remove_from_xml(ref) - # Add reference to image in - for metadata in container.opf_xpath('//opf:metadata'): - m = metadata.makeelement(OPF('meta'), name='cover', content=mid) - container.insert_into_xml(metadata, m) + if ver.major < 3: + # Add reference to image in + for metadata in container.opf_xpath('//opf:metadata'): + m = metadata.makeelement(OPF('meta'), name='cover', content=mid) + container.insert_into_xml(metadata, m) - # If no entry for titlepage exists in guide, insert one that points to this - # image - if not container.opf_xpath('//opf:guide/opf:reference[@type="cover"]'): - for guide in get_guides(container): - container.insert_into_xml(guide, guide.makeelement( - OPF('reference'), type='cover', href=container.name_to_href(name, container.opf_name))) + # If no entry for cover exists in guide, insert one that points to this + # image + if not container.opf_xpath('//opf:guide/opf:reference[@type="cover"]'): + for guide in get_guides(container): + container.insert_into_xml(guide, guide.makeelement( + OPF('reference'), type='cover', href=container.name_to_href(name, container.opf_name))) + else: + container.apply_unique_properties(name, 'cover-image') container.dirty(container.opf_name) @@ -202,6 +215,7 @@ def mark_as_titlepage(container, name, move_to_start=True): :param move_to_start: If True the HTML file is moved to the start of the spine ''' + ver = container.opf_version_parsed if move_to_start: for item, q, linear in container.spine_iter: if name == q: @@ -210,20 +224,29 @@ def mark_as_titlepage(container, name, move_to_start=True): item.set('linear', 'yes') if item.getparent().index(item) > 0: container.insert_into_xml(item.getparent(), item, 0) - for ref in container.opf_xpath('//opf:guide/opf:reference[@type="cover"]'): - ref.getparent().remove(ref) + if ver.major < 3: + for ref in container.opf_xpath('//opf:guide/opf:reference[@type="cover"]'): + ref.getparent().remove(ref) + + for guide in get_guides(container): + container.insert_into_xml(guide, guide.makeelement( + OPF('reference'), type='cover', href=container.name_to_href(name, container.opf_name))) + else: + container.apply_unique_properties(name, 'calibre:title-page') - for guide in get_guides(container): - container.insert_into_xml(guide, guide.makeelement( - OPF('reference'), type='cover', href=container.name_to_href(name, container.opf_name))) container.dirty(container.opf_name) def find_cover_page(container): 'Find a document marked as a cover in the OPF' - mm = container.mime_map - guide_type_map = container.guide_type_map - for ref_type, name in guide_type_map.iteritems(): - if ref_type.lower() == 'cover' and mm.get(name, '').lower() in OEB_DOCS: + ver = container.opf_version_parsed + if ver.major < 3: + mm = container.mime_map + guide_type_map = container.guide_type_map + for ref_type, name in guide_type_map.iteritems(): + if ref_type.lower() == 'cover' and mm.get(name, '').lower() in OEB_DOCS: + return name + else: + for name in container.manifest_items_with_property('calibre:title-page'): return name def find_cover_image_in_page(container, cover_page): @@ -262,7 +285,7 @@ def clean_opf(container): name = gtm.get(typ, None) if name and name in container.name_path_map: yield name - + container.apply_unique_properties(None, 'cover-image', 'calibre:title-page') container.dirty(container.opf_name) def create_epub_cover(container, cover_path, existing_image, options=None): @@ -345,14 +368,19 @@ def create_epub_cover(container, cover_path, existing_image, options=None): spine = container.opf_xpath('//opf:spine')[0] ref = spine.makeelement(OPF('itemref'), idref=titlepage_item.get('id')) container.insert_into_xml(spine, ref, index=0) - guide = container.opf_get_or_create('guide') - container.insert_into_xml(guide, guide.makeelement( - OPF('reference'), type='cover', title=_('Cover'), - href=container.name_to_href(titlepage, base=container.opf_name))) - metadata = container.opf_get_or_create('metadata') - meta = metadata.makeelement(OPF('meta'), name='cover') - meta.set('content', raster_cover_item.get('id')) - container.insert_into_xml(metadata, meta) + ver = container.opf_version_parsed + if ver.major < 3: + guide = container.opf_get_or_create('guide') + container.insert_into_xml(guide, guide.makeelement( + OPF('reference'), type='cover', title=_('Cover'), + href=container.name_to_href(titlepage, base=container.opf_name))) + metadata = container.opf_get_or_create('metadata') + meta = metadata.makeelement(OPF('meta'), name='cover') + meta.set('content', raster_cover_item.get('id')) + container.insert_into_xml(metadata, meta) + else: + container.apply_unique_properties(raster_cover, 'cover-image') + container.apply_unique_properties(titlepage, 'calibre:title-page') return raster_cover, titlepage @@ -435,5 +463,3 @@ def set_epub_cover(container, cover_path, report, options=None): if link_sub: replace_links(container, link_sub, frag_map=lambda x, y:None) return raster_cover, titlepage - - diff --git a/src/calibre/ebooks/oeb/polish/tests/structure.py b/src/calibre/ebooks/oeb/polish/tests/structure.py index ecbb2d6d45..df903c166c 100644 --- a/src/calibre/ebooks/oeb/polish/tests/structure.py +++ b/src/calibre/ebooks/oeb/polish/tests/structure.py @@ -4,16 +4,75 @@ from __future__ import (unicode_literals, division, absolute_import, print_function) - +from io import BytesIO +from itertools import count +from zipfile import ZipFile, ZIP_STORED import os from calibre.ebooks.oeb.polish.tests.base import BaseTest from calibre.ebooks.oeb.polish.container import get_container from calibre.ebooks.oeb.polish.create import create_book +from calibre.ebooks.oeb.polish.cover import find_cover_image, mark_as_cover from calibre.ebooks.oeb.polish.toc import get_toc +from calibre.ebooks.oeb.polish.utils import guess_type from calibre.ebooks.metadata.book.base import Metadata +from calibre.ebooks.metadata.opf3 import CALIBRE_PREFIX + +OPF_TEMPLATE = ''' + + + test + {metadata} + + {manifest} + {spine} + {guide} +''' % CALIBRE_PREFIX # noqa + +def create_manifest_item(name, data=b'', properties=None): + return (name, data, properties) +cmi = create_manifest_item + +def create_epub(manifest, spine=(), guide=(), meta_cover=None, ver=3): + mo = [] + for name, data, properties in manifest: + mo.append('' % ( + name, name, guess_type(name), ('properties="%s"' % properties if properties else ''))) + mo = ''.join(mo) + metadata = '' + if meta_cover: + metadata = '' % meta_cover + spine = ''.join('' % name for name in spine) + guide = ''.join('' % (name, typ) for name, typ in guide) + opf = OPF_TEMPLATE.format(manifest=mo, ver='%d.0'%ver, metadata=metadata, spine=spine, guide=guide) + buf = BytesIO() + with ZipFile(buf, 'w', ZIP_STORED) as zf: + zf.writestr('META-INF/container.xml', b''' + + + + +''') + zf.writestr('content.opf', opf.encode('utf-8')) + for name, data, properties in manifest: + if isinstance(data, type('')): + data = data.encode('utf-8') + zf.writestr(name, data) + buf.seek(0) + return buf + +counter = count() + class Structure(BaseTest): + def create_epub(self, *args, **kw): + n = next(counter) + ep = os.path.join(self.tdir, str(n) + 'book.epub') + with open(ep, 'wb') as f: + f.write(create_epub(*args, **kw).getvalue()) + c = get_container(ep, tdir=os.path.join(self.tdir, 'container%d' % n), tweak_mode=True) + return c + def test_toc_detection(self): ep = os.path.join(self.tdir, 'book.epub') create_book(Metadata('Test ToC'), ep) @@ -29,3 +88,15 @@ class Structure(BaseTest): toc = get_toc(c) self.assertTrue(len(toc)) self.assertEqual(toc.as_dict['children'][0]['title'], 'EPUB 3 nav') + + def test_epub3_covers(self): + c = self.create_epub([cmi('c.jpg')]) + self.assertIsNone(find_cover_image(c)) + c = self.create_epub([cmi('c.jpg')], meta_cover='c.jpg') + self.assertEqual('c.jpg', find_cover_image(c)) + c = self.create_epub([cmi('c.jpg', b'z', 'cover-image'), cmi('d.jpg')], meta_cover='d.jpg') + self.assertEqual('c.jpg', find_cover_image(c)) + mark_as_cover(c, 'd.jpg') + self.assertEqual('d.jpg', find_cover_image(c)) + self.assertFalse(c.opf_xpath('//*/@name')) +