Start work on polishing of epub 3 covers

This commit is contained in:
Kovid Goyal 2016-07-04 17:39:27 +05:30
parent 671128d144
commit 51b245e1bb
4 changed files with 168 additions and 36 deletions

View File

@ -148,6 +148,8 @@ def expand_prefix(raw, prefixes):
return regex(r'(\S+)\s*:\s*(\S+)').sub(lambda m:(prefixes.get(m.group(1), m.group(1)) + ':' + m.group(2)), raw or '')
def ensure_prefix(root, prefixes, prefix, value=None):
if prefixes is None:
prefixes = read_prefixes(root)
prefixes[prefix] = value or reserved_prefixes[prefix]
prefixes = {k:v for k, v in prefixes.iteritems() if reserved_prefixes.get(k) != v}
if prefixes:

View File

@ -24,6 +24,7 @@ from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.conversion.plugins.epub_input import (
ADOBE_OBFUSCATION, IDPF_OBFUSCATION, decrypt_font_data)
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor, CSSPreProcessor as cssp
from calibre.ebooks.metadata.opf3 import read_prefixes, expand_prefix, ensure_prefix, CALIBRE_PREFIX
from calibre.ebooks.metadata.utils import parse_opf_version
from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.reader.headers import MetadataHeader
@ -612,11 +613,15 @@ class Container(ContainerBase): # {{{
def manifest_items_with_property(self, property_name):
    ''' All manifest items that have the specified property.

    Both the requested property and every property found on a manifest
    item are passed through expand_prefix() (using the prefixes read from
    the OPF) and lower-cased before being compared. Yields the canonical
    name of each matching item, at most once per item. '''
    prefixes = read_prefixes(self.opf)
    q = expand_prefix(property_name, prefixes).lower()
    for item in self.opf_xpath('//opf:manifest/opf:item[@href and @properties]'):
        props = (item.get('properties') or '').lower().split()
        for p in props:
            if expand_prefix(p, prefixes).lower() == q:
                yield self.href_to_name(item.get('href'), self.opf_name)
                # One match is enough, never report the same item twice
                break
def manifest_items_of_type(self, predicate):
''' The names of all manifest items whose media-type matches predicate.
@ -631,6 +636,34 @@ class Container(ContainerBase): # {{{
for name in names:
yield name
def apply_unique_properties(self, name, *properties):
    ''' Ensure that the specified properties are set on only the manifest item
    identified by name. You can pass None as the name to remove the
    property from all items.

    Existing property values are matched case-insensitively. A property
    that has to be added is written exactly as it was passed in. The OPF
    is always marked as dirty afterwards. '''
    properties = frozenset(properties)
    # Properties in the calibre: namespace require the prefix to be declared
    if any(p.startswith('calibre:') for p in properties):
        ensure_prefix(self.opf, None, 'calibre', CALIBRE_PREFIX)

    for item in self.opf_xpath('//opf:manifest/opf:item'):
        iname = self.href_to_name(item.get('href'), self.opf_name)
        props = (item.get('properties') or '').split()
        for prop in properties:
            lprop = prop.lower()
            # Check against the current list, so that changes made for a
            # previously processed property are taken into account
            if any(p.lower() == lprop for p in props):
                if name != iname:
                    # Compare lower-case to lower-case, otherwise a property
                    # passed with upper case letters is never removed
                    props = [p for p in props if p.lower() != lprop]
                    if props:
                        item.set('properties', ' '.join(props))
                    else:
                        # pop() does not fail if the attribute is already gone
                        item.attrib.pop('properties', None)
            elif name == iname:
                props.append(prop)
                item.set('properties', ' '.join(props))
    self.dirty(self.opf_name)
@property
def guide_type_map(self):
' Mapping of guide type to canonical name '

View File

@ -148,6 +148,14 @@ def find_cover_image2(container, strict=False):
def find_cover_image3(container):
    ''' Find the cover image of an EPUB 3 book.

    The first manifest item carrying the cover-image property is returned.
    If there is none, the <meta name="cover"> entries are checked and the
    first one that refers to a raster image is returned. Returns None if
    nothing is found. '''
    for flagged in container.manifest_items_with_property('cover-image'):
        return flagged

    id_to_name, mime_types = container.manifest_id_map, container.mime_map
    for meta in container.opf_xpath('//opf:meta[@name="cover" and @content]'):
        candidate = id_to_name.get(meta.get('content'))
        if is_raster_image(mime_types.get(candidate)):
            return candidate
    return None
def find_cover_image(container, strict=False):
'Find a raster image marked as a cover in the OPF'
@ -165,11 +173,13 @@ def get_guides(container):
guides = container.opf_xpath('//opf:guide')
return guides
def mark_as_cover_epub(container, name):
mmap = {v:k for k, v in container.manifest_id_map.iteritems()}
if name not in mmap:
raise ValueError('Cannot mark %s as cover as it is not in manifest' % name)
mid = mmap[name]
ver = container.opf_version_parsed
# Remove all entries from the opf that identify a raster image as cover
for meta in container.opf_xpath('//opf:meta[@name="cover" and @content]'):
@ -177,22 +187,25 @@ def mark_as_cover_epub(container, name):
for ref in container.opf_xpath('//opf:guide/opf:reference[@href and @type]'):
if ref.get('type').lower() not in COVER_TYPES:
continue
name = container.href_to_name(ref.get('href'), container.opf_name)
mt = container.mime_map.get(name, None)
rname = container.href_to_name(ref.get('href'), container.opf_name)
mt = container.mime_map.get(rname, None)
if is_raster_image(mt):
container.remove_from_xml(ref)
# Add reference to image in <metadata>
for metadata in container.opf_xpath('//opf:metadata'):
m = metadata.makeelement(OPF('meta'), name='cover', content=mid)
container.insert_into_xml(metadata, m)
if ver.major < 3:
# Add reference to image in <metadata>
for metadata in container.opf_xpath('//opf:metadata'):
m = metadata.makeelement(OPF('meta'), name='cover', content=mid)
container.insert_into_xml(metadata, m)
# If no entry for titlepage exists in guide, insert one that points to this
# image
if not container.opf_xpath('//opf:guide/opf:reference[@type="cover"]'):
for guide in get_guides(container):
container.insert_into_xml(guide, guide.makeelement(
OPF('reference'), type='cover', href=container.name_to_href(name, container.opf_name)))
# If no entry for cover exists in guide, insert one that points to this
# image
if not container.opf_xpath('//opf:guide/opf:reference[@type="cover"]'):
for guide in get_guides(container):
container.insert_into_xml(guide, guide.makeelement(
OPF('reference'), type='cover', href=container.name_to_href(name, container.opf_name)))
else:
container.apply_unique_properties(name, 'cover-image')
container.dirty(container.opf_name)
@ -202,6 +215,7 @@ def mark_as_titlepage(container, name, move_to_start=True):
:param move_to_start: If True the HTML file is moved to the start of the spine
'''
ver = container.opf_version_parsed
if move_to_start:
for item, q, linear in container.spine_iter:
if name == q:
@ -210,20 +224,29 @@ def mark_as_titlepage(container, name, move_to_start=True):
item.set('linear', 'yes')
if item.getparent().index(item) > 0:
container.insert_into_xml(item.getparent(), item, 0)
for ref in container.opf_xpath('//opf:guide/opf:reference[@type="cover"]'):
ref.getparent().remove(ref)
if ver.major < 3:
for ref in container.opf_xpath('//opf:guide/opf:reference[@type="cover"]'):
ref.getparent().remove(ref)
for guide in get_guides(container):
container.insert_into_xml(guide, guide.makeelement(
OPF('reference'), type='cover', href=container.name_to_href(name, container.opf_name)))
else:
container.apply_unique_properties(name, 'calibre:title-page')
for guide in get_guides(container):
container.insert_into_xml(guide, guide.makeelement(
OPF('reference'), type='cover', href=container.name_to_href(name, container.opf_name)))
container.dirty(container.opf_name)
def find_cover_page(container):
    ''' Find a document marked as a cover in the OPF.

    For OPF versions before 3 this is the target of a guide reference of
    type "cover" whose media type is one of OEB_DOCS. For version 3 and
    later it is the first manifest item that has the calibre:title-page
    property. Returns None if no cover page is found. '''
    ver = container.opf_version_parsed
    if ver.major < 3:
        mm = container.mime_map
        guide_type_map = container.guide_type_map
        for ref_type, name in guide_type_map.iteritems():
            if ref_type.lower() == 'cover' and mm.get(name, '').lower() in OEB_DOCS:
                return name
    else:
        for name in container.manifest_items_with_property('calibre:title-page'):
            return name
def find_cover_image_in_page(container, cover_page):
@ -262,7 +285,7 @@ def clean_opf(container):
name = gtm.get(typ, None)
if name and name in container.name_path_map:
yield name
container.apply_unique_properties(None, 'cover-image', 'calibre:title-page')
container.dirty(container.opf_name)
def create_epub_cover(container, cover_path, existing_image, options=None):
@ -345,14 +368,19 @@ def create_epub_cover(container, cover_path, existing_image, options=None):
spine = container.opf_xpath('//opf:spine')[0]
ref = spine.makeelement(OPF('itemref'), idref=titlepage_item.get('id'))
container.insert_into_xml(spine, ref, index=0)
guide = container.opf_get_or_create('guide')
container.insert_into_xml(guide, guide.makeelement(
OPF('reference'), type='cover', title=_('Cover'),
href=container.name_to_href(titlepage, base=container.opf_name)))
metadata = container.opf_get_or_create('metadata')
meta = metadata.makeelement(OPF('meta'), name='cover')
meta.set('content', raster_cover_item.get('id'))
container.insert_into_xml(metadata, meta)
ver = container.opf_version_parsed
if ver.major < 3:
guide = container.opf_get_or_create('guide')
container.insert_into_xml(guide, guide.makeelement(
OPF('reference'), type='cover', title=_('Cover'),
href=container.name_to_href(titlepage, base=container.opf_name)))
metadata = container.opf_get_or_create('metadata')
meta = metadata.makeelement(OPF('meta'), name='cover')
meta.set('content', raster_cover_item.get('id'))
container.insert_into_xml(metadata, meta)
else:
container.apply_unique_properties(raster_cover, 'cover-image')
container.apply_unique_properties(titlepage, 'calibre:title-page')
return raster_cover, titlepage
@ -435,5 +463,3 @@ def set_epub_cover(container, cover_path, report, options=None):
if link_sub:
replace_links(container, link_sub, frag_map=lambda x, y:None)
return raster_cover, titlepage

View File

@ -4,16 +4,75 @@
from __future__ import (unicode_literals, division, absolute_import,
print_function)
from io import BytesIO
from itertools import count
from zipfile import ZipFile, ZIP_STORED
import os
from calibre.ebooks.oeb.polish.tests.base import BaseTest
from calibre.ebooks.oeb.polish.container import get_container
from calibre.ebooks.oeb.polish.create import create_book
from calibre.ebooks.oeb.polish.cover import find_cover_image, mark_as_cover
from calibre.ebooks.oeb.polish.toc import get_toc
from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.opf3 import CALIBRE_PREFIX
# Skeleton of the OPF used for the books generated by create_epub(). The %s in
# the prefix attribute is replaced by CALIBRE_PREFIX right here, the {ver},
# {metadata}, {manifest}, {spine} and {guide} fields are filled in later by
# create_epub() via str.format().
OPF_TEMPLATE = '''
<package xmlns="http://www.idpf.org/2007/opf" version="{ver}" prefix="calibre: %s" unique-identifier="uid">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
<dc:identifier id="uid">test</dc:identifier>
{metadata}
</metadata>
<manifest>{manifest}</manifest>
<spine>{spine}</spine>
<guide>{guide}</guide>
</package>''' % CALIBRE_PREFIX # noqa
def create_manifest_item(name, data=b'', properties=None):
    ''' Describe a file for create_epub() as a (name, data, properties)
    tuple. '''
    entry = name, data, properties
    return entry
# Short alias, used by the tests
cmi = create_manifest_item
def create_epub(manifest, spine=(), guide=(), meta_cover=None, ver=3):
    ''' Build a minimal EPUB in memory and return it as a BytesIO object,
    positioned at the start.

    :param manifest: Iterable of (name, data, properties) tuples, see create_manifest_item()
    :param spine: Names of the items to put into the spine
    :param guide: Iterable of (name, type) tuples for the guide
    :param meta_cover: If set, a <meta name="cover"> with this content is added
    :param ver: The major OPF version
    '''
    item_tags = []
    for name, data, properties in manifest:
        props_attr = 'properties="%s"' % properties if properties else ''
        item_tags.append('<item id="%s" href="%s" media-type="%s" %s/>' % (
            name, name, guess_type(name), props_attr))
    metadata = '<meta name="cover" content="%s"/>' % meta_cover if meta_cover else ''
    spine_xml = ''.join('<itemref idref="%s"/>' % idref for idref in spine)
    guide_xml = ''.join('<reference href="%s" type="%s"/>' % (href, typ) for href, typ in guide)
    opf = OPF_TEMPLATE.format(
        manifest=''.join(item_tags), ver='%d.0'%ver, metadata=metadata,
        spine=spine_xml, guide=guide_xml)
    container_xml = b'''
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="content.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>'''

    buf = BytesIO()
    with ZipFile(buf, 'w', ZIP_STORED) as zf:
        zf.writestr('META-INF/container.xml', container_xml)
        zf.writestr('content.opf', opf.encode('utf-8'))
        for name, data, properties in manifest:
            # Text data is stored as UTF-8
            payload = data.encode('utf-8') if isinstance(data, type('')) else data
            zf.writestr(name, payload)
    buf.seek(0)
    return buf
# Source of the numbers Structure.create_epub() uses to build the file name of
# each generated book and the name of its container directory
counter = count()
class Structure(BaseTest):
def create_epub(self, *args, **kw):
    ''' Write a book generated by the module level create_epub() into the
    test directory and return a container opened on it. All arguments are
    passed on to create_epub(). '''
    idx = next(counter)
    epub_path = os.path.join(self.tdir, str(idx) + 'book.epub')
    with open(epub_path, 'wb') as out:
        raw = create_epub(*args, **kw).getvalue()
        out.write(raw)
    container_dir = os.path.join(self.tdir, 'container%d' % idx)
    return get_container(epub_path, tdir=container_dir, tweak_mode=True)
def test_toc_detection(self):
ep = os.path.join(self.tdir, 'book.epub')
create_book(Metadata('Test ToC'), ep)
@ -29,3 +88,15 @@ class Structure(BaseTest):
toc = get_toc(c)
self.assertTrue(len(toc))
self.assertEqual(toc.as_dict['children'][0]['title'], 'EPUB 3 nav')
def test_epub3_covers(self):
    ''' Finding and marking the cover image in EPUB 3 books '''
    # An image that is not marked in any way is not a cover
    book = self.create_epub([cmi('c.jpg')])
    self.assertIsNone(find_cover_image(book))

    # <meta name="cover"> alone identifies the cover
    book = self.create_epub([cmi('c.jpg')], meta_cover='c.jpg')
    self.assertEqual('c.jpg', find_cover_image(book))

    # The cover-image property is preferred over <meta name="cover">
    manifest = [cmi('c.jpg', b'z', 'cover-image'), cmi('d.jpg')]
    book = self.create_epub(manifest, meta_cover='d.jpg')
    self.assertEqual('c.jpg', find_cover_image(book))

    # After marking another image it is found instead and no element with a
    # name attribute is left in the OPF
    mark_as_cover(book, 'd.jpg')
    self.assertEqual('d.jpg', find_cover_image(book))
    self.assertFalse(book.opf_xpath('//*/@name'))