Start work on polishing of epub 3 covers

This commit is contained in:
Kovid Goyal 2016-07-04 17:39:27 +05:30
parent 671128d144
commit 51b245e1bb
4 changed files with 168 additions and 36 deletions

View File

@ -148,6 +148,8 @@ def expand_prefix(raw, prefixes):
return regex(r'(\S+)\s*:\s*(\S+)').sub(lambda m:(prefixes.get(m.group(1), m.group(1)) + ':' + m.group(2)), raw or '') return regex(r'(\S+)\s*:\s*(\S+)').sub(lambda m:(prefixes.get(m.group(1), m.group(1)) + ':' + m.group(2)), raw or '')
def ensure_prefix(root, prefixes, prefix, value=None): def ensure_prefix(root, prefixes, prefix, value=None):
if prefixes is None:
prefixes = read_prefixes(root)
prefixes[prefix] = value or reserved_prefixes[prefix] prefixes[prefix] = value or reserved_prefixes[prefix]
prefixes = {k:v for k, v in prefixes.iteritems() if reserved_prefixes.get(k) != v} prefixes = {k:v for k, v in prefixes.iteritems() if reserved_prefixes.get(k) != v}
if prefixes: if prefixes:

View File

@ -24,6 +24,7 @@ from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.conversion.plugins.epub_input import ( from calibre.ebooks.conversion.plugins.epub_input import (
ADOBE_OBFUSCATION, IDPF_OBFUSCATION, decrypt_font_data) ADOBE_OBFUSCATION, IDPF_OBFUSCATION, decrypt_font_data)
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor, CSSPreProcessor as cssp from calibre.ebooks.conversion.preprocess import HTMLPreProcessor, CSSPreProcessor as cssp
from calibre.ebooks.metadata.opf3 import read_prefixes, expand_prefix, ensure_prefix, CALIBRE_PREFIX
from calibre.ebooks.metadata.utils import parse_opf_version from calibre.ebooks.metadata.utils import parse_opf_version
from calibre.ebooks.mobi import MobiError from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.reader.headers import MetadataHeader from calibre.ebooks.mobi.reader.headers import MetadataHeader
@ -612,11 +613,15 @@ class Container(ContainerBase): # {{{
def manifest_items_with_property(self, property_name):
    ''' All manifest items that have the specified property.

    Both the requested property and each property declared on a manifest
    item are passed through expand_prefix() using the prefixes read from
    the OPF, then compared case-insensitively. Yields the canonical name of
    every matching item, at most once per item. '''
    known_prefixes = read_prefixes(self.opf)
    wanted = expand_prefix(property_name, known_prefixes).lower()
    for item in self.opf_xpath('//opf:manifest/opf:item[@href and @properties]'):
        declared = (item.get('properties') or '').lower().split()
        # any() stops at the first match, so an item is yielded only once
        if any(expand_prefix(p, known_prefixes).lower() == wanted for p in declared):
            yield self.href_to_name(item.get('href'), self.opf_name)
def manifest_items_of_type(self, predicate): def manifest_items_of_type(self, predicate):
''' The names of all manifest items whose media-type matches predicate. ''' The names of all manifest items whose media-type matches predicate.
@ -631,6 +636,34 @@ class Container(ContainerBase): # {{{
for name in names: for name in names:
yield name yield name
def apply_unique_properties(self, name, *properties):
    ''' Ensure that the specified properties are set on only the manifest item
    identified by name. You can pass None as the name to remove the
    property from all items.

    :param name: Canonical name of the manifest item that should carry the
                 properties, or None to strip them from every item.
    :param properties: The property values to make unique. Matching against
                       existing values is case-insensitive.

    The OPF is always marked dirty, even if nothing was changed. '''
    properties = frozenset(properties)
    # Properties in the calibre: namespace need the prefix to be declared
    if any(p.startswith('calibre:') for p in properties):
        ensure_prefix(self.opf, None, 'calibre', CALIBRE_PREFIX)

    for item in self.opf_xpath('//opf:manifest/opf:item'):
        iname = self.href_to_name(item.get('href'), self.opf_name)
        props = (item.get('properties') or '').split()
        changed = False
        for prop in properties:
            # Compare lowercased values on both sides, otherwise a
            # mixed-case argument is detected but never removed
            lprop = prop.lower()
            present = any(p.lower() == lprop for p in props)
            if name == iname:
                if not present:
                    props.append(prop)
                    changed = True
            elif present:
                props = [p for p in props if p.lower() != lprop]
                changed = True
        # Write the attribute once per item, from the final list, so that
        # it is never deleted twice
        if changed:
            if props:
                item.set('properties', ' '.join(props))
            else:
                item.attrib.pop('properties', None)
    self.dirty(self.opf_name)
@property @property
def guide_type_map(self): def guide_type_map(self):
' Mapping of guide type to canonical name ' ' Mapping of guide type to canonical name '

View File

@ -148,6 +148,14 @@ def find_cover_image2(container, strict=False):
def find_cover_image3(container):
    ''' Return the name of the cover image. The first manifest item with the
    cover-image property wins; otherwise fall back to a
    <meta name="cover"> entry that points at a raster image. Returns None
    if neither is found. '''
    for cover_name in container.manifest_items_with_property('cover-image'):
        return cover_name
    id_to_name = container.manifest_id_map
    name_to_mime = container.mime_map
    for meta in container.opf_xpath('//opf:meta[@name="cover" and @content]'):
        candidate = id_to_name.get(meta.get('content'), None)
        if is_raster_image(name_to_mime.get(candidate, None)):
            return candidate
    return None
def find_cover_image(container, strict=False): def find_cover_image(container, strict=False):
'Find a raster image marked as a cover in the OPF' 'Find a raster image marked as a cover in the OPF'
@ -165,11 +173,13 @@ def get_guides(container):
guides = container.opf_xpath('//opf:guide') guides = container.opf_xpath('//opf:guide')
return guides return guides
def mark_as_cover_epub(container, name): def mark_as_cover_epub(container, name):
mmap = {v:k for k, v in container.manifest_id_map.iteritems()} mmap = {v:k for k, v in container.manifest_id_map.iteritems()}
if name not in mmap: if name not in mmap:
raise ValueError('Cannot mark %s as cover as it is not in manifest' % name) raise ValueError('Cannot mark %s as cover as it is not in manifest' % name)
mid = mmap[name] mid = mmap[name]
ver = container.opf_version_parsed
# Remove all entries from the opf that identify a raster image as cover # Remove all entries from the opf that identify a raster image as cover
for meta in container.opf_xpath('//opf:meta[@name="cover" and @content]'): for meta in container.opf_xpath('//opf:meta[@name="cover" and @content]'):
@ -177,22 +187,25 @@ def mark_as_cover_epub(container, name):
for ref in container.opf_xpath('//opf:guide/opf:reference[@href and @type]'): for ref in container.opf_xpath('//opf:guide/opf:reference[@href and @type]'):
if ref.get('type').lower() not in COVER_TYPES: if ref.get('type').lower() not in COVER_TYPES:
continue continue
name = container.href_to_name(ref.get('href'), container.opf_name) rname = container.href_to_name(ref.get('href'), container.opf_name)
mt = container.mime_map.get(name, None) mt = container.mime_map.get(rname, None)
if is_raster_image(mt): if is_raster_image(mt):
container.remove_from_xml(ref) container.remove_from_xml(ref)
if ver.major < 3:
# Add reference to image in <metadata> # Add reference to image in <metadata>
for metadata in container.opf_xpath('//opf:metadata'): for metadata in container.opf_xpath('//opf:metadata'):
m = metadata.makeelement(OPF('meta'), name='cover', content=mid) m = metadata.makeelement(OPF('meta'), name='cover', content=mid)
container.insert_into_xml(metadata, m) container.insert_into_xml(metadata, m)
# If no entry for titlepage exists in guide, insert one that points to this # If no entry for cover exists in guide, insert one that points to this
# image # image
if not container.opf_xpath('//opf:guide/opf:reference[@type="cover"]'): if not container.opf_xpath('//opf:guide/opf:reference[@type="cover"]'):
for guide in get_guides(container): for guide in get_guides(container):
container.insert_into_xml(guide, guide.makeelement( container.insert_into_xml(guide, guide.makeelement(
OPF('reference'), type='cover', href=container.name_to_href(name, container.opf_name))) OPF('reference'), type='cover', href=container.name_to_href(name, container.opf_name)))
else:
container.apply_unique_properties(name, 'cover-image')
container.dirty(container.opf_name) container.dirty(container.opf_name)
@ -202,6 +215,7 @@ def mark_as_titlepage(container, name, move_to_start=True):
:param move_to_start: If True the HTML file is moved to the start of the spine :param move_to_start: If True the HTML file is moved to the start of the spine
''' '''
ver = container.opf_version_parsed
if move_to_start: if move_to_start:
for item, q, linear in container.spine_iter: for item, q, linear in container.spine_iter:
if name == q: if name == q:
@ -210,21 +224,30 @@ def mark_as_titlepage(container, name, move_to_start=True):
item.set('linear', 'yes') item.set('linear', 'yes')
if item.getparent().index(item) > 0: if item.getparent().index(item) > 0:
container.insert_into_xml(item.getparent(), item, 0) container.insert_into_xml(item.getparent(), item, 0)
if ver.major < 3:
for ref in container.opf_xpath('//opf:guide/opf:reference[@type="cover"]'): for ref in container.opf_xpath('//opf:guide/opf:reference[@type="cover"]'):
ref.getparent().remove(ref) ref.getparent().remove(ref)
for guide in get_guides(container): for guide in get_guides(container):
container.insert_into_xml(guide, guide.makeelement( container.insert_into_xml(guide, guide.makeelement(
OPF('reference'), type='cover', href=container.name_to_href(name, container.opf_name))) OPF('reference'), type='cover', href=container.name_to_href(name, container.opf_name)))
else:
container.apply_unique_properties(name, 'calibre:title-page')
container.dirty(container.opf_name) container.dirty(container.opf_name)
def find_cover_page(container):
    ''' Find a document marked as a cover in the OPF.

    When the OPF major version is 3 or more, the first manifest item with
    the calibre:title-page property is returned. Otherwise the guide is
    searched for a reference of type cover that points at a document whose
    media type is in OEB_DOCS. Returns None if nothing is found. '''
    if container.opf_version_parsed.major >= 3:
        for titlepage in container.manifest_items_with_property('calibre:title-page'):
            return titlepage
        return None
    mime_types = container.mime_map
    for ref_type, target in container.guide_type_map.iteritems():
        if ref_type.lower() != 'cover':
            continue
        if mime_types.get(target, '').lower() in OEB_DOCS:
            return target
    return None
def find_cover_image_in_page(container, cover_page): def find_cover_image_in_page(container, cover_page):
root = container.parsed(cover_page) root = container.parsed(cover_page)
@ -262,7 +285,7 @@ def clean_opf(container):
name = gtm.get(typ, None) name = gtm.get(typ, None)
if name and name in container.name_path_map: if name and name in container.name_path_map:
yield name yield name
container.apply_unique_properties(None, 'cover-image', 'calibre:title-page')
container.dirty(container.opf_name) container.dirty(container.opf_name)
def create_epub_cover(container, cover_path, existing_image, options=None): def create_epub_cover(container, cover_path, existing_image, options=None):
@ -345,6 +368,8 @@ def create_epub_cover(container, cover_path, existing_image, options=None):
spine = container.opf_xpath('//opf:spine')[0] spine = container.opf_xpath('//opf:spine')[0]
ref = spine.makeelement(OPF('itemref'), idref=titlepage_item.get('id')) ref = spine.makeelement(OPF('itemref'), idref=titlepage_item.get('id'))
container.insert_into_xml(spine, ref, index=0) container.insert_into_xml(spine, ref, index=0)
ver = container.opf_version_parsed
if ver.major < 3:
guide = container.opf_get_or_create('guide') guide = container.opf_get_or_create('guide')
container.insert_into_xml(guide, guide.makeelement( container.insert_into_xml(guide, guide.makeelement(
OPF('reference'), type='cover', title=_('Cover'), OPF('reference'), type='cover', title=_('Cover'),
@ -353,6 +378,9 @@ def create_epub_cover(container, cover_path, existing_image, options=None):
meta = metadata.makeelement(OPF('meta'), name='cover') meta = metadata.makeelement(OPF('meta'), name='cover')
meta.set('content', raster_cover_item.get('id')) meta.set('content', raster_cover_item.get('id'))
container.insert_into_xml(metadata, meta) container.insert_into_xml(metadata, meta)
else:
container.apply_unique_properties(raster_cover, 'cover-image')
container.apply_unique_properties(titlepage, 'calibre:title-page')
return raster_cover, titlepage return raster_cover, titlepage
@ -435,5 +463,3 @@ def set_epub_cover(container, cover_path, report, options=None):
if link_sub: if link_sub:
replace_links(container, link_sub, frag_map=lambda x, y:None) replace_links(container, link_sub, frag_map=lambda x, y:None)
return raster_cover, titlepage return raster_cover, titlepage

View File

@ -4,16 +4,75 @@
from __future__ import (unicode_literals, division, absolute_import, from __future__ import (unicode_literals, division, absolute_import,
print_function) print_function)
from io import BytesIO
from itertools import count
from zipfile import ZipFile, ZIP_STORED
import os import os
from calibre.ebooks.oeb.polish.tests.base import BaseTest from calibre.ebooks.oeb.polish.tests.base import BaseTest
from calibre.ebooks.oeb.polish.container import get_container from calibre.ebooks.oeb.polish.container import get_container
from calibre.ebooks.oeb.polish.create import create_book from calibre.ebooks.oeb.polish.create import create_book
from calibre.ebooks.oeb.polish.cover import find_cover_image, mark_as_cover
from calibre.ebooks.oeb.polish.toc import get_toc from calibre.ebooks.oeb.polish.toc import get_toc
from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.opf3 import CALIBRE_PREFIX
# Skeleton OPF used to build test books. The %s is substituted with
# CALIBRE_PREFIX when this module is imported; the {ver}, {metadata},
# {manifest}, {spine} and {guide} fields are filled in later with
# str.format() by create_epub().
OPF_TEMPLATE = '''
<package xmlns="http://www.idpf.org/2007/opf" version="{ver}" prefix="calibre: %s" unique-identifier="uid">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
<dc:identifier id="uid">test</dc:identifier>
{metadata}
</metadata>
<manifest>{manifest}</manifest>
<spine>{spine}</spine>
<guide>{guide}</guide>
</package>''' % CALIBRE_PREFIX # noqa
def create_manifest_item(name, data=b'', properties=None):
    ''' Describe one manifest entry as a (name, data, properties) tuple, the
    form that create_epub() iterates over. '''
    entry = name, data, properties
    return entry


# Short alias used by the tests
cmi = create_manifest_item
def create_epub(manifest, spine=(), guide=(), meta_cover=None, ver=3):
    ''' Build a minimal EPUB in memory and return it as a BytesIO positioned
    at the start.

    :param manifest: Iterable of (name, data, properties) tuples, see
                     create_manifest_item(). It is iterated twice. The name
                     is used as both the id and the href of the item.
    :param spine: Names to list as spine itemrefs.
    :param guide: (name, type) pairs to list as guide references.
    :param meta_cover: If set, the content of a <meta name="cover"> entry.
    :param ver: Major OPF version, written as "<ver>.0". '''
    manifest_xml = ''.join(
        '<item id="%s" href="%s" media-type="%s" %s/>' % (
            name, name, guess_type(name), ('properties="%s"' % properties if properties else ''))
        for name, data, properties in manifest)
    metadata_xml = '<meta name="cover" content="%s"/>' % meta_cover if meta_cover else ''
    spine_xml = ''.join('<itemref idref="%s"/>' % name for name in spine)
    guide_xml = ''.join('<reference href="%s" type="%s"/>' % (name, typ) for name, typ in guide)
    opf = OPF_TEMPLATE.format(
        manifest=manifest_xml, ver='%d.0'%ver, metadata=metadata_xml, spine=spine_xml, guide=guide_xml)
    buf = BytesIO()
    with ZipFile(buf, 'w', ZIP_STORED) as zf:
        zf.writestr('META-INF/container.xml', b'''
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="content.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>''')
        zf.writestr('content.opf', opf.encode('utf-8'))
        for name, data, properties in manifest:
            # Text payloads are stored as UTF-8, bytes are stored as-is
            payload = data.encode('utf-8') if isinstance(data, type('')) else data
            zf.writestr(name, payload)
    buf.seek(0)
    return buf
# Module-wide sequence number; Structure.create_epub() draws from it so that
# each generated book and container directory gets a distinct name.
counter = count()
class Structure(BaseTest): class Structure(BaseTest):
def create_epub(self, *args, **kw):
    ''' Write a book built by the module-level create_epub() into the test
    directory and return a container opened on it. All arguments are passed
    through to create_epub(). '''
    idx = next(counter)
    epub_path = os.path.join(self.tdir, str(idx) + 'book.epub')
    with open(epub_path, 'wb') as out:
        out.write(create_epub(*args, **kw).getvalue())
    container_dir = os.path.join(self.tdir, 'container%d' % idx)
    return get_container(epub_path, tdir=container_dir, tweak_mode=True)
def test_toc_detection(self): def test_toc_detection(self):
ep = os.path.join(self.tdir, 'book.epub') ep = os.path.join(self.tdir, 'book.epub')
create_book(Metadata('Test ToC'), ep) create_book(Metadata('Test ToC'), ep)
@ -29,3 +88,15 @@ class Structure(BaseTest):
toc = get_toc(c) toc = get_toc(c)
self.assertTrue(len(toc)) self.assertTrue(len(toc))
self.assertEqual(toc.as_dict['children'][0]['title'], 'EPUB 3 nav') self.assertEqual(toc.as_dict['children'][0]['title'], 'EPUB 3 nav')
def test_epub3_covers(self):
    ' Detection and marking of cover images in books with a version 3 OPF '
    # An image with no cover markers at all is not reported as a cover
    unmarked = self.create_epub([cmi('c.jpg')])
    self.assertIsNone(find_cover_image(unmarked))

    # A <meta name="cover"> entry is still honoured
    via_meta = self.create_epub([cmi('c.jpg')], meta_cover='c.jpg')
    self.assertEqual('c.jpg', find_cover_image(via_meta))

    # The cover-image property wins over a <meta name="cover"> entry
    book = self.create_epub([cmi('c.jpg', b'z', 'cover-image'), cmi('d.jpg')], meta_cover='d.jpg')
    self.assertEqual('c.jpg', find_cover_image(book))

    # After re-marking, d.jpg is the cover and no element in the OPF
    # carries a name attribute any more
    mark_as_cover(book, 'd.jpg')
    self.assertEqual('d.jpg', find_cover_image(book))
    self.assertFalse(book.opf_xpath('//*/@name'))