EPUB3 Input: Fix titlepage being referred to in the nav causing two titlepage entries in the final book.

This commit is contained in:
Kovid Goyal 2018-05-24 12:37:17 +05:30
parent 09ffa06cc4
commit 1b89462d73
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
6 changed files with 59 additions and 23 deletions

View File

@ -287,10 +287,6 @@ class EPUBInput(InputFormatPlugin):
raise DRMError(os.path.basename(path)) raise DRMError(os.path.basename(path))
self.encrypted_fonts = self._encrypted_font_uris self.encrypted_fonts = self._encrypted_font_uris
epub3_nav = opf.epub3_nav
if epub3_nav is not None:
self.convert_epub3_nav(epub3_nav, opf, log, options)
if len(parts) > 1 and parts[0]: if len(parts) > 1 and parts[0]:
delta = '/'.join(parts[:-1])+'/' delta = '/'.join(parts[:-1])+'/'
@ -304,6 +300,11 @@ class EPUBInput(InputFormatPlugin):
f = self.rationalize_cover3 if opf.package_version >= 3.0 else self.rationalize_cover2 f = self.rationalize_cover3 if opf.package_version >= 3.0 else self.rationalize_cover2
self.removed_cover = f(opf, log) self.removed_cover = f(opf, log)
if self.removed_cover:
self.removed_items_to_ignore = (self.removed_cover,)
epub3_nav = opf.epub3_nav
if epub3_nav is not None:
self.convert_epub3_nav(epub3_nav, opf, log, options)
for x in opf.itermanifest(): for x in opf.itermanifest():
if x.get('media-type', '') == 'application/x-dtbook+xml': if x.get('media-type', '') == 'application/x-dtbook+xml':
@ -350,7 +351,7 @@ class EPUBInput(InputFormatPlugin):
from lxml import etree from lxml import etree
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.oeb.polish.parsing import parse from calibre.ebooks.oeb.polish.parsing import parse
from calibre.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX, urlnormalize from calibre.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX, urlnormalize, urlunquote, serialize
from calibre.ebooks.oeb.polish.toc import first_child from calibre.ebooks.oeb.polish.toc import first_child
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
with lopen(nav_path, 'rb') as f: with lopen(nav_path, 'rb') as f:
@ -401,9 +402,21 @@ class EPUBInput(InputFormatPlugin):
ncx_id = opf.add_path_to_manifest(f.name, NCX_MIME) ncx_id = opf.add_path_to_manifest(f.name, NCX_MIME)
for spine in opf.root.xpath('//*[local-name()="spine"]'): for spine in opf.root.xpath('//*[local-name()="spine"]'):
spine.set('toc', ncx_id) spine.set('toc', ncx_id)
href = os.path.relpath(nav_path).replace(os.sep, '/') opts.epub3_nav_href = urlnormalize(os.path.relpath(nav_path).replace(os.sep, '/'))
opts.epub3_nav_href = urlnormalize(href)
opts.epub3_nav_parsed = root opts.epub3_nav_parsed = root
if getattr(self, 'removed_cover', None):
changed = False
base_path = os.path.dirname(nav_path)
for elem in root.xpath('//*[@href]'):
href, frag = elem.get('href').partition('#')[::2]
link_path = os.path.relpath(os.path.join(base_path, urlunquote(href)), base_path)
abs_href = urlnormalize(link_path)
if abs_href == self.removed_cover:
changed = True
elem.set('data-calibre-removed-titlepage', '1')
if changed:
with open(nav_path, 'wb') as f:
f.write(serialize(root, 'application/xhtml+xml'))
def postprocess_book(self, oeb, opts, log): def postprocess_book(self, oeb, opts, log):
rc = getattr(self, 'removed_cover', None) rc = getattr(self, 'removed_cover', None)

View File

@ -286,6 +286,7 @@ class EPUBOutput(OutputFormatPlugin):
def upgrade_to_epub3(self, tdir, opf): def upgrade_to_epub3(self, tdir, opf):
self.log.info('Upgrading to EPUB 3...') self.log.info('Upgrading to EPUB 3...')
from calibre.ebooks.epub import simple_container_xml from calibre.ebooks.epub import simple_container_xml
from calibre.ebooks.oeb.polish.cover import fix_conversion_titlepage_links_in_nav
try: try:
os.mkdir(os.path.join(tdir, 'META-INF')) os.mkdir(os.path.join(tdir, 'META-INF'))
except EnvironmentError: except EnvironmentError:
@ -296,7 +297,9 @@ class EPUBOutput(OutputFormatPlugin):
container = EpubContainer(tdir, self.log) container = EpubContainer(tdir, self.log)
from calibre.ebooks.oeb.polish.upgrade import epub_2_to_3 from calibre.ebooks.oeb.polish.upgrade import epub_2_to_3
existing_nav = getattr(self.opts, 'epub3_nav_parsed', None) existing_nav = getattr(self.opts, 'epub3_nav_parsed', None)
epub_2_to_3(container, self.log.info, previous_nav=existing_nav) nav_href = getattr(self.opts, 'epub3_nav_href', None)
epub_2_to_3(container, self.log.info, previous_nav=(nav_href, existing_nav))
fix_conversion_titlepage_links_in_nav(container)
container.commit() container.commit()
os.remove(f.name) os.remove(f.name)
try: try:

View File

@ -131,8 +131,8 @@ OptionRecommendation(name='input_profile',
'conversion system information on how to interpret ' 'conversion system information on how to interpret '
'various information in the input document. For ' 'various information in the input document. For '
'example resolution dependent lengths (i.e. lengths in ' 'example resolution dependent lengths (i.e. lengths in '
'pixels). Choices are:')+ 'pixels). Choices are:')+ ', '.join([
', '.join([x.short_name for x in input_profiles()]) x.short_name for x in input_profiles()])
), ),
OptionRecommendation(name='output_profile', OptionRecommendation(name='output_profile',
@ -142,8 +142,8 @@ OptionRecommendation(name='output_profile',
'tells the conversion system how to optimize the ' 'tells the conversion system how to optimize the '
'created document for the specified device (such as by resizing images for the device screen size). In some cases, ' 'created document for the specified device (such as by resizing images for the device screen size). In some cases, '
'an output profile can be used to optimize the output for a particular device, but this is rarely necessary. ' 'an output profile can be used to optimize the output for a particular device, but this is rarely necessary. '
'Choices are:') + 'Choices are:') + ', '.join([
', '.join([x.short_name for x in output_profiles()]) x.short_name for x in output_profiles()])
), ),
OptionRecommendation(name='base_font_size', OptionRecommendation(name='base_font_size',
@ -897,8 +897,7 @@ OptionRecommendation(name='search_replace',
try: try:
val = parse_date(val, assume_utc=x=='timestamp') val = parse_date(val, assume_utc=x=='timestamp')
except: except:
self.log.exception(_('Failed to parse date/time') + ' ' + self.log.exception(_('Failed to parse date/time') + ' ' + unicode(val))
unicode(val))
continue continue
setattr(mi, x, val) setattr(mi, x, val)
@ -1096,7 +1095,7 @@ OptionRecommendation(name='search_replace',
self.oeb = create_oebbook( self.oeb = create_oebbook(
self.log, self.oeb, self.opts, self.log, self.oeb, self.opts,
encoding=self.input_plugin.output_encoding, encoding=self.input_plugin.output_encoding,
for_regex_wizard=self.for_regex_wizard) for_regex_wizard=self.for_regex_wizard, removed_items=getattr(self.input_plugin, 'removed_items_to_ignore', ()))
if self.for_regex_wizard: if self.for_regex_wizard:
return return
self.input_plugin.postprocess_book(self.oeb, self.opts, self.log) self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
@ -1190,8 +1189,8 @@ OptionRecommendation(name='search_replace',
UnsmartenPunctuation()(self.oeb, self.opts) UnsmartenPunctuation()(self.oeb, self.opts)
mobi_file_type = getattr(self.opts, 'mobi_file_type', 'old') mobi_file_type = getattr(self.opts, 'mobi_file_type', 'old')
needs_old_markup = (self.output_plugin.file_type == 'lit' or needs_old_markup = (self.output_plugin.file_type == 'lit' or (
(self.output_plugin.file_type == 'mobi' and mobi_file_type == 'old')) self.output_plugin.file_type == 'mobi' and mobi_file_type == 'old'))
transform_css_rules = () transform_css_rules = ()
if self.opts.transform_css_rules: if self.opts.transform_css_rules:
transform_css_rules = self.opts.transform_css_rules transform_css_rules = self.opts.transform_css_rules
@ -1269,7 +1268,7 @@ def set_regex_wizard_callback(f):
def create_oebbook(log, path_or_stream, opts, reader=None, def create_oebbook(log, path_or_stream, opts, reader=None,
encoding='utf-8', populate=True, for_regex_wizard=False, specialize=None): encoding='utf-8', populate=True, for_regex_wizard=False, specialize=None, removed_items=()):
''' '''
Create an OEBBook. Create an OEBBook.
''' '''
@ -1285,6 +1284,7 @@ def create_oebbook(log, path_or_stream, opts, reader=None,
oeb = specialize(oeb) or oeb oeb = specialize(oeb) or oeb
# Read OEB Book into OEBBook # Read OEB Book into OEBBook
log('Parsing all content...') log('Parsing all content...')
oeb.removed_items_to_ignore = removed_items
if reader is None: if reader is None:
from calibre.ebooks.oeb.reader import OEBReader from calibre.ebooks.oeb.reader import OEBReader
reader = OEBReader reader = OEBReader

View File

@ -269,6 +269,20 @@ def find_cover_page(container):
return landmark['dest'] return landmark['dest']
def fix_conversion_titlepage_links_in_nav(container):
    '''
    Re-target nav links that were flagged as pointing at a removed titlepage.

    Every element in the nav document that carries the
    ``data-calibre-removed-titlepage`` attribute has that attribute removed
    and its ``href`` set to the link from the nav document to the cover page
    reported by :func:`find_cover_page`. The nav document is marked dirty
    for each element rewritten.

    Nothing is done when either the cover page or an existing nav document
    cannot be found.

    :param container: The book container to operate on.
    '''
    from calibre.ebooks.oeb.polish.toc import find_existing_nav_toc
    cover_name = find_cover_page(container)
    # Only look for the nav document once a cover page is known to exist.
    nav_name = find_existing_nav_toc(container) if cover_name else None
    if not nav_name:
        return
    nav_root = container.parsed(nav_name)
    flagged_links = nav_root.xpath('//*[@data-calibre-removed-titlepage]')
    for link in flagged_links:
        link.attrib.pop('data-calibre-removed-titlepage')
        new_href = container.name_to_href(cover_name, nav_name)
        link.set('href', new_href)
        # NOTE(review): dirtied per rewritten link, so an unchanged nav is
        # never marked dirty -- confirm against the original indentation.
        container.dirty(nav_name)
def find_cover_image_in_page(container, cover_page): def find_cover_image_in_page(container, cover_page):
root = container.parsed(cover_page) root = container.parsed(cover_page)
body = XPath('//h:body')(root) body = XPath('//h:body')(root)

View File

@ -658,12 +658,17 @@ def ensure_single_nav_of_type(root, ntype='toc'):
def commit_nav_toc(container, toc, lang=None, landmarks=None, previous_nav=None): def commit_nav_toc(container, toc, lang=None, landmarks=None, previous_nav=None):
from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree
tocname = find_existing_nav_toc(container) tocname = find_existing_nav_toc(container)
if previous_nav is not None:
nav_name = container.href_to_name(previous_nav[0])
if nav_name and container.exists(nav_name):
tocname = nav_name
container.apply_unique_properties(tocname, 'nav')
if tocname is None: if tocname is None:
item = container.generate_item('nav.xhtml', id_prefix='nav') item = container.generate_item('nav.xhtml', id_prefix='nav')
item.set('properties', 'nav') item.set('properties', 'nav')
tocname = container.href_to_name(item.get('href'), base=container.opf_name) tocname = container.href_to_name(item.get('href'), base=container.opf_name)
if previous_nav is not None: if previous_nav is not None:
root = previous_nav root = previous_nav[1]
else: else:
root = container.parse_xhtml(P('templates/new_nav.html', data=True).decode('utf-8')) root = container.parse_xhtml(P('templates/new_nav.html', data=True).decode('utf-8'))
container.replace(tocname, root) container.replace(tocname, root)

View File

@ -194,8 +194,7 @@ class OEBReader(object):
new = set() new = set()
for item in unchecked: for item in unchecked:
data = None data = None
if (item.media_type in cdoc or if (item.media_type in cdoc or item.media_type[-4:] in ('/xml', '+xml')):
item.media_type[-4:] in ('/xml', '+xml')):
try: try:
data = item.data data = item.data
except: except:
@ -206,8 +205,7 @@ class OEBReader(object):
if data is None: if data is None:
continue continue
if (item.media_type in OEB_DOCS or if (item.media_type in OEB_DOCS or item.media_type[-4:] in ('/xml', '+xml')):
item.media_type[-4:] in ('/xml', '+xml')):
hrefs = [r[2] for r in iterlinks(data)] hrefs = [r[2] for r in iterlinks(data)]
for href in hrefs: for href in hrefs:
if isinstance(href, bytes): if isinstance(href, bytes):
@ -320,7 +318,10 @@ class OEBReader(object):
extras.update(new) extras.update(new)
unchecked = new unchecked = new
version = int(self.oeb.version[0]) version = int(self.oeb.version[0])
removed_items_to_ignore = getattr(self.oeb, 'removed_items_to_ignore', ())
for item in sorted(extras): for item in sorted(extras):
if item.href in removed_items_to_ignore:
continue
if version >= 2: if version >= 2:
self.logger.warn( self.logger.warn(
'Spine-referenced file %r not in spine' % item.href) 'Spine-referenced file %r not in spine' % item.href)