EPUB3 Input: Fix titlepage being referred to in the nav causing two titlepage entries in the final book.

This commit is contained in:
Kovid Goyal 2018-05-24 12:37:17 +05:30
parent 09ffa06cc4
commit 1b89462d73
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
6 changed files with 59 additions and 23 deletions

View File

@ -287,10 +287,6 @@ class EPUBInput(InputFormatPlugin):
raise DRMError(os.path.basename(path))
self.encrypted_fonts = self._encrypted_font_uris
epub3_nav = opf.epub3_nav
if epub3_nav is not None:
self.convert_epub3_nav(epub3_nav, opf, log, options)
if len(parts) > 1 and parts[0]:
delta = '/'.join(parts[:-1])+'/'
@ -304,6 +300,11 @@ class EPUBInput(InputFormatPlugin):
f = self.rationalize_cover3 if opf.package_version >= 3.0 else self.rationalize_cover2
self.removed_cover = f(opf, log)
if self.removed_cover:
self.removed_items_to_ignore = (self.removed_cover,)
epub3_nav = opf.epub3_nav
if epub3_nav is not None:
self.convert_epub3_nav(epub3_nav, opf, log, options)
for x in opf.itermanifest():
if x.get('media-type', '') == 'application/x-dtbook+xml':
@ -350,7 +351,7 @@ class EPUBInput(InputFormatPlugin):
from lxml import etree
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.oeb.polish.parsing import parse
from calibre.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX, urlnormalize
from calibre.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX, urlnormalize, urlunquote, serialize
from calibre.ebooks.oeb.polish.toc import first_child
from tempfile import NamedTemporaryFile
with lopen(nav_path, 'rb') as f:
@ -401,9 +402,21 @@ class EPUBInput(InputFormatPlugin):
ncx_id = opf.add_path_to_manifest(f.name, NCX_MIME)
for spine in opf.root.xpath('//*[local-name()="spine"]'):
spine.set('toc', ncx_id)
href = os.path.relpath(nav_path).replace(os.sep, '/')
opts.epub3_nav_href = urlnormalize(href)
opts.epub3_nav_href = urlnormalize(os.path.relpath(nav_path).replace(os.sep, '/'))
opts.epub3_nav_parsed = root
if getattr(self, 'removed_cover', None):
changed = False
base_path = os.path.dirname(nav_path)
for elem in root.xpath('//*[@href]'):
href, frag = elem.get('href').partition('#')[::2]
link_path = os.path.relpath(os.path.join(base_path, urlunquote(href)), base_path)
abs_href = urlnormalize(link_path)
if abs_href == self.removed_cover:
changed = True
elem.set('data-calibre-removed-titlepage', '1')
if changed:
with open(nav_path, 'wb') as f:
f.write(serialize(root, 'application/xhtml+xml'))
def postprocess_book(self, oeb, opts, log):
rc = getattr(self, 'removed_cover', None)

View File

@ -286,6 +286,7 @@ class EPUBOutput(OutputFormatPlugin):
def upgrade_to_epub3(self, tdir, opf):
self.log.info('Upgrading to EPUB 3...')
from calibre.ebooks.epub import simple_container_xml
from calibre.ebooks.oeb.polish.cover import fix_conversion_titlepage_links_in_nav
try:
os.mkdir(os.path.join(tdir, 'META-INF'))
except EnvironmentError:
@ -296,7 +297,9 @@ class EPUBOutput(OutputFormatPlugin):
container = EpubContainer(tdir, self.log)
from calibre.ebooks.oeb.polish.upgrade import epub_2_to_3
existing_nav = getattr(self.opts, 'epub3_nav_parsed', None)
epub_2_to_3(container, self.log.info, previous_nav=existing_nav)
nav_href = getattr(self.opts, 'epub3_nav_href', None)
epub_2_to_3(container, self.log.info, previous_nav=(nav_href, existing_nav))
fix_conversion_titlepage_links_in_nav(container)
container.commit()
os.remove(f.name)
try:

View File

@ -131,8 +131,8 @@ OptionRecommendation(name='input_profile',
'conversion system information on how to interpret '
'various information in the input document. For '
'example resolution dependent lengths (i.e. lengths in '
'pixels). Choices are:')+
', '.join([x.short_name for x in input_profiles()])
'pixels). Choices are:')+ ', '.join([
x.short_name for x in input_profiles()])
),
OptionRecommendation(name='output_profile',
@ -142,8 +142,8 @@ OptionRecommendation(name='output_profile',
'tells the conversion system how to optimize the '
'created document for the specified device (such as by resizing images for the device screen size). In some cases, '
'an output profile can be used to optimize the output for a particular device, but this is rarely necessary. '
'Choices are:') +
', '.join([x.short_name for x in output_profiles()])
'Choices are:') + ', '.join([
x.short_name for x in output_profiles()])
),
OptionRecommendation(name='base_font_size',
@ -897,8 +897,7 @@ OptionRecommendation(name='search_replace',
try:
val = parse_date(val, assume_utc=x=='timestamp')
except:
self.log.exception(_('Failed to parse date/time') + ' ' +
unicode(val))
self.log.exception(_('Failed to parse date/time') + ' ' + unicode(val))
continue
setattr(mi, x, val)
@ -1096,7 +1095,7 @@ OptionRecommendation(name='search_replace',
self.oeb = create_oebbook(
self.log, self.oeb, self.opts,
encoding=self.input_plugin.output_encoding,
for_regex_wizard=self.for_regex_wizard)
for_regex_wizard=self.for_regex_wizard, removed_items=getattr(self.input_plugin, 'removed_items_to_ignore', ()))
if self.for_regex_wizard:
return
self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
@ -1190,8 +1189,8 @@ OptionRecommendation(name='search_replace',
UnsmartenPunctuation()(self.oeb, self.opts)
mobi_file_type = getattr(self.opts, 'mobi_file_type', 'old')
needs_old_markup = (self.output_plugin.file_type == 'lit' or
(self.output_plugin.file_type == 'mobi' and mobi_file_type == 'old'))
needs_old_markup = (self.output_plugin.file_type == 'lit' or (
self.output_plugin.file_type == 'mobi' and mobi_file_type == 'old'))
transform_css_rules = ()
if self.opts.transform_css_rules:
transform_css_rules = self.opts.transform_css_rules
@ -1269,7 +1268,7 @@ def set_regex_wizard_callback(f):
def create_oebbook(log, path_or_stream, opts, reader=None,
encoding='utf-8', populate=True, for_regex_wizard=False, specialize=None):
encoding='utf-8', populate=True, for_regex_wizard=False, specialize=None, removed_items=()):
'''
Create an OEBBook.
'''
@ -1285,6 +1284,7 @@ def create_oebbook(log, path_or_stream, opts, reader=None,
oeb = specialize(oeb) or oeb
# Read OEB Book into OEBBook
log('Parsing all content...')
oeb.removed_items_to_ignore = removed_items
if reader is None:
from calibre.ebooks.oeb.reader import OEBReader
reader = OEBReader

View File

@ -269,6 +269,20 @@ def find_cover_page(container):
return landmark['dest']
def fix_conversion_titlepage_links_in_nav(container):
    '''
    Re-point nav links that were tagged as referring to a removed titlepage.

    Every element in the nav document that carries the
    ``data-calibre-removed-titlepage`` attribute has that marker removed and
    its ``href`` set to the cover page reported by find_cover_page(). Nothing
    is done when no cover page or no existing nav document is found, or when
    no element carries the marker.
    '''
    from calibre.ebooks.oeb.polish.toc import find_existing_nav_toc
    marker = 'data-calibre-removed-titlepage'
    cover_page_name = find_cover_page(container)
    if not cover_page_name:
        return
    nav_page_name = find_existing_nav_toc(container)
    if not nav_page_name:
        return
    tagged = container.parsed(nav_page_name).xpath('//*[@%s]' % marker)
    if not tagged:
        # Nothing was tagged, so leave the nav document untouched
        return
    # The replacement href is identical for every tagged element
    cover_href = container.name_to_href(cover_page_name, nav_page_name)
    for elem in tagged:
        elem.attrib.pop(marker)
        elem.set('href', cover_href)
    # Flag the nav as modified once, and only after real changes were made
    container.dirty(nav_page_name)
def find_cover_image_in_page(container, cover_page):
root = container.parsed(cover_page)
body = XPath('//h:body')(root)

View File

@ -658,12 +658,17 @@ def ensure_single_nav_of_type(root, ntype='toc'):
def commit_nav_toc(container, toc, lang=None, landmarks=None, previous_nav=None):
from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree
tocname = find_existing_nav_toc(container)
if previous_nav is not None:
nav_name = container.href_to_name(previous_nav[0])
if nav_name and container.exists(nav_name):
tocname = nav_name
container.apply_unique_properties(tocname, 'nav')
if tocname is None:
item = container.generate_item('nav.xhtml', id_prefix='nav')
item.set('properties', 'nav')
tocname = container.href_to_name(item.get('href'), base=container.opf_name)
if previous_nav is not None:
root = previous_nav
root = previous_nav[1]
else:
root = container.parse_xhtml(P('templates/new_nav.html', data=True).decode('utf-8'))
container.replace(tocname, root)

View File

@ -194,8 +194,7 @@ class OEBReader(object):
new = set()
for item in unchecked:
data = None
if (item.media_type in cdoc or
item.media_type[-4:] in ('/xml', '+xml')):
if (item.media_type in cdoc or item.media_type[-4:] in ('/xml', '+xml')):
try:
data = item.data
except:
@ -206,8 +205,7 @@ class OEBReader(object):
if data is None:
continue
if (item.media_type in OEB_DOCS or
item.media_type[-4:] in ('/xml', '+xml')):
if (item.media_type in OEB_DOCS or item.media_type[-4:] in ('/xml', '+xml')):
hrefs = [r[2] for r in iterlinks(data)]
for href in hrefs:
if isinstance(href, bytes):
@ -320,7 +318,10 @@ class OEBReader(object):
extras.update(new)
unchecked = new
version = int(self.oeb.version[0])
removed_items_to_ignore = getattr(self.oeb, 'removed_items_to_ignore', ())
for item in sorted(extras):
if item.href in removed_items_to_ignore:
continue
if version >= 2:
self.logger.warn(
'Spine-referenced file %r not in spine' % item.href)