EPUB3 Input: Fix titlepage being referred to in the nav causing two titlepage entries in the final book.

2025-07-09 03:04:10 -04:00 · 2018-05-24 12:37:17 +05:30 · 2018-05-24 12:37:17 +05:30 · 1b89462d73
commit 1b89462d73
parent 09ffa06cc4
6 changed files with 59 additions and 23 deletions
--- a/src/calibre/ebooks/conversion/plugins/epub_input.py
+++ b/src/calibre/ebooks/conversion/plugins/epub_input.py
@@ -287,10 +287,6 @@ class EPUBInput(InputFormatPlugin):
                raise DRMError(os.path.basename(path))
        self.encrypted_fonts = self._encrypted_font_uris

-        epub3_nav = opf.epub3_nav
-        if epub3_nav is not None:
-            self.convert_epub3_nav(epub3_nav, opf, log, options)
-
        if len(parts) > 1 and parts[0]:
            delta = '/'.join(parts[:-1])+'/'

@@ -304,6 +300,11 @@ class EPUBInput(InputFormatPlugin):

        f = self.rationalize_cover3 if opf.package_version >= 3.0 else self.rationalize_cover2
        self.removed_cover = f(opf, log)
+        if self.removed_cover:
+            self.removed_items_to_ignore = (self.removed_cover,)
+        epub3_nav = opf.epub3_nav
+        if epub3_nav is not None:
+            self.convert_epub3_nav(epub3_nav, opf, log, options)

        for x in opf.itermanifest():
            if x.get('media-type', '') == 'application/x-dtbook+xml':
@@ -350,7 +351,7 @@ class EPUBInput(InputFormatPlugin):
        from lxml import etree
        from calibre.ebooks.chardet import xml_to_unicode
        from calibre.ebooks.oeb.polish.parsing import parse
-        from calibre.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX, urlnormalize
+        from calibre.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX, urlnormalize, urlunquote, serialize
        from calibre.ebooks.oeb.polish.toc import first_child
        from tempfile import NamedTemporaryFile
        with lopen(nav_path, 'rb') as f:
@@ -401,9 +402,21 @@ class EPUBInput(InputFormatPlugin):
        ncx_id = opf.add_path_to_manifest(f.name, NCX_MIME)
        for spine in opf.root.xpath('//*[local-name()="spine"]'):
            spine.set('toc', ncx_id)
-        href = os.path.relpath(nav_path).replace(os.sep, '/')
-        opts.epub3_nav_href = urlnormalize(href)
+        opts.epub3_nav_href = urlnormalize(os.path.relpath(nav_path).replace(os.sep, '/'))
        opts.epub3_nav_parsed = root
+        if getattr(self, 'removed_cover', None):
+            changed = False
+            base_path = os.path.dirname(nav_path)
+            for elem in root.xpath('//*[@href]'):
+                href, frag = elem.get('href').partition('#')[::2]
+                link_path = os.path.relpath(os.path.join(base_path, urlunquote(href)), base_path)
+                abs_href = urlnormalize(link_path)
+                if abs_href == self.removed_cover:
+                    changed = True
+                    elem.set('data-calibre-removed-titlepage', '1')
+            if changed:
+                with open(nav_path, 'wb') as f:
+                    f.write(serialize(root, 'application/xhtml+xml'))

    def postprocess_book(self, oeb, opts, log):
        rc = getattr(self, 'removed_cover', None)
--- a/src/calibre/ebooks/conversion/plugins/epub_output.py
+++ b/src/calibre/ebooks/conversion/plugins/epub_output.py
@@ -286,6 +286,7 @@ class EPUBOutput(OutputFormatPlugin):
    def upgrade_to_epub3(self, tdir, opf):
        self.log.info('Upgrading to EPUB 3...')
        from calibre.ebooks.epub import simple_container_xml
+        from calibre.ebooks.oeb.polish.cover import fix_conversion_titlepage_links_in_nav
        try:
            os.mkdir(os.path.join(tdir, 'META-INF'))
        except EnvironmentError:
@@ -296,7 +297,9 @@ class EPUBOutput(OutputFormatPlugin):
        container = EpubContainer(tdir, self.log)
        from calibre.ebooks.oeb.polish.upgrade import epub_2_to_3
        existing_nav = getattr(self.opts, 'epub3_nav_parsed', None)
-        epub_2_to_3(container, self.log.info, previous_nav=existing_nav)
+        nav_href = getattr(self.opts, 'epub3_nav_href', None)
+        epub_2_to_3(container, self.log.info, previous_nav=(nav_href, existing_nav))
+        fix_conversion_titlepage_links_in_nav(container)
        container.commit()
        os.remove(f.name)
        try:
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -131,8 +131,8 @@ OptionRecommendation(name='input_profile',
                   'conversion system information on how to interpret '
                   'various information in the input document. For '
                   'example resolution dependent lengths (i.e. lengths in '
-                   'pixels). Choices are:')+
-                        ', '.join([x.short_name for x in input_profiles()])
+                   'pixels). Choices are:')+ ', '.join([
+                       x.short_name for x in input_profiles()])
        ),

 OptionRecommendation(name='output_profile',
@@ -142,8 +142,8 @@ OptionRecommendation(name='output_profile',
                   'tells the conversion system how to optimize the '
                   'created document for the specified device (such as by resizing images for the device screen size). In some cases, '
                   'an output profile can be used to optimize the output for a particular device, but this is rarely necessary. '
-                   'Choices are:') +
-                           ', '.join([x.short_name for x in output_profiles()])
+                   'Choices are:') + ', '.join([
+                       x.short_name for x in output_profiles()])
        ),

 OptionRecommendation(name='base_font_size',
@@ -897,8 +897,7 @@ OptionRecommendation(name='search_replace',
                    try:
                        val = parse_date(val, assume_utc=x=='timestamp')
                    except:
-                        self.log.exception(_('Failed to parse date/time') + ' ' +
-                                unicode(val))
+                        self.log.exception(_('Failed to parse date/time') + ' ' + unicode(val))
                        continue
                setattr(mi, x, val)

@@ -1096,7 +1095,7 @@ OptionRecommendation(name='search_replace',
                self.oeb = create_oebbook(
                    self.log, self.oeb, self.opts,
                    encoding=self.input_plugin.output_encoding,
-                    for_regex_wizard=self.for_regex_wizard)
+                    for_regex_wizard=self.for_regex_wizard, removed_items=getattr(self.input_plugin, 'removed_items_to_ignore', ()))
            if self.for_regex_wizard:
                return
            self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
@@ -1190,8 +1189,8 @@ OptionRecommendation(name='search_replace',
            UnsmartenPunctuation()(self.oeb, self.opts)

        mobi_file_type = getattr(self.opts, 'mobi_file_type', 'old')
-        needs_old_markup = (self.output_plugin.file_type == 'lit' or
-                    (self.output_plugin.file_type == 'mobi' and mobi_file_type == 'old'))
+        needs_old_markup = (self.output_plugin.file_type == 'lit' or (
+            self.output_plugin.file_type == 'mobi' and mobi_file_type == 'old'))
        transform_css_rules = ()
        if self.opts.transform_css_rules:
            transform_css_rules = self.opts.transform_css_rules
@@ -1269,7 +1268,7 @@ def set_regex_wizard_callback(f):


 def create_oebbook(log, path_or_stream, opts, reader=None,
-        encoding='utf-8', populate=True, for_regex_wizard=False, specialize=None):
+        encoding='utf-8', populate=True, for_regex_wizard=False, specialize=None, removed_items=()):
    '''
    Create an OEBBook.
    '''
@@ -1285,6 +1284,7 @@ def create_oebbook(log, path_or_stream, opts, reader=None,
        oeb = specialize(oeb) or oeb
    # Read OEB Book into OEBBook
    log('Parsing all content...')
+    oeb.removed_items_to_ignore = removed_items
    if reader is None:
        from calibre.ebooks.oeb.reader import OEBReader
        reader = OEBReader
--- a/src/calibre/ebooks/oeb/polish/cover.py
+++ b/src/calibre/ebooks/oeb/polish/cover.py
@@ -269,6 +269,20 @@ def find_cover_page(container):
                return landmark['dest']


+def fix_conversion_titlepage_links_in_nav(container):
+    '''
+    Re-point nav links that referred to the removed titlepage at the
+    book's current cover page.
+
+    Every element of the nav document carrying the
+    ``data-calibre-removed-titlepage`` marker attribute has the marker
+    stripped and its ``href`` rewritten to the page reported by
+    find_cover_page(). Does nothing when the book has no cover page or no
+    nav document, and does not mark the nav as dirty when no element
+    carries the marker.
+    '''
+    from calibre.ebooks.oeb.polish.toc import find_existing_nav_toc
+    cover_page_name = find_cover_page(container)
+    if not cover_page_name:
+        return
+    nav_page_name = find_existing_nav_toc(container)
+    if not nav_page_name:
+        return
+    marked = container.parsed(nav_page_name).xpath('//*[@data-calibre-removed-titlepage]')
+    if not marked:
+        # No link was tagged, so there is nothing to rewrite and no reason
+        # to flag the nav document as modified
+        return
+    # Every tagged link gets the same target, so resolve it only once
+    cover_href = container.name_to_href(cover_page_name, nav_page_name)
+    for elem in marked:
+        del elem.attrib['data-calibre-removed-titlepage']
+        elem.set('href', cover_href)
+    container.dirty(nav_page_name)
+
+
 def find_cover_image_in_page(container, cover_page):
    root = container.parsed(cover_page)
    body = XPath('//h:body')(root)
--- a/src/calibre/ebooks/oeb/polish/toc.py
+++ b/src/calibre/ebooks/oeb/polish/toc.py
@@ -658,12 +658,17 @@ def ensure_single_nav_of_type(root, ntype='toc'):
 def commit_nav_toc(container, toc, lang=None, landmarks=None, previous_nav=None):
    from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree
    tocname = find_existing_nav_toc(container)
+    if previous_nav is not None:
+        nav_name = container.href_to_name(previous_nav[0])
+        if nav_name and container.exists(nav_name):
+            tocname = nav_name
+            container.apply_unique_properties(tocname, 'nav')
    if tocname is None:
        item = container.generate_item('nav.xhtml', id_prefix='nav')
        item.set('properties', 'nav')
        tocname = container.href_to_name(item.get('href'), base=container.opf_name)
        if previous_nav is not None:
-            root = previous_nav
+            root = previous_nav[1]
        else:
            root = container.parse_xhtml(P('templates/new_nav.html', data=True).decode('utf-8'))
        container.replace(tocname, root)
--- a/src/calibre/ebooks/oeb/reader.py
+++ b/src/calibre/ebooks/oeb/reader.py
@@ -194,8 +194,7 @@ class OEBReader(object):
            new = set()
            for item in unchecked:
                data = None
-                if (item.media_type in cdoc or
-                        item.media_type[-4:] in ('/xml', '+xml')):
+                if (item.media_type in cdoc or item.media_type[-4:] in ('/xml', '+xml')):
                    try:
                        data = item.data
                    except:
@@ -206,8 +205,7 @@ class OEBReader(object):
                if data is None:
                    continue

-                if (item.media_type in OEB_DOCS or
-                        item.media_type[-4:] in ('/xml', '+xml')):
+                if (item.media_type in OEB_DOCS or item.media_type[-4:] in ('/xml', '+xml')):
                    hrefs = [r[2] for r in iterlinks(data)]
                    for href in hrefs:
                        if isinstance(href, bytes):
@@ -320,7 +318,10 @@ class OEBReader(object):
            extras.update(new)
            unchecked = new
        version = int(self.oeb.version[0])
+        removed_items_to_ignore = getattr(self.oeb, 'removed_items_to_ignore', ())
        for item in sorted(extras):
+            if item.href in removed_items_to_ignore:
+                continue
            if version >= 2:
                self.logger.warn(
                    'Spine-referenced file %r not in spine' % item.href)