diff --git a/src/calibre/ebooks/conversion/plugins/epub_input.py b/src/calibre/ebooks/conversion/plugins/epub_input.py index f021b16124..e13a7990a0 100644 --- a/src/calibre/ebooks/conversion/plugins/epub_input.py +++ b/src/calibre/ebooks/conversion/plugins/epub_input.py @@ -287,10 +287,6 @@ class EPUBInput(InputFormatPlugin): raise DRMError(os.path.basename(path)) self.encrypted_fonts = self._encrypted_font_uris - epub3_nav = opf.epub3_nav - if epub3_nav is not None: - self.convert_epub3_nav(epub3_nav, opf, log, options) - if len(parts) > 1 and parts[0]: delta = '/'.join(parts[:-1])+'/' @@ -304,6 +300,11 @@ class EPUBInput(InputFormatPlugin): f = self.rationalize_cover3 if opf.package_version >= 3.0 else self.rationalize_cover2 self.removed_cover = f(opf, log) + if self.removed_cover: + self.removed_items_to_ignore = (self.removed_cover,) + epub3_nav = opf.epub3_nav + if epub3_nav is not None: + self.convert_epub3_nav(epub3_nav, opf, log, options) for x in opf.itermanifest(): if x.get('media-type', '') == 'application/x-dtbook+xml': @@ -350,7 +351,7 @@ class EPUBInput(InputFormatPlugin): from lxml import etree from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.oeb.polish.parsing import parse - from calibre.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX, urlnormalize + from calibre.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX, urlnormalize, urlunquote, serialize from calibre.ebooks.oeb.polish.toc import first_child from tempfile import NamedTemporaryFile with lopen(nav_path, 'rb') as f: @@ -401,9 +402,21 @@ class EPUBInput(InputFormatPlugin): ncx_id = opf.add_path_to_manifest(f.name, NCX_MIME) for spine in opf.root.xpath('//*[local-name()="spine"]'): spine.set('toc', ncx_id) - href = os.path.relpath(nav_path).replace(os.sep, '/') - opts.epub3_nav_href = urlnormalize(href) + opts.epub3_nav_href = urlnormalize(os.path.relpath(nav_path).replace(os.sep, '/')) opts.epub3_nav_parsed = root + if getattr(self, 'removed_cover', None): + changed = False + base_path = os.path.dirname(nav_path) + for elem in root.xpath('//*[@href]'): + href, frag = elem.get('href').partition('#')[::2] + link_path = os.path.relpath(os.path.join(base_path, urlunquote(href)), base_path) + abs_href = urlnormalize(link_path) + if abs_href == self.removed_cover: + changed = True + elem.set('data-calibre-removed-titlepage', '1') + if changed: + with open(nav_path, 'wb') as f: + f.write(serialize(root, 'application/xhtml+xml')) def postprocess_book(self, oeb, opts, log): rc = getattr(self, 'removed_cover', None) diff --git a/src/calibre/ebooks/conversion/plugins/epub_output.py b/src/calibre/ebooks/conversion/plugins/epub_output.py index e1523f452d..3741d3c9a4 100644 --- a/src/calibre/ebooks/conversion/plugins/epub_output.py +++ b/src/calibre/ebooks/conversion/plugins/epub_output.py @@ -286,6 +286,7 @@ class EPUBOutput(OutputFormatPlugin): def upgrade_to_epub3(self, tdir, opf): self.log.info('Upgrading to EPUB 3...') from calibre.ebooks.epub import simple_container_xml + from calibre.ebooks.oeb.polish.cover import fix_conversion_titlepage_links_in_nav try: os.mkdir(os.path.join(tdir, 'META-INF')) except EnvironmentError: @@ -296,7 +297,9 @@ class EPUBOutput(OutputFormatPlugin): container = EpubContainer(tdir, self.log) from calibre.ebooks.oeb.polish.upgrade import epub_2_to_3 existing_nav = getattr(self.opts, 'epub3_nav_parsed', None) - epub_2_to_3(container, self.log.info, previous_nav=existing_nav) + nav_href = getattr(self.opts, 'epub3_nav_href', None) + epub_2_to_3(container, self.log.info, previous_nav=(nav_href, existing_nav)) + fix_conversion_titlepage_links_in_nav(container) container.commit() os.remove(f.name) try: diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 8131e997f4..c8e3ab6bf6 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -131,8 +131,8 @@ OptionRecommendation(name='input_profile', 'conversion system information on how to interpret ' 'various information in the input document. For ' 'example resolution dependent lengths (i.e. lengths in ' - 'pixels). Choices are:')+ - ', '.join([x.short_name for x in input_profiles()]) + 'pixels). Choices are:')+ ', '.join([ + x.short_name for x in input_profiles()]) ), OptionRecommendation(name='output_profile', @@ -142,8 +142,8 @@ OptionRecommendation(name='output_profile', 'tells the conversion system how to optimize the ' 'created document for the specified device (such as by resizing images for the device screen size). In some cases, ' 'an output profile can be used to optimize the output for a particular device, but this is rarely necessary. ' - 'Choices are:') + - ', '.join([x.short_name for x in output_profiles()]) + 'Choices are:') + ', '.join([ + x.short_name for x in output_profiles()]) ), OptionRecommendation(name='base_font_size', @@ -897,8 +897,7 @@ OptionRecommendation(name='search_replace', try: val = parse_date(val, assume_utc=x=='timestamp') except: - self.log.exception(_('Failed to parse date/time') + ' ' + - unicode(val)) + self.log.exception(_('Failed to parse date/time') + ' ' + unicode(val)) continue setattr(mi, x, val) @@ -1096,7 +1095,7 @@ OptionRecommendation(name='search_replace', self.oeb = create_oebbook( self.log, self.oeb, self.opts, encoding=self.input_plugin.output_encoding, - for_regex_wizard=self.for_regex_wizard) + for_regex_wizard=self.for_regex_wizard, removed_items=getattr(self.input_plugin, 'removed_items_to_ignore', ())) if self.for_regex_wizard: return self.input_plugin.postprocess_book(self.oeb, self.opts, self.log) @@ -1190,8 +1189,8 @@ OptionRecommendation(name='search_replace', UnsmartenPunctuation()(self.oeb, self.opts) mobi_file_type = getattr(self.opts, 'mobi_file_type', 'old') - needs_old_markup = (self.output_plugin.file_type == 'lit' or - (self.output_plugin.file_type == 'mobi' and mobi_file_type == 'old')) + needs_old_markup = (self.output_plugin.file_type == 'lit' or ( + self.output_plugin.file_type == 'mobi' and mobi_file_type == 'old')) transform_css_rules = () if self.opts.transform_css_rules: transform_css_rules = self.opts.transform_css_rules @@ -1269,7 +1268,7 @@ def set_regex_wizard_callback(f): def create_oebbook(log, path_or_stream, opts, reader=None, - encoding='utf-8', populate=True, for_regex_wizard=False, specialize=None): + encoding='utf-8', populate=True, for_regex_wizard=False, specialize=None, removed_items=()): ''' Create an OEBBook. ''' @@ -1285,6 +1284,7 @@ def create_oebbook(log, path_or_stream, opts, reader=None, oeb = specialize(oeb) or oeb # Read OEB Book into OEBBook log('Parsing all content...') + oeb.removed_items_to_ignore = removed_items if reader is None: from calibre.ebooks.oeb.reader import OEBReader reader = OEBReader diff --git a/src/calibre/ebooks/oeb/polish/cover.py b/src/calibre/ebooks/oeb/polish/cover.py index d3cdf2ec1a..cb83820611 100644 --- a/src/calibre/ebooks/oeb/polish/cover.py +++ b/src/calibre/ebooks/oeb/polish/cover.py @@ -269,6 +269,20 @@ def find_cover_page(container): return landmark['dest'] +def fix_conversion_titlepage_links_in_nav(container): + from calibre.ebooks.oeb.polish.toc import find_existing_nav_toc + cover_page_name = find_cover_page(container) + if not cover_page_name: + return + nav_page_name = find_existing_nav_toc(container) + if not nav_page_name: + return + for elem in container.parsed(nav_page_name).xpath('//*[@data-calibre-removed-titlepage]'): + elem.attrib.pop('data-calibre-removed-titlepage') + elem.set('href', container.name_to_href(cover_page_name, nav_page_name)) + container.dirty(nav_page_name) + + def find_cover_image_in_page(container, cover_page): root = container.parsed(cover_page) body = XPath('//h:body')(root) diff --git a/src/calibre/ebooks/oeb/polish/toc.py b/src/calibre/ebooks/oeb/polish/toc.py index 68da36ebbe..d6a6fadb04 100644 --- a/src/calibre/ebooks/oeb/polish/toc.py +++ b/src/calibre/ebooks/oeb/polish/toc.py @@ -658,12 +658,17 @@ def ensure_single_nav_of_type(root, ntype='toc'): def commit_nav_toc(container, toc, lang=None, landmarks=None, previous_nav=None): from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree tocname = find_existing_nav_toc(container) + if previous_nav is not None: + nav_name = container.href_to_name(previous_nav[0]) + if nav_name and container.exists(nav_name): + tocname = nav_name + container.apply_unique_properties(tocname, 'nav') if tocname is None: item = container.generate_item('nav.xhtml', id_prefix='nav') item.set('properties', 'nav') tocname = container.href_to_name(item.get('href'), base=container.opf_name) if previous_nav is not None: - root = previous_nav + root = previous_nav[1] else: root = container.parse_xhtml(P('templates/new_nav.html', data=True).decode('utf-8')) container.replace(tocname, root) diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py index bf005087f2..dd145049d6 100644 --- a/src/calibre/ebooks/oeb/reader.py +++ b/src/calibre/ebooks/oeb/reader.py @@ -194,8 +194,7 @@ class OEBReader(object): new = set() for item in unchecked: data = None - if (item.media_type in cdoc or - item.media_type[-4:] in ('/xml', '+xml')): + if (item.media_type in cdoc or item.media_type[-4:] in ('/xml', '+xml')): try: data = item.data except: @@ -206,8 +205,7 @@ class OEBReader(object): if data is None: continue - if (item.media_type in OEB_DOCS or - item.media_type[-4:] in ('/xml', '+xml')): + if (item.media_type in OEB_DOCS or item.media_type[-4:] in ('/xml', '+xml')): hrefs = [r[2] for r in iterlinks(data)] for href in hrefs: if isinstance(href, bytes): @@ -320,7 +318,10 @@ class OEBReader(object): extras.update(new) unchecked = new version = int(self.oeb.version[0]) + removed_items_to_ignore = getattr(self.oeb, 'removed_items_to_ignore', ()) for item in sorted(extras): + if item.href in removed_items_to_ignore: + continue if version >= 2: self.logger.warn( 'Spine-referenced file %r not in spine' % item.href)