mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
EPUB3 Input: Fix titlepage being referred to in the nav causing two titlepage entries in the final book.
This commit is contained in:
parent
09ffa06cc4
commit
1b89462d73
@ -287,10 +287,6 @@ class EPUBInput(InputFormatPlugin):
|
|||||||
raise DRMError(os.path.basename(path))
|
raise DRMError(os.path.basename(path))
|
||||||
self.encrypted_fonts = self._encrypted_font_uris
|
self.encrypted_fonts = self._encrypted_font_uris
|
||||||
|
|
||||||
epub3_nav = opf.epub3_nav
|
|
||||||
if epub3_nav is not None:
|
|
||||||
self.convert_epub3_nav(epub3_nav, opf, log, options)
|
|
||||||
|
|
||||||
if len(parts) > 1 and parts[0]:
|
if len(parts) > 1 and parts[0]:
|
||||||
delta = '/'.join(parts[:-1])+'/'
|
delta = '/'.join(parts[:-1])+'/'
|
||||||
|
|
||||||
@ -304,6 +300,11 @@ class EPUBInput(InputFormatPlugin):
|
|||||||
|
|
||||||
f = self.rationalize_cover3 if opf.package_version >= 3.0 else self.rationalize_cover2
|
f = self.rationalize_cover3 if opf.package_version >= 3.0 else self.rationalize_cover2
|
||||||
self.removed_cover = f(opf, log)
|
self.removed_cover = f(opf, log)
|
||||||
|
if self.removed_cover:
|
||||||
|
self.removed_items_to_ignore = (self.removed_cover,)
|
||||||
|
epub3_nav = opf.epub3_nav
|
||||||
|
if epub3_nav is not None:
|
||||||
|
self.convert_epub3_nav(epub3_nav, opf, log, options)
|
||||||
|
|
||||||
for x in opf.itermanifest():
|
for x in opf.itermanifest():
|
||||||
if x.get('media-type', '') == 'application/x-dtbook+xml':
|
if x.get('media-type', '') == 'application/x-dtbook+xml':
|
||||||
@ -350,7 +351,7 @@ class EPUBInput(InputFormatPlugin):
|
|||||||
from lxml import etree
|
from lxml import etree
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from calibre.ebooks.oeb.polish.parsing import parse
|
from calibre.ebooks.oeb.polish.parsing import parse
|
||||||
from calibre.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX, urlnormalize
|
from calibre.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX, urlnormalize, urlunquote, serialize
|
||||||
from calibre.ebooks.oeb.polish.toc import first_child
|
from calibre.ebooks.oeb.polish.toc import first_child
|
||||||
from tempfile import NamedTemporaryFile
|
from tempfile import NamedTemporaryFile
|
||||||
with lopen(nav_path, 'rb') as f:
|
with lopen(nav_path, 'rb') as f:
|
||||||
@ -401,9 +402,21 @@ class EPUBInput(InputFormatPlugin):
|
|||||||
ncx_id = opf.add_path_to_manifest(f.name, NCX_MIME)
|
ncx_id = opf.add_path_to_manifest(f.name, NCX_MIME)
|
||||||
for spine in opf.root.xpath('//*[local-name()="spine"]'):
|
for spine in opf.root.xpath('//*[local-name()="spine"]'):
|
||||||
spine.set('toc', ncx_id)
|
spine.set('toc', ncx_id)
|
||||||
href = os.path.relpath(nav_path).replace(os.sep, '/')
|
opts.epub3_nav_href = urlnormalize(os.path.relpath(nav_path).replace(os.sep, '/'))
|
||||||
opts.epub3_nav_href = urlnormalize(href)
|
|
||||||
opts.epub3_nav_parsed = root
|
opts.epub3_nav_parsed = root
|
||||||
|
if getattr(self, 'removed_cover', None):
|
||||||
|
changed = False
|
||||||
|
base_path = os.path.dirname(nav_path)
|
||||||
|
for elem in root.xpath('//*[@href]'):
|
||||||
|
href, frag = elem.get('href').partition('#')[::2]
|
||||||
|
link_path = os.path.relpath(os.path.join(base_path, urlunquote(href)), base_path)
|
||||||
|
abs_href = urlnormalize(link_path)
|
||||||
|
if abs_href == self.removed_cover:
|
||||||
|
changed = True
|
||||||
|
elem.set('data-calibre-removed-titlepage', '1')
|
||||||
|
if changed:
|
||||||
|
with open(nav_path, 'wb') as f:
|
||||||
|
f.write(serialize(root, 'application/xhtml+xml'))
|
||||||
|
|
||||||
def postprocess_book(self, oeb, opts, log):
|
def postprocess_book(self, oeb, opts, log):
|
||||||
rc = getattr(self, 'removed_cover', None)
|
rc = getattr(self, 'removed_cover', None)
|
||||||
|
@ -286,6 +286,7 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
def upgrade_to_epub3(self, tdir, opf):
|
def upgrade_to_epub3(self, tdir, opf):
|
||||||
self.log.info('Upgrading to EPUB 3...')
|
self.log.info('Upgrading to EPUB 3...')
|
||||||
from calibre.ebooks.epub import simple_container_xml
|
from calibre.ebooks.epub import simple_container_xml
|
||||||
|
from calibre.ebooks.oeb.polish.cover import fix_conversion_titlepage_links_in_nav
|
||||||
try:
|
try:
|
||||||
os.mkdir(os.path.join(tdir, 'META-INF'))
|
os.mkdir(os.path.join(tdir, 'META-INF'))
|
||||||
except EnvironmentError:
|
except EnvironmentError:
|
||||||
@ -296,7 +297,9 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
container = EpubContainer(tdir, self.log)
|
container = EpubContainer(tdir, self.log)
|
||||||
from calibre.ebooks.oeb.polish.upgrade import epub_2_to_3
|
from calibre.ebooks.oeb.polish.upgrade import epub_2_to_3
|
||||||
existing_nav = getattr(self.opts, 'epub3_nav_parsed', None)
|
existing_nav = getattr(self.opts, 'epub3_nav_parsed', None)
|
||||||
epub_2_to_3(container, self.log.info, previous_nav=existing_nav)
|
nav_href = getattr(self.opts, 'epub3_nav_href', None)
|
||||||
|
epub_2_to_3(container, self.log.info, previous_nav=(nav_href, existing_nav))
|
||||||
|
fix_conversion_titlepage_links_in_nav(container)
|
||||||
container.commit()
|
container.commit()
|
||||||
os.remove(f.name)
|
os.remove(f.name)
|
||||||
try:
|
try:
|
||||||
|
@ -131,8 +131,8 @@ OptionRecommendation(name='input_profile',
|
|||||||
'conversion system information on how to interpret '
|
'conversion system information on how to interpret '
|
||||||
'various information in the input document. For '
|
'various information in the input document. For '
|
||||||
'example resolution dependent lengths (i.e. lengths in '
|
'example resolution dependent lengths (i.e. lengths in '
|
||||||
'pixels). Choices are:')+
|
'pixels). Choices are:')+ ', '.join([
|
||||||
', '.join([x.short_name for x in input_profiles()])
|
x.short_name for x in input_profiles()])
|
||||||
),
|
),
|
||||||
|
|
||||||
OptionRecommendation(name='output_profile',
|
OptionRecommendation(name='output_profile',
|
||||||
@ -142,8 +142,8 @@ OptionRecommendation(name='output_profile',
|
|||||||
'tells the conversion system how to optimize the '
|
'tells the conversion system how to optimize the '
|
||||||
'created document for the specified device (such as by resizing images for the device screen size). In some cases, '
|
'created document for the specified device (such as by resizing images for the device screen size). In some cases, '
|
||||||
'an output profile can be used to optimize the output for a particular device, but this is rarely necessary. '
|
'an output profile can be used to optimize the output for a particular device, but this is rarely necessary. '
|
||||||
'Choices are:') +
|
'Choices are:') + ', '.join([
|
||||||
', '.join([x.short_name for x in output_profiles()])
|
x.short_name for x in output_profiles()])
|
||||||
),
|
),
|
||||||
|
|
||||||
OptionRecommendation(name='base_font_size',
|
OptionRecommendation(name='base_font_size',
|
||||||
@ -897,8 +897,7 @@ OptionRecommendation(name='search_replace',
|
|||||||
try:
|
try:
|
||||||
val = parse_date(val, assume_utc=x=='timestamp')
|
val = parse_date(val, assume_utc=x=='timestamp')
|
||||||
except:
|
except:
|
||||||
self.log.exception(_('Failed to parse date/time') + ' ' +
|
self.log.exception(_('Failed to parse date/time') + ' ' + unicode(val))
|
||||||
unicode(val))
|
|
||||||
continue
|
continue
|
||||||
setattr(mi, x, val)
|
setattr(mi, x, val)
|
||||||
|
|
||||||
@ -1096,7 +1095,7 @@ OptionRecommendation(name='search_replace',
|
|||||||
self.oeb = create_oebbook(
|
self.oeb = create_oebbook(
|
||||||
self.log, self.oeb, self.opts,
|
self.log, self.oeb, self.opts,
|
||||||
encoding=self.input_plugin.output_encoding,
|
encoding=self.input_plugin.output_encoding,
|
||||||
for_regex_wizard=self.for_regex_wizard)
|
for_regex_wizard=self.for_regex_wizard, removed_items=getattr(self.input_plugin, 'removed_items_to_ignore', ()))
|
||||||
if self.for_regex_wizard:
|
if self.for_regex_wizard:
|
||||||
return
|
return
|
||||||
self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
|
self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
|
||||||
@ -1190,8 +1189,8 @@ OptionRecommendation(name='search_replace',
|
|||||||
UnsmartenPunctuation()(self.oeb, self.opts)
|
UnsmartenPunctuation()(self.oeb, self.opts)
|
||||||
|
|
||||||
mobi_file_type = getattr(self.opts, 'mobi_file_type', 'old')
|
mobi_file_type = getattr(self.opts, 'mobi_file_type', 'old')
|
||||||
needs_old_markup = (self.output_plugin.file_type == 'lit' or
|
needs_old_markup = (self.output_plugin.file_type == 'lit' or (
|
||||||
(self.output_plugin.file_type == 'mobi' and mobi_file_type == 'old'))
|
self.output_plugin.file_type == 'mobi' and mobi_file_type == 'old'))
|
||||||
transform_css_rules = ()
|
transform_css_rules = ()
|
||||||
if self.opts.transform_css_rules:
|
if self.opts.transform_css_rules:
|
||||||
transform_css_rules = self.opts.transform_css_rules
|
transform_css_rules = self.opts.transform_css_rules
|
||||||
@ -1269,7 +1268,7 @@ def set_regex_wizard_callback(f):
|
|||||||
|
|
||||||
|
|
||||||
def create_oebbook(log, path_or_stream, opts, reader=None,
|
def create_oebbook(log, path_or_stream, opts, reader=None,
|
||||||
encoding='utf-8', populate=True, for_regex_wizard=False, specialize=None):
|
encoding='utf-8', populate=True, for_regex_wizard=False, specialize=None, removed_items=()):
|
||||||
'''
|
'''
|
||||||
Create an OEBBook.
|
Create an OEBBook.
|
||||||
'''
|
'''
|
||||||
@ -1285,6 +1284,7 @@ def create_oebbook(log, path_or_stream, opts, reader=None,
|
|||||||
oeb = specialize(oeb) or oeb
|
oeb = specialize(oeb) or oeb
|
||||||
# Read OEB Book into OEBBook
|
# Read OEB Book into OEBBook
|
||||||
log('Parsing all content...')
|
log('Parsing all content...')
|
||||||
|
oeb.removed_items_to_ignore = removed_items
|
||||||
if reader is None:
|
if reader is None:
|
||||||
from calibre.ebooks.oeb.reader import OEBReader
|
from calibre.ebooks.oeb.reader import OEBReader
|
||||||
reader = OEBReader
|
reader = OEBReader
|
||||||
|
@ -269,6 +269,20 @@ def find_cover_page(container):
|
|||||||
return landmark['dest']
|
return landmark['dest']
|
||||||
|
|
||||||
|
|
||||||
|
def fix_conversion_titlepage_links_in_nav(container):
    '''
    Re-point nav links carrying the ``data-calibre-removed-titlepage``
    marker attribute at the book's current cover page.

    The marker attribute is removed from every matching element, its
    ``href`` is rewritten relative to the nav document, and the nav
    document is marked dirty. Nothing happens when the container has no
    cover page or no existing nav document.
    '''
    from calibre.ebooks.oeb.polish.toc import find_existing_nav_toc
    titlepage_name = find_cover_page(container)
    if not titlepage_name:
        return
    nav_name = find_existing_nav_toc(container)
    if not nav_name:
        return
    nav_root = container.parsed(nav_name)
    flagged_links = nav_root.xpath('//*[@data-calibre-removed-titlepage]')
    for link in flagged_links:
        # Drop the marker first, then point the link at the cover page
        link.attrib.pop('data-calibre-removed-titlepage')
        link.set('href', container.name_to_href(titlepage_name, nav_name))
        container.dirty(nav_name)
|
||||||
|
|
||||||
|
|
||||||
def find_cover_image_in_page(container, cover_page):
|
def find_cover_image_in_page(container, cover_page):
|
||||||
root = container.parsed(cover_page)
|
root = container.parsed(cover_page)
|
||||||
body = XPath('//h:body')(root)
|
body = XPath('//h:body')(root)
|
||||||
|
@ -658,12 +658,17 @@ def ensure_single_nav_of_type(root, ntype='toc'):
|
|||||||
def commit_nav_toc(container, toc, lang=None, landmarks=None, previous_nav=None):
|
def commit_nav_toc(container, toc, lang=None, landmarks=None, previous_nav=None):
|
||||||
from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree
|
from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree
|
||||||
tocname = find_existing_nav_toc(container)
|
tocname = find_existing_nav_toc(container)
|
||||||
|
if previous_nav is not None:
|
||||||
|
nav_name = container.href_to_name(previous_nav[0])
|
||||||
|
if nav_name and container.exists(nav_name):
|
||||||
|
tocname = nav_name
|
||||||
|
container.apply_unique_properties(tocname, 'nav')
|
||||||
if tocname is None:
|
if tocname is None:
|
||||||
item = container.generate_item('nav.xhtml', id_prefix='nav')
|
item = container.generate_item('nav.xhtml', id_prefix='nav')
|
||||||
item.set('properties', 'nav')
|
item.set('properties', 'nav')
|
||||||
tocname = container.href_to_name(item.get('href'), base=container.opf_name)
|
tocname = container.href_to_name(item.get('href'), base=container.opf_name)
|
||||||
if previous_nav is not None:
|
if previous_nav is not None:
|
||||||
root = previous_nav
|
root = previous_nav[1]
|
||||||
else:
|
else:
|
||||||
root = container.parse_xhtml(P('templates/new_nav.html', data=True).decode('utf-8'))
|
root = container.parse_xhtml(P('templates/new_nav.html', data=True).decode('utf-8'))
|
||||||
container.replace(tocname, root)
|
container.replace(tocname, root)
|
||||||
|
@ -194,8 +194,7 @@ class OEBReader(object):
|
|||||||
new = set()
|
new = set()
|
||||||
for item in unchecked:
|
for item in unchecked:
|
||||||
data = None
|
data = None
|
||||||
if (item.media_type in cdoc or
|
if (item.media_type in cdoc or item.media_type[-4:] in ('/xml', '+xml')):
|
||||||
item.media_type[-4:] in ('/xml', '+xml')):
|
|
||||||
try:
|
try:
|
||||||
data = item.data
|
data = item.data
|
||||||
except:
|
except:
|
||||||
@ -206,8 +205,7 @@ class OEBReader(object):
|
|||||||
if data is None:
|
if data is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if (item.media_type in OEB_DOCS or
|
if (item.media_type in OEB_DOCS or item.media_type[-4:] in ('/xml', '+xml')):
|
||||||
item.media_type[-4:] in ('/xml', '+xml')):
|
|
||||||
hrefs = [r[2] for r in iterlinks(data)]
|
hrefs = [r[2] for r in iterlinks(data)]
|
||||||
for href in hrefs:
|
for href in hrefs:
|
||||||
if isinstance(href, bytes):
|
if isinstance(href, bytes):
|
||||||
@ -320,7 +318,10 @@ class OEBReader(object):
|
|||||||
extras.update(new)
|
extras.update(new)
|
||||||
unchecked = new
|
unchecked = new
|
||||||
version = int(self.oeb.version[0])
|
version = int(self.oeb.version[0])
|
||||||
|
removed_items_to_ignore = getattr(self.oeb, 'removed_items_to_ignore', ())
|
||||||
for item in sorted(extras):
|
for item in sorted(extras):
|
||||||
|
if item.href in removed_items_to_ignore:
|
||||||
|
continue
|
||||||
if version >= 2:
|
if version >= 2:
|
||||||
self.logger.warn(
|
self.logger.warn(
|
||||||
'Spine-referenced file %r not in spine' % item.href)
|
'Spine-referenced file %r not in spine' % item.href)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user