EPUB Output: Remove invalid fragment identifiers from the NCX TOC to keep Adobe Digital Editions from bursting into big wet tears

This commit is contained in:
Kovid Goyal 2010-02-14 21:13:47 -07:00
parent baccd08c49
commit 895bd5db70
2 changed files with 17 additions and 2 deletions

View File

@@ -258,6 +258,18 @@ class EPUBOutput(OutputFormatPlugin):
'''
from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename
# ADE cries big wet tears when it encounters an invalid fragment
# identifier in the NCX toc.
# The pattern must be anchored at the end (\Z): re.match() only anchors at
# the start, so without it a fragment such as 'a b' would match on its
# valid prefix 'a' and be kept even though it is invalid as a whole.
frag_pat = re.compile(r'[-A-Za-z0-9_:.]+\Z')
for node in self.oeb.toc.iter():
    href = getattr(node, 'href', None)
    # Only string-like hrefs can carry a fragment; nodes whose href is
    # missing or None are left untouched.
    if hasattr(href, 'partition'):
        base, _, frag = href.partition('#')
        if frag and frag_pat.match(frag) is None:
            self.log.warn(
                'Removing invalid fragment identifier %r from TOC'%frag)
            # Keep only the part before '#', so the entry no longer
            # carries the invalid fragment.
            node.href = base
for x in self.oeb.spine:
root = x.data
body = XPath('//h:body')(root)

View File

@@ -1578,14 +1578,17 @@ class TOC(object):
parent = etree.Element(NCX('navMap'))
for node in self.nodes:
id = node.id or unicode(uuid.uuid4())
attrib = {'id': id, 'playOrder': str(node.play_order)}
po = node.play_order
if po == 0:
po = 1
attrib = {'id': id, 'playOrder': str(po)}
if node.klass:
attrib['class'] = node.klass
point = element(parent, NCX('navPoint'), attrib=attrib)
label = etree.SubElement(point, NCX('navLabel'))
title = node.title
if title:
title = re.sub(r'\s', ' ', title)
title = re.sub(r'\s+', ' ', title)
element(label, NCX('text')).text = title
element(point, NCX('content'), src=urlunquote(node.href))
node.to_ncx(point)