From 895bd5db70294d45daa99230c4b482664e470cb1 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 14 Feb 2010 21:13:47 -0700
Subject: [PATCH] EPUB Output: Remove invalid fragment identifiers from the NCX
 TOC to keep Adobe Digital Editions from bursting into big wet tears

---
Review note: frag_pat is anchored with a trailing `$` in this copy so that the
whole fragment is validated. Unanchored, match() accepted any fragment whose
first character was valid (e.g. 'a b'), so most invalid fragments were kept.
The blob ids on the index lines predate this one-character change.

 src/calibre/ebooks/epub/output.py | 12 ++++++++++++
 src/calibre/ebooks/oeb/base.py    |  7 +++++--
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py
index 6e381d5237..c6616d4591 100644
--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@@ -258,6 +258,18 @@ class EPUBOutput(OutputFormatPlugin):
         '''
         from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename
 
+        # ADE rejects NCX TOCs with invalid fragment identifiers. The trailing
+        # $ makes match() validate the whole fragment, not just a prefix.
+        frag_pat = re.compile(r'[-A-Za-z0-9_:.]+$')
+        for node in self.oeb.toc.iter():
+            href = getattr(node, 'href', None)
+            if hasattr(href, 'partition'):
+                base, _, frag = href.partition('#')
+                if frag and frag_pat.match(frag) is None:
+                    self.log.warn(
+                        'Removing invalid fragment identifier %r from TOC'%frag)
+                    node.href = base
+
         for x in self.oeb.spine:
             root = x.data
             body = XPath('//h:body')(root)
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index b885f08962..0d8eed4692 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -1578,14 +1578,17 @@ class TOC(object):
             parent = etree.Element(NCX('navMap'))
         for node in self.nodes:
             id = node.id or unicode(uuid.uuid4())
-            attrib = {'id': id, 'playOrder': str(node.play_order)}
+            po = node.play_order
+            if po == 0:
+                po = 1
+            attrib = {'id': id, 'playOrder': str(po)}
             if node.klass:
                 attrib['class'] = node.klass
             point = element(parent, NCX('navPoint'), attrib=attrib)
             label = etree.SubElement(point, NCX('navLabel'))
             title = node.title
             if title:
-                title = re.sub(r'\s', ' ', title)
+                title = re.sub(r'\s+', ' ', title)
             element(label, NCX('text')).text = title
             element(point, NCX('content'), src=urlunquote(node.href))
             node.to_ncx(point)