From 895bd5db70294d45daa99230c4b482664e470cb1 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 14 Feb 2010 21:13:47 -0700
Subject: [PATCH] EPUB Output: Remove invalid fragment identifiers from the NCX
 TOC to keep Adobe Digital Editions from bursting into big wet tears

---
 src/calibre/ebooks/epub/output.py | 12 ++++++++++++
 src/calibre/ebooks/oeb/base.py    |  7 +++++--
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py
index 6e381d5237..c6616d4591 100644
--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@@ -258,6 +258,18 @@ class EPUBOutput(OutputFormatPlugin):
         '''
         from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename
 
+        # ADE chokes on invalid fragment identifiers in the NCX TOC; the
+        # pattern is anchored at the end so the whole fragment must be valid.
+        frag_pat = re.compile(r'[-A-Za-z0-9_:.]+$')
+        for node in self.oeb.toc.iter():
+            href = getattr(node, 'href', None)
+            if hasattr(href, 'partition'):
+                base, _, frag = href.partition('#')
+                if frag and frag_pat.match(frag) is None:
+                    self.log.warn(
+                            'Removing invalid fragment identifier %r from TOC'%frag)
+                    node.href = base
+
         for x in self.oeb.spine:
             root = x.data
             body = XPath('//h:body')(root)
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index b885f08962..0d8eed4692 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -1578,14 +1578,17 @@ class TOC(object):
             parent = etree.Element(NCX('navMap'))
         for node in self.nodes:
             id = node.id or unicode(uuid.uuid4())
-            attrib = {'id': id, 'playOrder': str(node.play_order)}
+            po = node.play_order
+            if po == 0:
+                po = 1
+            attrib = {'id': id, 'playOrder': str(po)}
             if node.klass:
                 attrib['class'] = node.klass
             point = element(parent, NCX('navPoint'), attrib=attrib)
             label = etree.SubElement(point, NCX('navLabel'))
             title = node.title
             if title:
-                title = re.sub(r'\s', ' ', title)
+                title = re.sub(r'\s+', ' ', title)
             element(label, NCX('text')).text = title
             element(point, NCX('content'), src=urlunquote(node.href))
             node.to_ncx(point)