From 428ed899fcd5edb70ffc11bdb3aa63154e87aa75 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 15 Mar 2011 14:51:22 -0600
Subject: [PATCH] Conversion pipeline: When detecting chapters/TOC links from
 HTML, normalize whitespace and increase the maximum TOC title length from
 100 to 1000 characters. Fixes #9363 (Shortening text on generating
 TOC.)

---
 src/calibre/ebooks/oeb/transforms/structure.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/oeb/transforms/structure.py b/src/calibre/ebooks/oeb/transforms/structure.py
index fc338da692..0d8bdcdf2e 100644
--- a/src/calibre/ebooks/oeb/transforms/structure.py
+++ b/src/calibre/ebooks/oeb/transforms/structure.py
@@ -81,6 +81,7 @@ class DetectStructure(object):
             page_break_after = 'display: block; page-break-after: always'
             for item, elem in self.detected_chapters:
                 text = xml2text(elem).strip()
+                text = re.sub(r'\s+', ' ', text.strip())
                 self.log('\tDetected chapter:', text[:50])
                 if chapter_mark == 'none':
                     continue
@@ -137,7 +138,8 @@ class DetectStructure(object):
             text = elem.get('title', '')
         if not text:
             text = elem.get('alt', '')
-        text = text[:100].strip()
+        text = re.sub(r'\s+', ' ', text.strip())
+        text = text[:1000].strip()
         id = elem.get('id', 'calibre_toc_%d'%counter)
         elem.set('id', id)
         href = '#'.join((item.href, id))