From 428ed899fcd5edb70ffc11bdb3aa63154e87aa75 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 15 Mar 2011 14:51:22 -0600 Subject: [PATCH] Conversion pipeline: When detecting chapters/toc links from HTML normalize spaces and increase maximum TOC title length to 1000 characters from 100 characters. Fixes #9363 (Shortening text on generating TOC.) --- src/calibre/ebooks/oeb/transforms/structure.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/oeb/transforms/structure.py b/src/calibre/ebooks/oeb/transforms/structure.py index fc338da692..0d8bdcdf2e 100644 --- a/src/calibre/ebooks/oeb/transforms/structure.py +++ b/src/calibre/ebooks/oeb/transforms/structure.py @@ -81,6 +81,7 @@ class DetectStructure(object): page_break_after = 'display: block; page-break-after: always' for item, elem in self.detected_chapters: text = xml2text(elem).strip() + text = re.sub(r'\s+', ' ', text.strip()) self.log('\tDetected chapter:', text[:50]) if chapter_mark == 'none': continue @@ -137,7 +138,8 @@ class DetectStructure(object): text = elem.get('title', '') if not text: text = elem.get('alt', '') - text = text[:100].strip() + text = re.sub(r'\s+', ' ', text.strip()) + text = text[:1000].strip() id = elem.get('id', 'calibre_toc_%d'%counter) elem.set('id', id) href = '#'.join((item.href, id))