Conversion pipeline: When detecting chapters/TOC links from HTML, normalize whitespace and increase the maximum TOC title length from 100 to 1000 characters. Fixes #9363 (Shortening text on generating TOC.)

This commit is contained in:
Kovid Goyal 2011-03-15 14:51:22 -06:00
parent 2f4876f474
commit 428ed899fc

View File

@@ -81,6 +81,7 @@ class DetectStructure(object):
page_break_after = 'display: block; page-break-after: always'
for item, elem in self.detected_chapters:
text = xml2text(elem).strip()
text = re.sub(r'\s+', ' ', text.strip())
self.log('\tDetected chapter:', text[:50])
if chapter_mark == 'none':
continue
@@ -137,7 +138,8 @@ class DetectStructure(object):
text = elem.get('title', '')
if not text:
text = elem.get('alt', '')
text = text[:100].strip()
text = re.sub(r'\s+', ' ', text.strip())
text = text[:1000].strip()
id = elem.get('id', 'calibre_toc_%d'%counter)
elem.set('id', id)
href = '#'.join((item.href, id))