mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Conversion pipeline: When detecting chapters/TOC links from HTML, normalize whitespace and increase the maximum TOC title length from 100 to 1000 characters. Fixes #9363 (Shortening text on generating TOC.)
This commit is contained in:
parent
2f4876f474
commit
428ed899fc
@ -81,6 +81,7 @@ class DetectStructure(object):
|
|||||||
page_break_after = 'display: block; page-break-after: always'
|
page_break_after = 'display: block; page-break-after: always'
|
||||||
for item, elem in self.detected_chapters:
|
for item, elem in self.detected_chapters:
|
||||||
text = xml2text(elem).strip()
|
text = xml2text(elem).strip()
|
||||||
|
text = re.sub(r'\s+', ' ', text.strip())
|
||||||
self.log('\tDetected chapter:', text[:50])
|
self.log('\tDetected chapter:', text[:50])
|
||||||
if chapter_mark == 'none':
|
if chapter_mark == 'none':
|
||||||
continue
|
continue
|
||||||
@ -137,7 +138,8 @@ class DetectStructure(object):
|
|||||||
text = elem.get('title', '')
|
text = elem.get('title', '')
|
||||||
if not text:
|
if not text:
|
||||||
text = elem.get('alt', '')
|
text = elem.get('alt', '')
|
||||||
text = text[:100].strip()
|
text = re.sub(r'\s+', ' ', text.strip())
|
||||||
|
text = text[:1000].strip()
|
||||||
id = elem.get('id', 'calibre_toc_%d'%counter)
|
id = elem.get('id', 'calibre_toc_%d'%counter)
|
||||||
elem.set('id', id)
|
elem.set('id', id)
|
||||||
href = '#'.join((item.href, id))
|
href = '#'.join((item.href, id))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user