mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
LRF Output: Improve extraction of text from tags for TOC entries and anchors. Fixes #1613 (Converting MS Lit to Lrf problems)
This commit is contained in:
parent
d579de1029
commit
ab0c2accef
@ -580,20 +580,20 @@ class HTMLConverter(object, LoggingInterface):
|
|||||||
if (css.has_key('display') and css['display'].lower() == 'none') or \
|
if (css.has_key('display') and css['display'].lower() == 'none') or \
|
||||||
(css.has_key('visibility') and css['visibility'].lower() == 'hidden'):
|
(css.has_key('visibility') and css['visibility'].lower() == 'hidden'):
|
||||||
return ''
|
return ''
|
||||||
text = u''
|
text, alt_text = u'', u''
|
||||||
for c in tag.contents:
|
for c in tag.contents:
|
||||||
if limit != None and len(text) > limit:
|
if limit != None and len(text) > limit:
|
||||||
break
|
break
|
||||||
if isinstance(c, HTMLConverter.IGNORED_TAGS):
|
if isinstance(c, HTMLConverter.IGNORED_TAGS):
|
||||||
return u''
|
continue
|
||||||
if isinstance(c, NavigableString):
|
if isinstance(c, NavigableString):
|
||||||
text += unicode(c)
|
text += unicode(c)
|
||||||
elif isinstance(c, Tag):
|
elif isinstance(c, Tag):
|
||||||
if c.name.lower() == 'img' and c.has_key('alt'):
|
if c.name.lower() == 'img' and c.has_key('alt'):
|
||||||
text += c['alt']
|
alt_text += c['alt']
|
||||||
return text
|
continue
|
||||||
text += self.get_text(c)
|
text += self.get_text(c)
|
||||||
return text
|
return text if text.strip() else alt_text
|
||||||
|
|
||||||
def process_links(self):
|
def process_links(self):
|
||||||
def add_toc_entry(text, target):
|
def add_toc_entry(text, target):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user