LRF Output: Improve extraction of text from tags for TOC entries and anchors. Fixes #1613 (Converting MS Lit to Lrf problems)

2025-08-30 23:00:21 -04:00 · 2009-01-14 00:08:42 -08:00 · 2009-01-14 00:08:42 -08:00 · ab0c2accef
commit ab0c2accef
parent d579de1029
1 changed file with 5 additions and 5 deletions
--- a/src/calibre/ebooks/lrf/html/convert_from.py
+++ b/src/calibre/ebooks/lrf/html/convert_from.py
@@ -580,20 +580,20 @@ class HTMLConverter(object, LoggingInterface):
        if (css.has_key('display') and css['display'].lower() == 'none') or \
           (css.has_key('visibility') and css['visibility'].lower() == 'hidden'):
            return ''
-        text = u''
+        text, alt_text = u'', u''
        for c in tag.contents:
            if limit != None and len(text) > limit:
                break
            if isinstance(c, HTMLConverter.IGNORED_TAGS):
-                return u''
+                continue
            if isinstance(c, NavigableString):
                text += unicode(c)                
            elif isinstance(c, Tag):
                if c.name.lower() == 'img' and c.has_key('alt'):
-                    text += c['alt']
+                    alt_text += c['alt']
-                    return text
+                    continue
                text += self.get_text(c)
-        return text
+        return text if text.strip() else alt_text
    def process_links(self):
        def add_toc_entry(text, target):