Conversion pipeline: Remove empty <b> and <i> tags. Fixes #3564 (PDF to EPUB formatting problems)

2025-07-09 03:04:10 -04:00 · 2009-09-27 23:43:04 -06:00 · 2009-09-27 23:43:04 -06:00 · f31692c268
commit f31692c268
parent a1a30a50ea
1 changed files with 17 additions and 13 deletions
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -912,11 +912,7 @@ class Manifest(object):
                if key == 'lang' or key.endswith('}lang'):
                    body.attrib.pop(key)

-            # Remove hyperlinks with no content as they cause rendering
-            # artifacts in browser based renderers
-            for a in xpath(data, '//h:a[@href]'):
-                if a.get('id', None) is None and a.get('name', None) is None \
-                        and len(a) == 0 and not a.text:
+            def remove_elem(a):
                p = a.getparent()
                idx = p.index(a) -1
                p.remove(a)
@@ -930,6 +926,14 @@ class Manifest(object):
                            p[idx].tail = ''
                        p[idx].tail += a.tail

+            # Remove hyperlinks with no content as they cause rendering
+            # artifacts in browser based renderers
+            # Also remove empty <b> and <i> tags
+            for a in xpath(data, '//h:a[@href]|//h:i|//h:b'):
+                if a.get('id', None) is None and a.get('name', None) is None \
+                        and len(a) == 0 and not a.text:
+                    remove_elem(a)
+
            return data

        def _parse_txt(self, data):