From f31692c268cb8c02beaacd9dc2ac999d813971a6 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 27 Sep 2009 23:43:04 -0600
Subject: [PATCH] Conversion pipeline: Remove empty <b> and <i> tags. Fixes
 #3564 (PDF to EPUB formatting problems)

---
 src/calibre/ebooks/oeb/base.py | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index 5ee829c8f4..5e3d2296ae 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -912,23 +912,27 @@ class Manifest(object):
                 if key == 'lang' or key.endswith('}lang'):
                     body.attrib.pop(key)
 
+            def remove_elem(a):  # Detach a from its parent, keeping a.tail text
+                p = a.getparent()
+                idx = p.index(a) -1  # previous sibling's index; -1 if a is first
+                p.remove(a)
+                if a.tail:
+                    if idx < 0:  # no previous sibling: tail follows p.text
+                        if p.text is None:
+                            p.text = ''
+                        p.text += a.tail
+                    else:  # idx >= 0: tail follows the previous sibling's tail
+                        if p[idx].tail is None:
+                            p[idx].tail = ''
+                        p[idx].tail += a.tail
+
             # Remove hyperlinks with no content as they cause rendering
             # artifacts in browser based renderers
-            for a in xpath(data, '//h:a[@href]'):
+            # Also remove empty <b> and <i> tags
+            for a in xpath(data, '//h:a[@href]|//h:i|//h:b'):
                 if a.get('id', None) is None and a.get('name', None) is None \
                         and len(a) == 0 and not a.text:
-                    p = a.getparent()
-                    idx = p.index(a) -1
-                    p.remove(a)
-                    if a.tail:
-                        if idx <= 0:
-                            if p.text is None:
-                                p.text = ''
-                            p.text += a.tail
-                        else:
-                            if p[idx].tail is None:
-                                p[idx].tail = ''
-                            p[idx].tail += a.tail
+                    remove_elem(a)
 
             return data