Conversion pipeline: Remove empty <b> and <i> tags. Fixes #3564 (PDF to EPUB formatting problems)

This commit is contained in:
Kovid Goyal 2009-09-27 23:43:04 -06:00
parent a1a30a50ea
commit f31692c268

View File

@ -912,11 +912,7 @@ class Manifest(object):
if key == 'lang' or key.endswith('}lang'):
body.attrib.pop(key)
# Remove hyperlinks with no content as they cause rendering
# artifacts in browser based renderers
for a in xpath(data, '//h:a[@href]'):
if a.get('id', None) is None and a.get('name', None) is None \
and len(a) == 0 and not a.text:
def remove_elem(a):
p = a.getparent()
idx = p.index(a) -1
p.remove(a)
@ -930,6 +926,14 @@ class Manifest(object):
p[idx].tail = ''
p[idx].tail += a.tail
# Remove hyperlinks with no content, as they cause rendering
# artifacts in browser-based renderers.
# Also remove empty <b> and <i> tags.
for a in xpath(data, '//h:a[@href]|//h:i|//h:b'):
if a.get('id', None) is None and a.get('name', None) is None \
and len(a) == 0 and not a.text:
remove_elem(a)
return data
def _parse_txt(self, data):