FB2 Output: Remove empty tags

This commit is contained in:
Andrey Efremov 2019-10-07 12:28:18 +07:00
parent 58352097ad
commit d02966423c

View File

@@ -73,6 +73,8 @@ class FB2MLizer(object):
return '<?xml version="1.0" encoding="UTF-8"?>\n' + output return '<?xml version="1.0" encoding="UTF-8"?>\n' + output
def clean_text(self, text): def clean_text(self, text):
# Remove empty tags.
text = re.sub(r'(?miu)<(strong|emphasis|strikethrough|sub|sup)>\s*</\1>', '', text)
# Condense empty paragraphs into a line break. # Condense empty paragraphs into a line break.
text = re.sub(r'(?miu)(<p>\s*</p>\s*){3,}', '<empty-line/>', text) text = re.sub(r'(?miu)(<p>\s*</p>\s*){3,}', '<empty-line/>', text)
# Remove empty paragraphs. # Remove empty paragraphs.