FB2 Output: Keep contents of empty tags if they are placed between words

This commit is contained in:
Andrey Efremov 2019-10-07 17:48:17 +07:00
parent 0c1ca17192
commit 8b11947309

View File

@@ -73,8 +73,9 @@ class FB2MLizer(object):
return '<?xml version="1.0" encoding="UTF-8"?>\n' + output return '<?xml version="1.0" encoding="UTF-8"?>\n' + output
def clean_text(self, text): def clean_text(self, text):
# Remove empty tags. # Remove pointless tags, but keep their contents.
text = re.sub(r'(?miu)<(strong|emphasis|strikethrough|sub|sup)>\s*</\1>', '', text) text = re.sub(r'(?miu)<(strong|emphasis|strikethrough|sub|sup)>(\s*)</\1>', r'\2', text)
# Condense empty paragraphs into a line break. # Condense empty paragraphs into a line break.
text = re.sub(r'(?miu)(<p>\s*</p>\s*){3,}', '<empty-line/>', text) text = re.sub(r'(?miu)(<p>\s*</p>\s*){3,}', '<empty-line/>', text)
# Remove empty paragraphs. # Remove empty paragraphs.