FB2 Output: Revert some changes in regular expressions due to different behavior

This commit is contained in:
Andrey Efremov 2019-10-08 15:57:13 +07:00
parent 162d86429d
commit d7a4e10c60

View File

@ -81,22 +81,24 @@ class FB2MLizer(object):
# Condense empty paragraphs into a line break. # Condense empty paragraphs into a line break.
text = re.sub(r'(?mu)(?:<p></p>\s*){3,}', '<empty-line/>', text) text = re.sub(r'(?mu)(?:<p></p>\s*){3,}', '<empty-line/>', text)
# Remove empty paragraphs. # Remove empty paragraphs.
text = re.sub(r'(?mu)<p></p>', '', text) text = re.sub(r'(?mu)<p></p>\s*', '', text)
# Put the paragraph following a paragraph on a separate line. # Put the paragraph following a paragraph on a separate line.
text = re.sub(r'(?mu)</p>\s*<p>', '</p>\n<p>', text) text = re.sub(r'(?mu)</p>\s*<p>', '</p>\n<p>', text)
# Clean up title endings. # Clean up title endings.
text = re.sub(r'(?mu)\s+</title>', '</title>', text) text = re.sub(r'(?mu)\s+</title>', '</title>', text)
# Remove empty title elements. # Remove empty title elements.
text = re.sub(r'(?mu)<title></title>', '', text) text = re.sub(r'(?mu)<title></title>\s*', '', text)
# Put the paragraph following a title on a separate line. # Put the paragraph following a title on a separate line.
text = re.sub(r'(?mu)</title>\s*<p>', '</title>\n<p>', text) text = re.sub(r'(?mu)</title>\s*<p>', '</title>\n<p>', text)
# Remove empty sections. # Remove empty sections.
text = re.sub(r'(?mu)<section>\s*</section>', '', text) text = re.sub(r'(?mu)<section>\s*</section>', '', text)
# Clean up section starts and ends. # Clean up section starts and ends.
text = re.sub(r'(?mu)\s*<section>\s*', '\n<section>\n', text) text = re.sub(r'(?mu)\s*<section>', '\n<section>', text)
text = re.sub(r'(?mu)\s*</section>\s*', '\n</section>\n', text) text = re.sub(r'(?mu)<section>\s*', '<section>\n', text)
text = re.sub(r'(?mu)\s*</section>', '\n</section>', text)
text = re.sub(r'(?mu)</section>\s*', '</section>\n', text)
# Put the section following a section on a separate line. # Put the section following a section on a separate line.
text = re.sub(r'(?mu)</section>\s*<section>', '</section>\n<section>', text) text = re.sub(r'(?mu)</section>\s*<section>', '</section>\n<section>', text)