mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
py3: make pmlz output work
In Python 3, the re module is stricter about the arguments passed to it: an invalid escape sequence no longer falls back to being treated as literal characters, but raises an error instead. Use raw strings to ensure that the escaped backslashes are preserved all the way to the regular expressions themselves.
This commit is contained in:
parent
8e368c0d46
commit
1ed017fabd
@ -174,8 +174,8 @@ class PMLMLizer(object):
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
def prepare_text(self, text):
|
def prepare_text(self, text):
|
||||||
# Replace empty paragraphs with \c pml codes used to denote emtpy lines.
|
# Replace empty paragraphs with \c pml codes used to denote empty lines.
|
||||||
text = re.sub(unicode_type(r'(?<=</p>)\s*<p[^>]*>[\xc2\xa0\s]*</p>'), '\\c\n\\c', text)
|
text = re.sub(unicode_type(r'(?<=</p>)\s*<p[^>]*>[\xc2\xa0\s]*</p>'), r'\\c\n\\c', text)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def clean_text(self, text):
|
def clean_text(self, text):
|
||||||
@ -207,7 +207,7 @@ class PMLMLizer(object):
|
|||||||
text = re.sub('[ ]{2,}', ' ', text)
|
text = re.sub('[ ]{2,}', ' ', text)
|
||||||
|
|
||||||
# Condense excessive \c empty line sequences.
|
# Condense excessive \c empty line sequences.
|
||||||
text = re.sub('(\\c\\s*\\c\\s*){2,}', '\\c \n\\c\n', text)
|
text = re.sub(r'(\\c\\s*\\c\\s*){2,}', r'\\c \n\\c\n', text)
|
||||||
|
|
||||||
# Remove excessive newlines.
|
# Remove excessive newlines.
|
||||||
text = re.sub('\n[ ]+\n', '\n\n', text)
|
text = re.sub('\n[ ]+\n', '\n\n', text)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user