Also recognize para breaks when the involved characters are quotes or parentheses

This commit is contained in:
Kovid Goyal 2022-07-31 09:32:31 +05:30
parent 353dd847d1
commit 25b3d5b054
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -36,7 +36,7 @@ def pretty_google_books_comments(raw):
# Paragraphs in the comments are removed but whatever software Google uses
# to do this does not insert a space so we often find the pattern
# word.Capital in the comments which can be used to find paragraph markers.
raw = re.sub(r'([a-z])\.([A-Z])', '\\1.\n\n\\2', raw)
raw = re.sub(r'([a-z)"])\.([A-Z("])', '\\1.\n\n\\2', raw)
return raw