mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
pdftohtml processing: fix spaces rule
This commit is contained in:
parent
f7ec532d57
commit
ac0af1b844
@ -72,11 +72,11 @@ class HTMLPreProcessor(object):
|
||||
# Have paragraphs show better
|
||||
(re.compile(r'<br.*?>'), lambda match : '<p>'),
|
||||
|
||||
# Re wrap lines
|
||||
# Un wrap lines
|
||||
(re.compile(r'(?<=\w)\s*</i>\s*<p.*?>\s*<i>\s*(?=\w)'), lambda match: ' '),
|
||||
(re.compile(r'(?<=\w)\s*<p.*?>\s*(?=\w)', re.UNICODE), lambda match: ' '),
|
||||
# Clean up spaces
|
||||
(re.compile(ru'(?<=\.|,|:|;|\?|!|”|"|\')[\s^ ]*(?=<)'), lambda match: ' '),
|
||||
(re.compile(u'(?<=\.|,|:|;|\?|!|”|"|\')[\s^ ]*(?=<)'), lambda match: ' '),
|
||||
]
|
||||
|
||||
# Fix Book Designer markup
|
||||
|
Loading…
x
Reference in New Issue
Block a user