mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
pdftohtml processing: fix spaces rule
This commit is contained in:
parent
f7ec532d57
commit
ac0af1b844
@ -72,11 +72,11 @@ class HTMLPreProcessor(object):
|
|||||||
# Have paragraphs show better
|
# Have paragraphs show better
|
||||||
(re.compile(r'<br.*?>'), lambda match : '<p>'),
|
(re.compile(r'<br.*?>'), lambda match : '<p>'),
|
||||||
|
|
||||||
# Re wrap lines
|
# Un wrap lines
|
||||||
(re.compile(r'(?<=\w)\s*</i>\s*<p.*?>\s*<i>\s*(?=\w)'), lambda match: ' '),
|
(re.compile(r'(?<=\w)\s*</i>\s*<p.*?>\s*<i>\s*(?=\w)'), lambda match: ' '),
|
||||||
(re.compile(r'(?<=\w)\s*<p.*?>\s*(?=\w)', re.UNICODE), lambda match: ' '),
|
(re.compile(r'(?<=\w)\s*<p.*?>\s*(?=\w)', re.UNICODE), lambda match: ' '),
|
||||||
# Clean up spaces
|
# Clean up spaces
|
||||||
(re.compile(ru'(?<=\.|,|:|;|\?|!|”|"|\')[\s^ ]*(?=<)'), lambda match: ' '),
|
(re.compile(u'(?<=\.|,|:|;|\?|!|”|"|\')[\s^ ]*(?=<)'), lambda match: ' '),
|
||||||
]
|
]
|
||||||
|
|
||||||
# Fix Book Designer markup
|
# Fix Book Designer markup
|
||||||
|
Loading…
x
Reference in New Issue
Block a user