mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
pdftohtml preprocess rules work
This commit is contained in:
parent
aafc6d9764
commit
575b021f48
@ -73,7 +73,7 @@ class HTMLPreProcessor(object):
|
||||
(re.compile(r'<br.*?>'), lambda match : '<p>'),
|
||||
|
||||
# Un wrap lines
|
||||
(re.compile(r'(?<=\w)\s*</i>\s*<p.*?>\s*<i>\s*(?=\w)'), lambda match: ' '),
|
||||
(re.compile(r'(?<=\w)\s*</(i|b|u)>\s*<p.*?>\s*<(i|b|u)>\s*(?=\w)'), lambda match: ' '),
|
||||
(re.compile(r'(?<=\w)\s*<p.*?>\s*(?=\w)', re.UNICODE), lambda match: ' '),
|
||||
# Clean up spaces
|
||||
(re.compile(u'(?<=\.|,|:|;|\?|!|”|"|\')[\s^ ]*(?=<)'), lambda match: ' '),
|
||||
|
Loading…
x
Reference in New Issue
Block a user