mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
PDF preprocessing rule additions.
This commit is contained in:
parent
0c3e217e41
commit
ab9c4b39f0
@ -130,7 +130,11 @@ class HTMLPreProcessor(object):
|
||||
# Have paragraphs show better
|
||||
(re.compile(r'<br.*?>'), lambda match : '<p>'),
|
||||
# Clean up spaces
|
||||
(re.compile(u'(?<=[\.,:;\?!”"\'])[\s^ ]*(?=<)'), lambda match: ' '),
|
||||
(re.compile(u'(?<=[\.,;\?!”"\'])[\s^ ]*(?=<)'), lambda match: ' '),
|
||||
# Connect paragraphs split by -
|
||||
(re.compile(u'(?<=[^\s][-–])[\s]*(</p>)*[\s]*(<p>)*\s*(?=[^\s])'), lambda match: ''),
|
||||
# Remove - that splits words
|
||||
(re.compile(u'(?<=[^\s])[-–]+(?=[^\s])'), lambda match: ''),
|
||||
# Add space before and after italics
|
||||
(re.compile(u'(?<!“)<i>'), lambda match: ' <i>'),
|
||||
(re.compile(r'</i>(?=\w)'), lambda match: '</i> '),
|
||||
|
Loading…
x
Reference in New Issue
Block a user