mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-11 09:13:57 -04:00
Tied RTF input to heuristics and removed the option to not include soft breaks; users can combine delete_blank_paragraphs and remove_paragraph_spacing to achieve the desired results.
This commit is contained in:
parent
e99ab61dd0
commit
3379337275
@ -320,11 +320,10 @@ class RTFInput(InputFormatPlugin):
|
||||
res = transform.tostring(result)
|
||||
res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
|
||||
# Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
|
||||
if not getattr(self.opts, 'remove_paragraph_spacing', False):
|
||||
res = re.sub('\s*<body>', '<body>', res)
|
||||
res = re.sub('(?<=\n)\n{2}',
|
||||
u'<p>\u00a0</p>\n'.encode('utf-8'), res)
|
||||
if self.opts.preprocess_html:
|
||||
res = re.sub('\s*<body>', '<body>', res)
|
||||
res = re.sub('(?<=\n)\n{2}',
|
||||
u'<p>\u00a0</p>\n'.encode('utf-8'), res)
|
||||
if self.opts.enable_heuristics:
|
||||
preprocessor = PreProcessor(self.opts, log=getattr(self, 'log', None))
|
||||
res = preprocessor(res.decode('utf-8')).encode('utf-8')
|
||||
f.write(res)
|
||||
|
Loading…
x
Reference in New Issue
Block a user