mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
RTF Input: Fix regression that broke the Preprocess HTML option
This commit is contained in:
parent
ef15ee03a3
commit
03f70c156c
@ -191,15 +191,15 @@ class PreProcessor(object):
|
||||
blanklines = "\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*"
|
||||
line_opening = "<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*"
|
||||
txt_line_wrap = u"((\u0020|\u0009)*\n){1,4}"
|
||||
|
||||
|
||||
unwrap_regex = lookahead+line_ending+blanklines+line_opening
|
||||
if format == 'txt':
|
||||
unwrap_regex = lookahead+txt_line_wrap
|
||||
|
||||
|
||||
unwrap = re.compile(u"%s" % unwrap_regex, re.UNICODE)
|
||||
content = unwrap.sub(' ', content)
|
||||
return content
|
||||
|
||||
|
||||
|
||||
def __call__(self, html):
|
||||
self.log("********* Preprocessing HTML *********")
|
||||
|
@ -296,7 +296,7 @@ class RTFInput(InputFormatPlugin):
|
||||
u'<p>\u00a0</p>\n'.encode('utf-8'), res)
|
||||
if self.opts.preprocess_html:
|
||||
preprocessor = PreProcessor(self.opts, log=getattr(self, 'log', None))
|
||||
res = preprocessor(res)
|
||||
res = preprocessor(res.decode('utf-8')).encode('utf-8')
|
||||
f.write(res)
|
||||
self.write_inline_css(inline_class, border_styles)
|
||||
stream.seek(0)
|
||||
|
@ -53,7 +53,7 @@ class TXTInput(InputFormatPlugin):
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
log.debug('Reading text from file...')
|
||||
|
||||
|
||||
txt = stream.read()
|
||||
# Get the encoding of the document.
|
||||
if options.input_encoding:
|
||||
@ -80,7 +80,7 @@ class TXTInput(InputFormatPlugin):
|
||||
# Get length for hyphen removal and punctuation unwrap
|
||||
docanalysis = DocAnalysis('txt', txt)
|
||||
length = docanalysis.line_length(.5)
|
||||
|
||||
|
||||
if options.formatting_type == 'auto':
|
||||
options.formatting_type = detect_formatting_type(txt)
|
||||
|
||||
@ -122,7 +122,7 @@ class TXTInput(InputFormatPlugin):
|
||||
txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
|
||||
|
||||
flow_size = getattr(options, 'flow_size', 0)
|
||||
|
||||
|
||||
if options.formatting_type == 'heuristic':
|
||||
html = convert_heuristic(txt, epub_split_size_kb=flow_size)
|
||||
else:
|
||||
|
Loading…
x
Reference in New Issue
Block a user