mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
fixed handling of 'unformatted' text input
This commit is contained in:
parent
6c495f6a5a
commit
30212404de
@ -58,6 +58,7 @@ class TXTInput(InputFormatPlugin):
|
||||
accelerators):
|
||||
self.log = log
|
||||
log.debug('Reading text from file...')
|
||||
length = 0
|
||||
|
||||
txt = stream.read()
|
||||
|
||||
@ -109,11 +110,12 @@ class TXTInput(InputFormatPlugin):
|
||||
# Reformat paragraphs to block formatting based on the detected type.
|
||||
# We don't check for block because the processor assumes block.
|
||||
# single and print are transformed to block for processing.
|
||||
if options.paragraph_type == 'single' or options.paragraph_type == 'unformatted':
|
||||
if options.paragraph_type == 'single':
|
||||
txt = separate_paragraphs_single_line(txt)
|
||||
elif options.paragraph_type == 'print':
|
||||
txt = separate_paragraphs_print_formatted(txt)
|
||||
elif options.paragraph_type == 'unformatted':
|
||||
print "unwrapping lines using heuristics"
|
||||
from calibre.ebooks.conversion.utils import HeuristicProcessor
|
||||
# unwrap lines based on punctuation
|
||||
docanalysis = DocAnalysis('txt', txt)
|
||||
@ -123,7 +125,8 @@ class TXTInput(InputFormatPlugin):
|
||||
|
||||
if getattr(options, 'enable_heuristics', False) and getattr(options, 'dehyphenate', False):
|
||||
docanalysis = DocAnalysis('txt', txt)
|
||||
length = docanalysis.line_length(.5)
|
||||
if not length:
|
||||
length = docanalysis.line_length(.5)
|
||||
dehyphenator = Dehyphenator(options.verbose, log=self.log)
|
||||
txt = dehyphenator(txt,'txt', length)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user