fixed handling of 'unformatted' text input

This commit is contained in:
ldolse 2011-02-06 14:15:39 +08:00
parent 6c495f6a5a
commit 30212404de

View File

@ -58,6 +58,7 @@ class TXTInput(InputFormatPlugin):
accelerators):
self.log = log
log.debug('Reading text from file...')
length = 0
txt = stream.read()
@ -109,11 +110,12 @@ class TXTInput(InputFormatPlugin):
# Reformat paragraphs to block formatting based on the detected type.
# We don't check for block because the processor assumes block.
# single and print are transformed to block for processing.
if options.paragraph_type == 'single' or options.paragraph_type == 'unformatted':
if options.paragraph_type == 'single':
txt = separate_paragraphs_single_line(txt)
elif options.paragraph_type == 'print':
txt = separate_paragraphs_print_formatted(txt)
elif options.paragraph_type == 'unformatted':
print "unwrapping lines using heuristics"
from calibre.ebooks.conversion.utils import HeuristicProcessor
# unwrap lines based on punctuation
docanalysis = DocAnalysis('txt', txt)
@ -123,7 +125,8 @@ class TXTInput(InputFormatPlugin):
if getattr(options, 'enable_heuristics', False) and getattr(options, 'dehyphenate', False):
docanalysis = DocAnalysis('txt', txt)
length = docanalysis.line_length(.5)
if not length:
length = docanalysis.line_length(.5)
dehyphenator = Dehyphenator(options.verbose, log=self.log)
txt = dehyphenator(txt,'txt', length)