TXT Input: Retain indents with print-formatted paragraphs. Move the remove-indents step after paragraph reformatting so that print-formatted detection keeps working.

This commit is contained in:
John Schember 2011-02-06 08:35:55 -05:00
parent ed3b2866cf
commit 92ee46cdb9
2 changed files with 10 additions and 9 deletions

View File

@ -99,14 +99,6 @@ class TXTInput(InputFormatPlugin):
setattr(options, 'enable_heuristics', True) setattr(options, 'enable_heuristics', True)
setattr(options, 'unwrap_lines', False) setattr(options, 'unwrap_lines', False)
if options.txt_in_remove_indents:
txt = remove_indents(txt)
# Preserve spaces will replace multiple spaces to a space
# followed by the &nbsp; entity.
if options.preserve_spaces:
txt = preserve_spaces(txt)
# Reformat paragraphs to block formatting based on the detected type. # Reformat paragraphs to block formatting based on the detected type.
# We don't check for block because the processor assumes block. # We don't check for block because the processor assumes block.
# single and print are transformed to block for processing. # single and print are transformed to block for processing.
@ -130,6 +122,15 @@ class TXTInput(InputFormatPlugin):
dehyphenator = Dehyphenator(options.verbose, log=self.log) dehyphenator = Dehyphenator(options.verbose, log=self.log)
txt = dehyphenator(txt,'txt', length) txt = dehyphenator(txt,'txt', length)
# User requested transformation on the text.
if options.txt_in_remove_indents:
txt = remove_indents(txt)
# Preserve spaces will replace multiple spaces to a space
# followed by the &nbsp; entity.
if options.preserve_spaces:
txt = preserve_spaces(txt)
# Process the text using the appropriate text processor. # Process the text using the appropriate text processor.
html = '' html = ''
if options.formatting_type == 'markdown': if options.formatting_type == 'markdown':

View File

@ -99,7 +99,7 @@ def separate_paragraphs_single_line(txt):
return txt return txt
def separate_paragraphs_print_formatted(txt): def separate_paragraphs_print_formatted(txt):
txt = re.sub(u'(?miu)^(\t+|[ ]{2,})(?=.)', '\n\t', txt) txt = re.sub(u'(?miu)^(?P<indent>\t+|[ ]{2,})(?=.)', lambda mo: '%s\n\t' % mo.group('indent'), txt)
return txt return txt
def preserve_spaces(txt): def preserve_spaces(txt):