diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index 85bd781ff8..b1374bbeec 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -12,7 +12,7 @@ from calibre.ebooks.chardet import detect from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \ separate_paragraphs_single_line, separate_paragraphs_print_formatted, \ preserve_spaces, detect_paragraph_type, detect_formatting_type, \ - normalize_line_endings, convert_textile + normalize_line_endings, convert_textile, remove_indents from calibre import _ent_pat, xml_entity_to_unicode class TXTInput(InputFormatPlugin): @@ -47,6 +47,9 @@ class TXTInput(InputFormatPlugin): OptionRecommendation(name='preserve_spaces', recommended_value=False, help=_('Normally extra spaces are condensed into a single space. ' 'With this option all spaces will be displayed.')), + OptionRecommendation(name='txt_in_remove_indents', recommended_value=False, + help=_('Normally extra space at the beginning of lines is retained. ' + 'With this option they will be removed.')), OptionRecommendation(name="markdown_disable_toc", recommended_value=False, help=_('Do not insert a Table of Contents into the output text.')), ]) @@ -101,6 +104,9 @@ class TXTInput(InputFormatPlugin): setattr(options, 'enable_heuristics', True) setattr(options, 'unwrap_lines', False) + if options.txt_in_remove_indents: + txt = remove_indents(txt) + # Preserve spaces will replace multiple spaces to a space # followed by the   entity. if options.preserve_spaces: diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index 546d3f1842..987d7cdc73 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -24,14 +24,14 @@ def clean_txt(txt): # all line breaks with \n. txt = '\n'.join([line.rstrip() for line in txt.splitlines()]) - # Replace whitespace at the beginning of the list with   - txt = re.sub('(?m)(?P[ ]+)', lambda mo: ' ' * mo.groups('space').count(' '), txt) - txt = re.sub('(?m)(?P[\t]+)', lambda mo: ' ' * 4 * mo.groups('space').count('\t'), txt) + # Replace whitespace at the beginning of the line with   + txt = re.sub('(?m)(?P^[ ]+)(?=.)', lambda mo: ' ' * mo.groups('space').count(' '), txt) + txt = re.sub('(?m)(?P^[\t]+)(?=.)', lambda mo: ' ' * 4 * mo.groups('space').count('\t'), txt) # Condense redundant spaces txt = re.sub('[ ]{2,}', ' ', txt) - # Remove blank lines from the beginning and end of the document. + # Remove blank space from the beginning and end of the document. txt = re.sub('^\s+(?=.)', '', txt) txt = re.sub('(?<=.)\s+$', '', txt) # Remove excessive line breaks. @@ -107,6 +107,10 @@ def preserve_spaces(txt): txt = txt.replace('\t', '    ') return txt +def remove_indents(txt): + txt = re.sub('(?miu)^\s+', '', txt) + return txt + def opf_writer(path, opf_name, manifest, spine, mi): opf = OPFCreator(path, mi) opf.create_manifest(manifest)