TXT Input: Fix bug where spaces were not retained properly. Fix bug where spaces were replaced with entities (this should only have happened at the beginning of lines). Add option to remove indents.

This commit is contained in:
John Schember 2011-02-05 13:35:41 -05:00
parent 2796960f42
commit 1f708746d0
2 changed files with 15 additions and 5 deletions

View File

@ -12,7 +12,7 @@ from calibre.ebooks.chardet import detect
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \ from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
separate_paragraphs_single_line, separate_paragraphs_print_formatted, \ separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
preserve_spaces, detect_paragraph_type, detect_formatting_type, \ preserve_spaces, detect_paragraph_type, detect_formatting_type, \
normalize_line_endings, convert_textile normalize_line_endings, convert_textile, remove_indents
from calibre import _ent_pat, xml_entity_to_unicode from calibre import _ent_pat, xml_entity_to_unicode
class TXTInput(InputFormatPlugin): class TXTInput(InputFormatPlugin):
@ -47,6 +47,9 @@ class TXTInput(InputFormatPlugin):
OptionRecommendation(name='preserve_spaces', recommended_value=False, OptionRecommendation(name='preserve_spaces', recommended_value=False,
help=_('Normally extra spaces are condensed into a single space. ' help=_('Normally extra spaces are condensed into a single space. '
'With this option all spaces will be displayed.')), 'With this option all spaces will be displayed.')),
OptionRecommendation(name='txt_in_remove_indents', recommended_value=False,
help=_('Normally extra space at the beginning of lines is retained. '
'With this option they will be removed.')),
OptionRecommendation(name="markdown_disable_toc", recommended_value=False, OptionRecommendation(name="markdown_disable_toc", recommended_value=False,
help=_('Do not insert a Table of Contents into the output text.')), help=_('Do not insert a Table of Contents into the output text.')),
]) ])
@ -101,6 +104,9 @@ class TXTInput(InputFormatPlugin):
setattr(options, 'enable_heuristics', True) setattr(options, 'enable_heuristics', True)
setattr(options, 'unwrap_lines', False) setattr(options, 'unwrap_lines', False)
if options.txt_in_remove_indents:
txt = remove_indents(txt)
# Preserve spaces will replace multiple spaces to a space # Preserve spaces will replace multiple spaces to a space
# followed by the &nbsp; entity. # followed by the &nbsp; entity.
if options.preserve_spaces: if options.preserve_spaces:

View File

@ -24,14 +24,14 @@ def clean_txt(txt):
# all line breaks with \n. # all line breaks with \n.
txt = '\n'.join([line.rstrip() for line in txt.splitlines()]) txt = '\n'.join([line.rstrip() for line in txt.splitlines()])
# Replace whitespace at the beginning of the list with &nbsp; # Replace whitespace at the beginning of the line with &nbsp;
txt = re.sub('(?m)(?P<space>[ ]+)', lambda mo: '&nbsp;' * mo.groups('space').count(' '), txt) txt = re.sub('(?m)(?P<space>^[ ]+)(?=.)', lambda mo: '&nbsp;' * mo.groups('space').count(' '), txt)
txt = re.sub('(?m)(?P<space>[\t]+)', lambda mo: '&nbsp;' * 4 * mo.groups('space').count('\t'), txt) txt = re.sub('(?m)(?P<space>^[\t]+)(?=.)', lambda mo: '&nbsp;' * 4 * mo.groups('space').count('\t'), txt)
# Condense redundant spaces # Condense redundant spaces
txt = re.sub('[ ]{2,}', ' ', txt) txt = re.sub('[ ]{2,}', ' ', txt)
# Remove blank lines from the beginning and end of the document. # Remove blank space from the beginning and end of the document.
txt = re.sub('^\s+(?=.)', '', txt) txt = re.sub('^\s+(?=.)', '', txt)
txt = re.sub('(?<=.)\s+$', '', txt) txt = re.sub('(?<=.)\s+$', '', txt)
# Remove excessive line breaks. # Remove excessive line breaks.
@ -107,6 +107,10 @@ def preserve_spaces(txt):
txt = txt.replace('\t', '&nbsp;&nbsp;&nbsp;&nbsp;') txt = txt.replace('\t', '&nbsp;&nbsp;&nbsp;&nbsp;')
return txt return txt
def remove_indents(txt):
    '''
    Remove the whitespace found at the beginning of each line of txt.

    Only horizontal whitespace is stripped. Line-break characters are never
    consumed, so blank lines (and therefore the number of lines) are kept
    intact; a line made up solely of whitespace becomes an empty line.

    :param txt: The text to process.
    :return: txt with every line's leading whitespace removed.
    '''
    # [^\S\r\n] means "whitespace other than CR or LF". A plain \s would,
    # in multiline mode, also match the newline that ends a blank line and
    # run on into the next line, merging paragraphs together.
    return re.sub(r'(?mu)^[^\S\r\n]+', '', txt)
def opf_writer(path, opf_name, manifest, spine, mi): def opf_writer(path, opf_name, manifest, spine, mi):
opf = OPFCreator(path, mi) opf = OPFCreator(path, mi)
opf.create_manifest(manifest) opf.create_manifest(manifest)