mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
TXT Input: Fix bug where spaces were not retained properly. Fix bug where spaces were replaced with entities (this should only have happened at the beginning of lines). Add option to remove indents.
This commit is contained in:
parent
2796960f42
commit
1f708746d0
@ -12,7 +12,7 @@ from calibre.ebooks.chardet import detect
|
|||||||
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
|
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
|
||||||
separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
|
separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
|
||||||
preserve_spaces, detect_paragraph_type, detect_formatting_type, \
|
preserve_spaces, detect_paragraph_type, detect_formatting_type, \
|
||||||
normalize_line_endings, convert_textile
|
normalize_line_endings, convert_textile, remove_indents
|
||||||
from calibre import _ent_pat, xml_entity_to_unicode
|
from calibre import _ent_pat, xml_entity_to_unicode
|
||||||
|
|
||||||
class TXTInput(InputFormatPlugin):
|
class TXTInput(InputFormatPlugin):
|
||||||
@ -47,6 +47,9 @@ class TXTInput(InputFormatPlugin):
|
|||||||
OptionRecommendation(name='preserve_spaces', recommended_value=False,
|
OptionRecommendation(name='preserve_spaces', recommended_value=False,
|
||||||
help=_('Normally extra spaces are condensed into a single space. '
|
help=_('Normally extra spaces are condensed into a single space. '
|
||||||
'With this option all spaces will be displayed.')),
|
'With this option all spaces will be displayed.')),
|
||||||
|
OptionRecommendation(name='txt_in_remove_indents', recommended_value=False,
|
||||||
|
help=_('Normally extra space at the beginning of lines is retained. '
|
||||||
|
'With this option they will be removed.')),
|
||||||
OptionRecommendation(name="markdown_disable_toc", recommended_value=False,
|
OptionRecommendation(name="markdown_disable_toc", recommended_value=False,
|
||||||
help=_('Do not insert a Table of Contents into the output text.')),
|
help=_('Do not insert a Table of Contents into the output text.')),
|
||||||
])
|
])
|
||||||
@ -101,6 +104,9 @@ class TXTInput(InputFormatPlugin):
|
|||||||
setattr(options, 'enable_heuristics', True)
|
setattr(options, 'enable_heuristics', True)
|
||||||
setattr(options, 'unwrap_lines', False)
|
setattr(options, 'unwrap_lines', False)
|
||||||
|
|
||||||
|
if options.txt_in_remove_indents:
|
||||||
|
txt = remove_indents(txt)
|
||||||
|
|
||||||
# Preserve spaces will replace multiple spaces to a space
|
# Preserve spaces will replace multiple spaces to a space
|
||||||
# followed by the entity.
|
# followed by the entity.
|
||||||
if options.preserve_spaces:
|
if options.preserve_spaces:
|
||||||
|
@ -24,14 +24,14 @@ def clean_txt(txt):
|
|||||||
# all line breaks with \n.
|
# all line breaks with \n.
|
||||||
txt = '\n'.join([line.rstrip() for line in txt.splitlines()])
|
txt = '\n'.join([line.rstrip() for line in txt.splitlines()])
|
||||||
|
|
||||||
# Replace whitespace at the beginning of the list with
|
# Replace whitespace at the beginning of the line with
|
||||||
txt = re.sub('(?m)(?P<space>[ ]+)', lambda mo: ' ' * mo.groups('space').count(' '), txt)
|
txt = re.sub('(?m)(?P<space>^[ ]+)(?=.)', lambda mo: ' ' * mo.groups('space').count(' '), txt)
|
||||||
txt = re.sub('(?m)(?P<space>[\t]+)', lambda mo: ' ' * 4 * mo.groups('space').count('\t'), txt)
|
txt = re.sub('(?m)(?P<space>^[\t]+)(?=.)', lambda mo: ' ' * 4 * mo.groups('space').count('\t'), txt)
|
||||||
|
|
||||||
# Condense redundant spaces
|
# Condense redundant spaces
|
||||||
txt = re.sub('[ ]{2,}', ' ', txt)
|
txt = re.sub('[ ]{2,}', ' ', txt)
|
||||||
|
|
||||||
# Remove blank lines from the beginning and end of the document.
|
# Remove blank space from the beginning and end of the document.
|
||||||
txt = re.sub('^\s+(?=.)', '', txt)
|
txt = re.sub('^\s+(?=.)', '', txt)
|
||||||
txt = re.sub('(?<=.)\s+$', '', txt)
|
txt = re.sub('(?<=.)\s+$', '', txt)
|
||||||
# Remove excessive line breaks.
|
# Remove excessive line breaks.
|
||||||
@ -107,6 +107,10 @@ def preserve_spaces(txt):
|
|||||||
txt = txt.replace('\t', ' ')
|
txt = txt.replace('\t', ' ')
|
||||||
return txt
|
return txt
|
||||||
|
|
||||||
|
def remove_indents(txt):
    '''
    Remove the whitespace at the beginning of every line of txt.

    Only horizontal (non-newline) whitespace is stripped. Line breaks are
    never consumed, so blank lines that separate paragraphs are retained
    and the number of lines in the text does not change.

    :param txt: The text to process.
    :return: txt with the leading whitespace of each line removed.
    '''
    # [^\S\n] means "whitespace, but not a newline". A plain \s would also
    # match '\n', so a match starting on an empty line would swallow the
    # line break(s) together with the indent of the following line, merging
    # paragraphs.
    return re.sub(r'(?mu)^[^\S\n]+', '', txt)
|
||||||
|
|
||||||
def opf_writer(path, opf_name, manifest, spine, mi):
|
def opf_writer(path, opf_name, manifest, spine, mi):
|
||||||
opf = OPFCreator(path, mi)
|
opf = OPFCreator(path, mi)
|
||||||
opf.create_manifest(manifest)
|
opf.create_manifest(manifest)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user