From 8bd3c8d53e0d7d14784fdc9027cd02dd32564b03 Mon Sep 17 00:00:00 2001 From: John Schember Date: Wed, 26 Jan 2011 20:15:44 -0500 Subject: [PATCH 1/2] TXT Input: Detect and retain soft scene breaks. --- src/calibre/ebooks/txt/processor.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index 43aadc6576..9d6868467e 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -31,7 +31,7 @@ def clean_txt(txt): txt = re.sub('^\s+(?=.)', '', txt) txt = re.sub('(?<=.)\s+$', '', txt) # Remove excessive line breaks. - txt = re.sub('\n{3,}', '\n\n', txt) + txt = re.sub('\n{5,}', '\n\n\n\n', txt) #remove ASCII invalid chars : 0 to 8 and 11-14 to 24 txt = clean_ascii_chars(txt) @@ -59,10 +59,16 @@ def convert_basic(txt, title='', epub_split_size_kb=0): txt = split_txt(txt, epub_split_size_kb) lines = [] + blank_count = 0 # Split into paragraphs based on having a blank line between text. - for line in txt.split('\n\n'): + for line in txt.split('\n'): if line.strip(): + blank_count = 0 lines.append(u'

%s

' % prepare_string_for_xml(line.replace('\n', ' '))) + else: + blank_count += 1 + if blank_count == 2: + lines.append(u'

 

') return HTML_TEMPLATE % (title, u'\n'.join(lines)) @@ -85,7 +91,8 @@ def normalize_line_endings(txt): return txt def separate_paragraphs_single_line(txt): - txt = re.sub(u'(?<=.)\n(?=.)', '\n\n', txt) + #txt = re.sub(u'(?<=.)\n(?=.)', '\n\n', txt) + txt = txt.replace('\n', '\n\n') return txt def separate_paragraphs_print_formatted(txt): From b3ec6480600acb0c701df9fc309abb37d6e3fa81 Mon Sep 17 00:00:00 2001 From: John Schember Date: Wed, 26 Jan 2011 20:17:09 -0500 Subject: [PATCH 2/2] TXT Input: Don't preserve spaces in heuristic processing --- src/calibre/ebooks/txt/input.py | 1 - src/calibre/ebooks/txt/processor.py | 1 - 2 files changed, 2 deletions(-) diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index 6ec1edb65c..2399e599ae 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -83,7 +83,6 @@ class TXTInput(InputFormatPlugin): setattr(options, 'markup_chapter_headings', True) setattr(options, 'italicize_common_cases', True) setattr(options, 'fix_indents', True) - setattr(options, 'preserve_spaces', True) setattr(options, 'delete_blank_paragraphs', True) setattr(options, 'format_scene_breaks', True) setattr(options, 'dehyphenate', True) diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index 9d6868467e..926e9a8dd6 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -91,7 +91,6 @@ def normalize_line_endings(txt): return txt def separate_paragraphs_single_line(txt): - #txt = re.sub(u'(?<=.)\n(?=.)', '\n\n', txt) txt = txt.replace('\n', '\n\n') return txt