From 3405615e54da2f2aa7345d1f51525acd250cbd91 Mon Sep 17 00:00:00 2001 From: Sengian Date: Sat, 31 Jul 2010 13:15:47 +0200 Subject: [PATCH] Remove invalid ASCII characters from plain text files --- src/calibre/ebooks/txt/input.py | 3 ++- src/calibre/ebooks/txt/processor.py | 25 +++++++++++++++---------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index b444bf1cf4..935a187d5d 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -57,6 +57,7 @@ class TXTInput(InputFormatPlugin): txt = preserve_spaces(txt) txt = _ent_pat.sub(xml_entity_to_unicode, txt) + txt = txt.encode('utf-8') if options.markdown: log.debug('Running text though markdown conversion...') @@ -79,7 +80,7 @@ class TXTInput(InputFormatPlugin): base = os.path.dirname(stream.name) htmlfile = open(os.path.join(base, 'temp_calibre_txt_input_to_html.html'), 'wb') - htmlfile.write(html.encode('utf-8')) + htmlfile.write(html) #html.encode('utf-8') htmlfile.close() cwd = os.getcwdu() odi = options.debug_pipeline diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index 91c274a7b1..6bd635b6df 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -19,7 +19,7 @@ HTML_TEMPLATE = u'