From 1aa66f42fe809583a5fa462e26a9514042864db2 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sun, 6 Feb 2011 19:45:39 -0500 Subject: [PATCH] TXT Output: clean ascii characters. Textile output remove span attributes. --- src/calibre/ebooks/txt/output.py | 2 ++ src/calibre/ebooks/txt/textileml.py | 1 + 2 files changed, 3 insertions(+) diff --git a/src/calibre/ebooks/txt/output.py b/src/calibre/ebooks/txt/output.py index 3905081a84..d021cbbba6 100644 --- a/src/calibre/ebooks/txt/output.py +++ b/src/calibre/ebooks/txt/output.py @@ -15,6 +15,7 @@ from calibre.ebooks.oeb.base import OEB_IMAGES from calibre.ebooks.txt.txtml import TXTMLizer from calibre.ebooks.txt.newlines import TxtNewlines, specified_newlines from calibre.ptempfile import TemporaryDirectory, TemporaryFile +from calibre.utils.cleantext import clean_ascii_chars from calibre.utils.zipfile import ZipFile class TXTOutput(OutputFormatPlugin): @@ -79,6 +80,7 @@ class TXTOutput(OutputFormatPlugin): writer = TXTMLizer(log) txt = writer.extract_content(oeb_book, opts) + txt = clean_ascii_chars(txt) log.debug('\tReplacing newlines with selected type...') txt = specified_newlines(TxtNewlines(opts.newline).newline, txt) diff --git a/src/calibre/ebooks/txt/textileml.py b/src/calibre/ebooks/txt/textileml.py index d7e11695c5..284e4846d9 100644 --- a/src/calibre/ebooks/txt/textileml.py +++ b/src/calibre/ebooks/txt/textileml.py @@ -41,6 +41,7 @@ class TextileMLizer(object): html = re.sub(r'<\s*img[^>]*>', '', html) text = html2textile(html) + text = text.replace('%', '') # Ensure the section ends with at least two new line characters. # This is to prevent the last paragraph from a section being