TXT Output: clean ASCII characters. Textile output: remove span attributes.

This commit is contained in:
John Schember 2011-02-06 19:45:39 -05:00
parent 6548dbd33c
commit 1aa66f42fe
2 changed files with 3 additions and 0 deletions

View File

@@ -15,6 +15,7 @@ from calibre.ebooks.oeb.base import OEB_IMAGES
from calibre.ebooks.txt.txtml import TXTMLizer from calibre.ebooks.txt.txtml import TXTMLizer
from calibre.ebooks.txt.newlines import TxtNewlines, specified_newlines from calibre.ebooks.txt.newlines import TxtNewlines, specified_newlines
from calibre.ptempfile import TemporaryDirectory, TemporaryFile from calibre.ptempfile import TemporaryDirectory, TemporaryFile
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
class TXTOutput(OutputFormatPlugin): class TXTOutput(OutputFormatPlugin):
@@ -79,6 +80,7 @@ class TXTOutput(OutputFormatPlugin):
writer = TXTMLizer(log) writer = TXTMLizer(log)
txt = writer.extract_content(oeb_book, opts) txt = writer.extract_content(oeb_book, opts)
txt = clean_ascii_chars(txt)
log.debug('\tReplacing newlines with selected type...') log.debug('\tReplacing newlines with selected type...')
txt = specified_newlines(TxtNewlines(opts.newline).newline, txt) txt = specified_newlines(TxtNewlines(opts.newline).newline, txt)

View File

@@ -41,6 +41,7 @@ class TextileMLizer(object):
html = re.sub(r'<\s*img[^>]*>', '', html) html = re.sub(r'<\s*img[^>]*>', '', html)
text = html2textile(html) text = html2textile(html)
text = text.replace('%', '')
# Ensure the section ends with at least two new line characters. # Ensure the section ends with at least two new line characters.
# This is to prevent the last paragraph from a section being # This is to prevent the last paragraph from a section being