Use clean_ascii_chars in txt/processor

This commit is contained in:
Sengian 2010-12-07 21:43:09 +01:00
parent 251cde2902
commit 824f8b5a67

View File

@ -9,6 +9,7 @@ import os, re
from calibre import prepare_string_for_xml, isbytestring from calibre import prepare_string_for_xml, isbytestring
from calibre.ebooks.markdown import markdown from calibre.ebooks.markdown import markdown
from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.utils.cleantext import clean_ascii_chars
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
@ -31,10 +32,8 @@ def convert_basic(txt, title='', epub_split_size_kb=0):
txt = re.sub('(?<=.)\s+$', '', txt) txt = re.sub('(?<=.)\s+$', '', txt)
# Remove excessive line breaks. # Remove excessive line breaks.
txt = re.sub('\n{3,}', '\n\n', txt) txt = re.sub('\n{3,}', '\n\n', txt)
#remove ASCII invalid chars : 0 to 8 and 11-14 to 24 #remove ASCII invalid chars
chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19)) txt = clean_ascii_chars(txt)
illegal_chars = re.compile(u'|'.join(map(unichr, chars)))
txt = illegal_chars.sub('', txt)
#Takes care if there is no point to split #Takes care if there is no point to split
if epub_split_size_kb > 0: if epub_split_size_kb > 0:
if isinstance(txt, unicode): if isinstance(txt, unicode):