From 24e60cc35778d6699e8f90d5a1308e93d75c0a86 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 12 Jan 2011 12:55:15 -0700 Subject: [PATCH] Remove code duplication --- src/calibre/ebooks/txt/processor.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index 3702bbfabe..e1979063c0 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -1,4 +1,8 @@ # -*- coding: utf-8 -*- +__license__ = 'GPL v3' +__copyright__ = '2009, John Schember ' +__docformat__ = 'restructuredtext en' + ''' Read content from txt file. @@ -10,10 +14,7 @@ from calibre import prepare_string_for_xml, isbytestring from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor from calibre.ebooks.conversion.preprocess import DocAnalysis - -__license__ = 'GPL v3' -__copyright__ = '2009, John Schember ' -__docformat__ = 'restructuredtext en' +from calibre.utils.cleantext import clean_ascii_chars HTML_TEMPLATE = u'%s\n%s\n' @@ -33,9 +34,7 @@ def clean_txt(txt): # Remove excessive line breaks. txt = re.sub('\n{3,}', '\n\n', txt) #remove ASCII invalid chars : 0 to 8 and 11-14 to 24 - chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19)) - illegal_chars = re.compile(u'|'.join(map(unichr, chars))) - txt = illegal_chars.sub('', txt) + txt = clean_ascii_chars(txt) return txt