From 428fcdd1415194c62b2726ae26d2ce842a3536da Mon Sep 17 00:00:00 2001 From: Sengian Date: Thu, 6 Jan 2011 00:01:24 +0100 Subject: [PATCH] Move check encoding --- src/calibre/ebooks/rtf2xml/ParseRtf.py | 24 +++++++++++-------- .../ebooks/rtf2xml/default_encoding.py | 4 ++-- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py index 33dc585579..fdd17e3f78 100755 --- a/src/calibre/ebooks/rtf2xml/ParseRtf.py +++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py @@ -193,21 +193,13 @@ class ParseRtf: copy_obj.set_dir(self.__debug_dir) copy_obj.remove_files() copy_obj.copy_file(self.__temp_file, "original_file") - #Check to see if the file is correct ascii - check_encoding_obj = check_encoding.CheckEncoding( - bug_handler = RtfInvalidCodeException, - ) - if check_encoding_obj.check_encoding(self.__file): - file_name = self.__file if isinstance(self.__file, str) else self.__file.encode('utf-8') - msg = _('File %s does not appear to be ascii.\n') % file_name - raise InvalidRtfException, msg # Function to check if bracket are well handled if self.__debug_dir or self.__run_level > 2: self.__check_brack_obj = check_brackets.CheckBrackets\ (file = self.__temp_file, bug_handler = RtfInvalidCodeException, ) - # convert Macintosh and Windows line endings to Unix line endings + #convert Macintosh and Windows line endings to Unix line endings #why do this if you don't wb after? line_obj = line_endings.FixLineEndings( in_file = self.__temp_file, @@ -238,7 +230,19 @@ class ParseRtf: os.remove(self.__temp_file) except OSError: pass - raise InvalidRtfException, msg + #Check to see if the file is correctly encoded + check_encoding_obj = check_encoding.CheckEncoding( + bug_handler = RtfInvalidCodeException, + ) + if check_encoding_obj.check_encoding(self.__file, 'cp1252') and \ + check_encoding_obj.check_encoding(self.__file, 'cp437') and \ + check_encoding_obj.check_encoding(self.__file, 'cp850') and \ + check_encoding_obj.check_encoding(self.__file, 'mac_roman'): + file_name = self.__file if isinstance(self.__file, str) \ + else self.__file.encode('utf-8') + msg = _('File %s does not appear to be correctly encoded.\n') % file_name + raise InvalidRtfException, msg + delete_info_obj = delete_info.DeleteInfo( in_file = self.__temp_file, copy = self.__copy, diff --git a/src/calibre/ebooks/rtf2xml/default_encoding.py b/src/calibre/ebooks/rtf2xml/default_encoding.py index f89f54ada8..a5c2ab9561 100755 --- a/src/calibre/ebooks/rtf2xml/default_encoding.py +++ b/src/calibre/ebooks/rtf2xml/default_encoding.py @@ -74,9 +74,9 @@ class DefaultEncoding: self.__datafetched = True if self.__platform == 'Macintosh': code_page = self.__code_page - else + else: code_page = 'ansicpg' + self.__code_page - return platform, code_page, self.__default_num + return self.__platform, code_page, self.__default_num def get_codepage(self): if not self.__datafetched: