Correct splitting problem
parent 1f10817e13
commit 056f97c700
@@ -321,6 +321,6 @@ class RTFInput(InputFormatPlugin):
         opf.render(open('metadata.opf', 'wb'))
         return os.path.abspath('metadata.opf')

-#ebook-convert "bad.rtf" test.epub -v -d "D:\Mes eBooks\Developpement\debug"
-# os.makedirs('D:\\Mes eBooks\\Developpement\\rtfdebug')
-# debug_dir = 'D:\\Mes eBooks\\Developpement\\rtfdebug'
+#ebook-convert "bad.rtf" test.epub -v -d "E:\Mes eBooks\Developpement\debug"
+# os.makedirs('E:\\Mes eBooks\\Developpement\\rtfdebug')
+# debug_dir = 'E:\\Mes eBooks\\Developpement\\rtfdebug'

@@ -210,7 +210,7 @@ class Colors:
         hex_num = self.__color_dict.get(num)
         if hex_num is None:
             hex_num = '0'
-            if self.__run_level > 5:
+            if self.__run_level > 3:
                 msg = 'no value in self.__color_dict' \
                     'for key %s at line %d\n' % (num, self.__line)
                 raise self.__bug_handler, msg

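Note on the Colors hunk: the threshold drops from run level 5 to 3, so a colour number missing from the colour table is reported as a bug at lower verbosity levels, while still falling back to '0'. A minimal sketch of that lookup-with-fallback pattern, with hypothetical names (lookup_color, color_dict, BugHandler are stand-ins, not calibre's API):

class BugHandler(Exception):
    pass

def lookup_color(color_dict, num, run_level, line_no):
    # Fall back to '0' when the colour number is not in the table.
    hex_num = color_dict.get(num)
    if hex_num is None:
        hex_num = '0'
        # At run levels above 3, treat the missing entry as a bug instead of
        # silently substituting a default.
        if run_level > 3:
            raise BugHandler('no value in color_dict for key %s at line %d\n'
                             % (num, line_no))
    return hex_num
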
@@ -117,6 +117,7 @@ class Tokenize:
         input_file = self.__replace_spchar.mreplace(input_file)
         # this is for older RTF
         input_file = self.__par_exp.sub('\n\\par \n', input_file)
+        input_file = self.__cwdigit_exp.sub("\g<1>\n\g<2>", input_file)
         input_file = self.__cs_ast.sub("\g<1>", input_file)
         input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
         input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)

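Note on the line added here: it runs the control-word/digit split once over the whole input, before tokenization. The actual self.__cwdigit_exp pattern is defined elsewhere in tokenize.py and is not shown in this diff; the snippet below uses an assumed two-group pattern purely to illustrate what a \g<1>\n\g<2> substitution of this shape does:

import re

# Assumed stand-in for self.__cwdigit_exp: group 1 is a control word with its
# numeric parameter, group 2 is text that ended up glued onto the same token.
cwdigit_exp = re.compile(r"(\\[a-zA-Z]+-?\d+)([^\\ \n]+)")

sample = r"\fs24some glued text \par more"
print(cwdigit_exp.sub("\\g<1>\n\\g<2>", sample))
# Prints:
# \fs24
# some glued text \par more
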
@@ -177,13 +178,6 @@ class Tokenize:
         #self.__remove_line = re.compile(r'\n+')
         ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")

-    def __correct_spliting(self, token):
-        match_obj = re.search(self.__cwdigit_exp, token)
-        if match_obj is None:
-            return token
-        else:
-            return '%s\n%s' % (match_obj.group(1), match_obj.group(2))
-
     def tokenize(self):
         """Main class for handling other methods. Reads the file \
         , uses method self.sub_reg to make basic substitutions,\

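Note: the removed __correct_spliting helper performed the same split per token, after tokenization; the hunk at line 117 now does it once on the raw input, so affected tokens are already split when the file is tokenized. A small self-contained check of that equivalence, again with an assumed pattern standing in for self.__cwdigit_exp:

import re

# Assumed stand-in pattern; the real self.__cwdigit_exp lives in tokenize.py.
cwdigit_exp = re.compile(r"(\\[a-zA-Z]+-?\d+)(\D+)")

def correct_splitting(token):
    # Per-token version, equivalent to the removed __correct_spliting method.
    match_obj = re.search(cwdigit_exp, token)
    if match_obj is None:
        return token
    return '%s\n%s' % (match_obj.group(1), match_obj.group(2))

raw = '\\fs24some text\n\\par \n'
per_token = '\n'.join(correct_splitting(t) for t in raw.split('\n'))
up_front = cwdigit_exp.sub('\\g<1>\n\\g<2>', raw)
print(per_token == up_front)  # True for this input
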
@@ -199,8 +193,6 @@ class Tokenize:
         tokens = map(self.__unicode_process, tokens)
         #remove empty items created by removing \uc
         tokens = filter(lambda x: len(x) > 0, tokens)
-        #handles bothersome cases
-        tokens = map(self.__correct_spliting, tokens)

         #write
         with open(self.__write_to, 'wb') as write_obj:

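With the per-token correction gone, the remaining pipeline shown above is just: map a processing step over the tokens, filter out the empty ones, and write the result. A rough sketch of that shape (names are placeholders, not calibre's; the original code is Python 2, where map and filter return lists):

def run_pipeline(tokens, process, write_to):
    # Process each token, drop empty items, then write one token per line.
    tokens = map(process, tokens)
    tokens = filter(lambda x: len(x) > 0, tokens)
    with open(write_to, 'w') as write_obj:
        write_obj.write('\n'.join(tokens))

# Example: run_pipeline(['\\fs24', ' some text ', ''], str.strip, 'out.txt')
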