Merge+RTF cleaning

2025-07-09 03:04:10 -04:00 · 2011-10-18 23:09:40 +02:00 · 2011-10-18 23:09:40 +02:00 · ed972d487b
commit ed972d487b
parent b027cafa1f 4736f0df8a
5 changed files with 85 additions and 67 deletions
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -49,6 +49,15 @@ class ANDROID(USBMS):
                       0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216],
                       0x70c6 : [0x226]
                     },
+            # Freescale
+            0x15a2 : {
+                0x0c01 : [0x226]
+            },
+
+            # Alcatel
+            0x05c6 : {
+                0x9018 : [0x0226],
+            },

            # Sony Ericsson
            0xfce : {
@ -139,7 +148,8 @@ class ANDROID(USBMS):
    VENDOR_NAME      = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
            'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
            'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
-            'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON', 'VIZIO']
+            'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
+            'VIZIO', 'GOOGLE', 'FREESCAL']
    WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
            '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
            'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
@ -150,7 +160,7 @@ class ANDROID(USBMS):
            'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK',
            'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
            'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
-            'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008']
+            'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI']
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -693,6 +693,8 @@ OptionRecommendation(name='sr3_replace',
    def unarchive(self, path, tdir):
        extract(path, tdir)
        files = list(walk(tdir))
+        files = [f if isinstance(f, unicode) else f.decode(filesystem_encoding)
+                for f in files]
        from calibre.customize.ui import available_input_formats
        fmts = available_input_formats()
        for x in ('htm', 'html', 'xhtm', 'xhtml'): fmts.remove(x)
--- a/src/calibre/ebooks/rtf2xml/convert_to_tags.py
+++ b/src/calibre/ebooks/rtf2xml/convert_to_tags.py
@ -1,4 +1,5 @@
 import os, tempfile, sys
+from codecs import EncodedFile

 from calibre.ebooks.rtf2xml import copy, check_encoding

@ -40,6 +41,7 @@ class ConvertToTags:
        self.__run_level = run_level
        self.__write_to = tempfile.mktemp()
        self.__convert_utf = False
+        self.__bad_encoding = False

    def __initiate_values(self):
        """
@ -219,6 +221,7 @@ class ConvertToTags:
            self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
            sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
                    ' hope for the best')
+            self.__bad_encoding = True
        self.__new_line = 0
        self.__write_new_line()
        if self.__no_dtd:
@ -246,7 +249,7 @@ class ConvertToTags:
        the appropriate function.
        The functions that are called:
            a text function for text
-            an open funciton for open tags
+            an open function for open tags
            an open with attribute function for tags with attributes
            an empty with attribute function for tags that are empty but have
            attribtes.
@ -262,20 +265,33 @@ class ConvertToTags:
                    action = self.__state_dict.get(self.__token_info)
                    if action is not None:
                        action(line)
-        self.__write_obj.close()
-        #convert all encodings to UTF8 to avoid unsupported encodings in lxml
-        if self.__convert_utf:
+        #convert all encodings to UTF8 or ASCII to avoid unsupported encodings in lxml
+        if self.__convert_utf or self.__bad_encoding:
            copy_obj = copy.Copy(bug_handler = self.__bug_handler)
            copy_obj.rename(self.__write_to, self.__file)
+            file_encoding = "utf-8"
+            if self.__bad_encoding:
+                file_encoding = "us-ascii"
            with open(self.__file, 'r') as read_obj:
                with open(self.__write_to, 'w') as write_obj:
-                    file = read_obj.read()
                    try:
-                        file = file.decode(self.__encoding)
-                        write_obj.write(file.encode('utf-8'))
+                        write_objenc = EncodedFile(write_obj, self.__encoding,
+                                        file_encoding, 'strict')
+                        for line in read_obj:
+                            write_objenc.write(line)
                    except:
-                        sys.stderr.write('Conversion to UTF-8 is not possible,'
-                        ' encoding should be very carefully checked')
+                        if self.__convert_utf:
+                            sys.stderr.write('Conversion to UTF-8 is problematic,'
+                            ' encoding should be very carefully checked')
+                        # restart from the top: the strict pass already consumed
+                        # part of read_obj and wrote partial output to write_obj
+                        read_obj.seek(0)
+                        write_obj.seek(0)
+                        write_obj.truncate()
+                        write_objenc = EncodedFile(write_obj, self.__encoding,
+                                        file_encoding, 'replace')
+                        for line in read_obj:
+                            write_objenc.write(line)
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "convert_to_tags.data")
--- a/src/calibre/ebooks/rtf2xml/output.py
+++ b/src/calibre/ebooks/rtf2xml/output.py
@ -10,7 +10,9 @@
 #                                                                       #
 #                                                                       #
 #########################################################################
-import sys, os, codecs
+import sys, os
+# , codecs
+
 class Output:
    """
    Output file
@ -19,7 +21,8 @@ class Output:
            file,
            orig_file,
            output_dir = None,
-            out_file = None
+            out_file = None,
+            no_ask = True
            ):
        """
        Required:
@ -33,8 +36,9 @@ class Output:
        self.__file = file
        self.__orig_file = orig_file
        self.__output_dir = output_dir
-        self.__no_ask = 1
+        self.__no_ask = no_ask
        self.__out_file = out_file
+
    def output(self):
        """
        Required:
@ -45,13 +49,14 @@ class Output:
            output the line to the screen if no output file given. Otherwise, output to
            the file.
        """
-        # self.__output_xml(self.__file, self.__out_file)
        if self.__output_dir:
            self.__output_to_dir_func()
        elif self.__out_file:
-            self.__output_xml(self.__file, self.__out_file)
+            self.__output_to_file_func()
+            # self.__output_xml(self.__file, self.__out_file)
        else:
            self.__output_to_standard_func()
+
    def __output_to_dir_func(self):
        """
        Requires:
@ -64,32 +69,25 @@ class Output:
        """
        base_name = os.path.basename(self.__orig_file)
        base_name, ext  = os.path.splitext(base_name)
-        output_file = '%s.xml' % base_name
-        output_file = os.path.join(self.__output_dir, output_file)
+        output_file = os.path.join(self.__output_dir, '%s.xml' % base_name)
        # change if user wants to output to a specific file
        if self.__out_file:
            output_file = os.path.join(self.__output_dir, self.__out_file)
        user_response = 'o'
-        if os.path.isfile(output_file):
-            if self.__no_ask:
-                user_response = 'o'
-            else:
-                msg = 'Do you want to over-write %s?\n' % output_file
-                msg += 'Type "o" to over-write.\n'
-                msg += 'Type any other key to print to standard output.\n'
-                sys.stderr.write(msg)
-                user_response = raw_input()
+        if os.path.isfile(output_file) and not self.__no_ask:
+            msg = 'Do you want to overwrite %s?\n' % output_file
+            msg += ('Type "o" to overwrite.\n'
+                    'Type any other key to print to standard output.\n')
+            sys.stderr.write(msg)
+            user_response = raw_input()
        if user_response == 'o':
-            read_obj = open(self.__file, 'r')
-            write_obj = open(output_file, 'w')
-            line = 1
-            while line:
-                line = read_obj.readline()
-                write_obj.write(line)
-            read_obj.close()
-            write_obj.close()
+            with open(self.__file, 'r') as read_obj:
+                with open(output_file, 'w') as write_obj:  # local path built above
+                    for line in read_obj:
+                        write_obj.write(line)
        else:
            self.__output_to_standard_func()
+
    def __output_to_file_func(self):
        """
        Required:
@ -99,14 +97,11 @@ class Output:
        Logic:
            read one line at a time. Output to standard
        """
-        read_obj = open(self.__file, 'r')
-        write_obj = open(self.__out_file, 'w')
-        line = 1
-        while line:
-            line = read_obj.readline()
-            write_obj.write(line)
-        read_obj.close()
-        write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            with open(self.__out_file, 'w') as write_obj:
+                for line in read_obj:
+                    write_obj.write(line)
+
    def __output_to_standard_func(self):
        """
        Required:
@ -116,26 +111,24 @@ class Output:
        Logic:
            read one line at a time. Output to standard
        """
-        read_obj = open(self.__file, 'r')
-        line = 1
-        while line:
-            line = read_obj.readline()
-            sys.stdout.write(line)
-        read_obj.close()
-    def __output_xml(self, in_file, out_file):
-        """
-        output the ill-formed xml file
-        """
-        (utf8_encode, utf8_decode, utf8_reader, utf8_writer) = codecs.lookup("utf-8")
-        write_obj = utf8_writer(open(out_file, 'w'))
-        write_obj = open(out_file, 'w')
-        read_obj = utf8_writer(open(in_file, 'r'))
-        read_obj = open(in_file, 'r')
-        line = 1
-        while line:
-            line = read_obj.readline()
-            if isinstance(line, type(u"")):
-                line = line.encode("utf-8")
-            write_obj.write(line)
-        read_obj.close()
-        write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            for line in read_obj:
+                sys.stdout.write(line)
+
+    # def __output_xml(self, in_file, out_file):
+        # """
+        # output the ill-formed xml file
+        # """
+        # (utf8_encode, utf8_decode, utf8_reader, utf8_writer) = codecs.lookup("utf-8")
+        # write_obj = utf8_writer(open(out_file, 'w'))
+        # write_obj = open(out_file, 'w')
+        # read_obj = utf8_writer(open(in_file, 'r'))
+        # read_obj = open(in_file, 'r')
+        # line = 1
+        # while line:
+            # line = read_obj.readline()
+            # if isinstance(line, type(u"")):
+                # line = line.encode("utf-8")
+            # write_obj.write(line)
+        # read_obj.close()
+        # write_obj.close()
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@ -2103,7 +2103,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        user_mi = mi.get_all_user_metadata(make_copy=False)
        for key in user_mi.iterkeys():
            if key in self.field_metadata and \
-                    user_mi[key]['datatype'] == self.field_metadata[key]['datatype']:
+                    user_mi[key]['datatype'] == self.field_metadata[key]['datatype'] and \
+                    (user_mi[key]['datatype'] != 'text' or
+                     user_mi[key]['is_multiple'] == self.field_metadata[key]['is_multiple']):
                val = mi.get(key, None)
                if force_changes or val is not None:
                    doit(self.set_custom, id, val=val, extra=mi.get_extra(key),