Merge+RTF cleaning

This commit is contained in:
Sengian 2011-10-18 23:09:40 +02:00
commit ed972d487b
5 changed files with 85 additions and 67 deletions

View File

@ -49,6 +49,15 @@ class ANDROID(USBMS):
0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216], 0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216],
0x70c6 : [0x226] 0x70c6 : [0x226]
}, },
# Freescale
0x15a2 : {
0x0c01 : [0x226]
},
# Alcatel
0x05c6 : {
0x9018 : [0x0226],
},
# Sony Ericsson # Sony Ericsson
0xfce : { 0xfce : {
@ -139,7 +148,8 @@ class ANDROID(USBMS):
# Vendor identification strings for this driver.
# NOTE(review): presumably matched against the vendor string the device
# reports; confirm against the USBMS base class.
# Every entry appears exactly once.
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
        'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
        'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
        'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
        'VIZIO', 'FREESCAL']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE', WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897', '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
@ -150,7 +160,7 @@ class ANDROID(USBMS):
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK', 'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK',
'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612', 'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A', 'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008'] 'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD', 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',

View File

@ -693,6 +693,8 @@ OptionRecommendation(name='sr3_replace',
def unarchive(self, path, tdir): def unarchive(self, path, tdir):
extract(path, tdir) extract(path, tdir)
files = list(walk(tdir)) files = list(walk(tdir))
files = [f if isinstance(f, unicode) else f.decode(filesystem_encoding)
for f in files]
from calibre.customize.ui import available_input_formats from calibre.customize.ui import available_input_formats
fmts = available_input_formats() fmts = available_input_formats()
for x in ('htm', 'html', 'xhtm', 'xhtml'): fmts.remove(x) for x in ('htm', 'html', 'xhtm', 'xhtml'): fmts.remove(x)

View File

@ -1,4 +1,5 @@
import os, tempfile, sys import os, tempfile, sys
from codecs import EncodedFile
from calibre.ebooks.rtf2xml import copy, check_encoding from calibre.ebooks.rtf2xml import copy, check_encoding
@ -40,6 +41,7 @@ class ConvertToTags:
self.__run_level = run_level self.__run_level = run_level
self.__write_to = tempfile.mktemp() self.__write_to = tempfile.mktemp()
self.__convert_utf = False self.__convert_utf = False
self.__bad_encoding = False
def __initiate_values(self): def __initiate_values(self):
""" """
@ -219,6 +221,7 @@ class ConvertToTags:
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>') self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and' sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
' hope for the best') ' hope for the best')
self.__bad_encoding = True
self.__new_line = 0 self.__new_line = 0
self.__write_new_line() self.__write_new_line()
if self.__no_dtd: if self.__no_dtd:
@ -246,7 +249,7 @@ class ConvertToTags:
the appropriate function. the appropriate function.
The functions that are called: The functions that are called:
a text function for text a text function for text
an open funciton for open tags an open function for open tags
an open with attribute function for tags with attributes an open with attribute function for tags with attributes
an empty with attribute function for tags that are empty but have an empty with attribute function for tags that are empty but have
attributes. attributes.
@ -262,20 +265,28 @@ class ConvertToTags:
action = self.__state_dict.get(self.__token_info) action = self.__state_dict.get(self.__token_info)
if action is not None: if action is not None:
action(line) action(line)
self.__write_obj.close() #convert all encodings to UTF8 or ASCII to avoid unsupported encodings in lxml
#convert all encodings to UTF8 to avoid unsupported encodings in lxml if self.__convert_utf or self.__bad_encoding:
if self.__convert_utf:
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
copy_obj.rename(self.__write_to, self.__file) copy_obj.rename(self.__write_to, self.__file)
file_encoding = "utf-8"
if self.__bad_encoding:
file_encoding = "us-ascii"
with open(self.__file, 'r') as read_obj: with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as write_obj: with open(self.__write_to, 'w') as write_obj:
file = read_obj.read()
try: try:
file = file.decode(self.__encoding) write_objenc = EncodedFile(write_obj, self.__encoding,
write_obj.write(file.encode('utf-8')) file_encoding, 'strict')
for line in read_obj:
write_objenc.write(line)
except: except:
sys.stderr.write('Conversion to UTF-8 is not possible,' if self.__convert_utf:
sys.stderr.write('Conversion to UTF-8 is problematic,'
' encoding should be very carefully checked') ' encoding should be very carefully checked')
write_objenc = EncodedFile(write_obj, self.__encoding,
file_encoding, 'replace')
for line in read_obj:
write_objenc.write(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "convert_to_tags.data") copy_obj.copy_file(self.__write_to, "convert_to_tags.data")

View File

@ -10,7 +10,9 @@
# # # #
# # # #
######################################################################### #########################################################################
import sys, os, codecs import sys, os
# , codecs
class Output:
    """
    Output file

    Deliver an already converted file to one of three destinations: a file
    inside an output directory, an explicitly named file, or standard
    output.
    """

    def __init__(self,
            file,
            orig_file,
            output_dir = None,
            out_file = None,
            no_ask = True
            ):
        """
        Required:
            'file' -- path of the file whose content is to be output
            'orig_file' -- path of the original file; its base name (without
            extension) is used to build the default name inside output_dir
        Optional:
            'output_dir' -- directory to write the result into
            'out_file' -- name of the file to write; joined to output_dir
            when output_dir is given, used as is otherwise
            'no_ask' -- when true (the default) an existing file in
            output_dir is overwritten without prompting
        Returns:
            nothing
        """
        self.__file = file
        self.__orig_file = orig_file
        self.__output_dir = output_dir
        self.__no_ask = no_ask
        self.__out_file = out_file

    def output(self):
        """
        Required:
            nothing
        Returns:
            nothing
        Logic:
            Write into the output directory if one was given; otherwise
            write to the named output file if one was given; otherwise
            write to standard output.
        """
        if self.__output_dir:
            self.__output_to_dir_func()
        elif self.__out_file:
            self.__output_to_file_func()
        else:
            self.__output_to_standard_func()

    def __copy_to(self, destination):
        """
        Copy the content of the input file, line by line, to the path
        'destination', replacing any existing content.
        """
        with open(self.__file, 'r') as read_obj:
            with open(destination, 'w') as write_obj:
                for line in read_obj:
                    write_obj.write(line)

    def __output_to_dir_func(self):
        """
        Requires:
            nothing
        Returns:
            nothing
        Logic:
            The target is '<base name of orig_file>.xml' inside the output
            directory, or out_file inside the output directory when
            out_file was given. If the target already exists and no_ask is
            false, ask on standard error whether to overwrite it; any answer
            other than "o" sends the content to standard output instead.
        """
        base_name = os.path.basename(self.__orig_file)
        base_name, ext = os.path.splitext(base_name)
        output_file = os.path.join(self.__output_dir, '%s.xml' % base_name)
        # change if user wants to output to a specific file
        if self.__out_file:
            output_file = os.path.join(self.__output_dir, self.__out_file)
        user_response = 'o'
        if os.path.isfile(output_file) and not self.__no_ask:
            msg = 'Do you want to overwrite %s?\n' % output_file
            msg += ('Type "o" to overwrite.\n'
                'Type any other key to print to standard output.\n')
            sys.stderr.write(msg)
            user_response = raw_input()
        if user_response == 'o':
            # output_file is a local name; the instance has no attribute of
            # that name.
            self.__copy_to(output_file)
        else:
            self.__output_to_standard_func()

    def __output_to_file_func(self):
        """
        Required:
            nothing
        Returns:
            nothing
        Logic:
            read one line at a time. Output to the file named by out_file
        """
        self.__copy_to(self.__out_file)

    def __output_to_standard_func(self):
        """
        Required:
            nothing
        Returns:
            nothing
        Logic:
            read one line at a time. Output to standard
        """
        with open(self.__file, 'r') as read_obj:
            for line in read_obj:
                sys.stdout.write(line)

View File

@ -2103,7 +2103,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
user_mi = mi.get_all_user_metadata(make_copy=False) user_mi = mi.get_all_user_metadata(make_copy=False)
for key in user_mi.iterkeys(): for key in user_mi.iterkeys():
if key in self.field_metadata and \ if key in self.field_metadata and \
user_mi[key]['datatype'] == self.field_metadata[key]['datatype']: user_mi[key]['datatype'] == self.field_metadata[key]['datatype'] and \
(user_mi[key]['datatype'] != 'text' or
user_mi[key]['is_multiple'] == self.field_metadata[key]['is_multiple']):
val = mi.get(key, None) val = mi.get(key, None)
if force_changes or val is not None: if force_changes or val is not None:
doit(self.set_custom, id, val=val, extra=mi.get_extra(key), doit(self.set_custom, id, val=val, extra=mi.get_extra(key),