Merge+RTF cleaning

This commit is contained in:
Sengian 2011-10-18 23:09:40 +02:00
commit ed972d487b
5 changed files with 85 additions and 67 deletions

View File

@ -49,6 +49,15 @@ class ANDROID(USBMS):
0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216],
0x70c6 : [0x226]
},
# Freescale
0x15a2 : {
0x0c01 : [0x226]
},
# Alcatel
0x05c6 : {
0x9018 : [0x0226],
},
# Sony Ericsson
0xfce : {
@ -139,7 +148,8 @@ class ANDROID(USBMS):
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON', 'VIZIO']
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
'VIZIO', 'GOOGLE', 'FREESCAL']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
@ -150,7 +160,7 @@ class ANDROID(USBMS):
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK',
'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008']
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',

View File

@ -693,6 +693,8 @@ OptionRecommendation(name='sr3_replace',
def unarchive(self, path, tdir):
extract(path, tdir)
files = list(walk(tdir))
files = [f if isinstance(f, unicode) else f.decode(filesystem_encoding)
for f in files]
from calibre.customize.ui import available_input_formats
fmts = available_input_formats()
for x in ('htm', 'html', 'xhtm', 'xhtml'): fmts.remove(x)

View File

@ -1,4 +1,5 @@
import os, tempfile, sys
from codecs import EncodedFile
from calibre.ebooks.rtf2xml import copy, check_encoding
@ -40,6 +41,7 @@ class ConvertToTags:
self.__run_level = run_level
self.__write_to = tempfile.mktemp()
self.__convert_utf = False
self.__bad_encoding = False
def __initiate_values(self):
"""
@ -219,6 +221,7 @@ class ConvertToTags:
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
' hope for the best')
self.__bad_encoding = True
self.__new_line = 0
self.__write_new_line()
if self.__no_dtd:
@ -246,7 +249,7 @@ class ConvertToTags:
the appropriate function.
The functions that are called:
a text function for text
an open funciton for open tags
an open function for open tags
an open with attribute function for tags with attributes
an empty with attribute function for tags that are empty but have
attribtes.
@ -262,20 +265,28 @@ class ConvertToTags:
action = self.__state_dict.get(self.__token_info)
if action is not None:
action(line)
self.__write_obj.close()
#convert all encodings to UTF8 to avoid unsupported encodings in lxml
if self.__convert_utf:
#convert all encodings to UTF8 or ASCII to avoid unsupported encodings in lxml
if self.__convert_utf or self.__bad_encoding:
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
copy_obj.rename(self.__write_to, self.__file)
file_encoding = "utf-8"
if self.__bad_encoding:
file_encoding = "us-ascii"
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as write_obj:
file = read_obj.read()
try:
file = file.decode(self.__encoding)
write_obj.write(file.encode('utf-8'))
write_objenc = EncodedFile(write_obj, self.__encoding,
file_encoding, 'strict')
for line in read_obj:
write_objenc.write(line)
except:
sys.stderr.write('Conversion to UTF-8 is not possible,'
' encoding should be very carefully checked')
if self.__convert_utf:
sys.stderr.write('Conversion to UTF-8 is problematic,'
' encoding should be very carefully checked')
write_objenc = EncodedFile(write_obj, self.__encoding,
file_encoding, 'replace')
for line in read_obj:
write_objenc.write(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "convert_to_tags.data")

View File

@ -10,7 +10,9 @@
# #
# #
#########################################################################
import sys, os, codecs
import sys, os
# , codecs
class Output:
"""
Output file
@ -19,7 +21,8 @@ class Output:
file,
orig_file,
output_dir = None,
out_file = None
out_file = None,
no_ask = True
):
"""
Required:
@ -33,8 +36,9 @@ class Output:
self.__file = file
self.__orig_file = orig_file
self.__output_dir = output_dir
self.__no_ask = 1
self.__no_ask = no_ask
self.__out_file = out_file
def output(self):
"""
Required:
@ -45,13 +49,14 @@ class Output:
output the line to the screen if no output file given. Otherwise, output to
the file.
"""
# self.__output_xml(self.__file, self.__out_file)
if self.__output_dir:
self.__output_to_dir_func()
elif self.__out_file:
self.__output_xml(self.__file, self.__out_file)
self.__output_to_file_func()
# self.__output_xml(self.__file, self.__out_file)
else:
self.__output_to_standard_func()
def __output_to_dir_func(self):
"""
Requires:
@ -64,32 +69,25 @@ class Output:
"""
base_name = os.path.basename(self.__orig_file)
base_name, ext = os.path.splitext(base_name)
output_file = '%s.xml' % base_name
output_file = os.path.join(self.__output_dir, output_file)
output_file = os.path.join(self.__output_dir, '%s.xml' % base_name)
# change if user wants to output to a specific file
if self.__out_file:
output_file = os.path.join(self.__output_dir, self.__out_file)
user_response = 'o'
if os.path.isfile(output_file):
if self.__no_ask:
user_response = 'o'
else:
msg = 'Do you want to over-write %s?\n' % output_file
msg += 'Type "o" to over-write.\n'
msg += 'Type any other key to print to standard output.\n'
sys.stderr.write(msg)
user_response = raw_input()
if os.path.isfile(output_file) and not self.__no_ask:
msg = 'Do you want to overwrite %s?\n' % output_file
msg += ('Type "o" to overwrite.\n'
'Type any other key to print to standard output.\n')
sys.stderr.write(msg)
user_response = raw_input()
if user_response == 'o':
read_obj = open(self.__file, 'r')
write_obj = open(output_file, 'w')
line = 1
while line:
line = read_obj.readline()
write_obj.write(line)
read_obj.close()
write_obj.close()
with open(self.__file, 'r') as read_obj:
with open(self.output_file, 'w') as write_obj:
for line in read_obj:
write_obj.write(line)
else:
self.__output_to_standard_func()
def __output_to_file_func(self):
"""
Required:
@ -99,14 +97,11 @@ class Output:
Logic:
read one line at a time. Output to standard
"""
read_obj = open(self.__file, 'r')
write_obj = open(self.__out_file, 'w')
line = 1
while line:
line = read_obj.readline()
write_obj.write(line)
read_obj.close()
write_obj.close()
with open(self.__file, 'r') as read_obj:
with open(self.__out_file, 'w') as write_obj:
for line in read_obj:
write_obj.write(line)
def __output_to_standard_func(self):
"""
Required:
@ -116,26 +111,24 @@ class Output:
Logic:
read one line at a time. Output to standard
"""
read_obj = open(self.__file, 'r')
line = 1
while line:
line = read_obj.readline()
sys.stdout.write(line)
read_obj.close()
def __output_xml(self, in_file, out_file):
"""
output the ill-formed xml file
"""
(utf8_encode, utf8_decode, utf8_reader, utf8_writer) = codecs.lookup("utf-8")
write_obj = utf8_writer(open(out_file, 'w'))
write_obj = open(out_file, 'w')
read_obj = utf8_writer(open(in_file, 'r'))
read_obj = open(in_file, 'r')
line = 1
while line:
line = read_obj.readline()
if isinstance(line, type(u"")):
line = line.encode("utf-8")
write_obj.write(line)
read_obj.close()
write_obj.close()
with open(self.__file, 'r') as read_obj:
for line in read_obj:
sys.stdout.write(line)
# def __output_xml(self, in_file, out_file):
# """
# output the ill-formed xml file
# """
# (utf8_encode, utf8_decode, utf8_reader, utf8_writer) = codecs.lookup("utf-8")
# write_obj = utf8_writer(open(out_file, 'w'))
# write_obj = open(out_file, 'w')
# read_obj = utf8_writer(open(in_file, 'r'))
# read_obj = open(in_file, 'r')
# line = 1
# while line:
# line = read_obj.readline()
# if isinstance(line, type(u"")):
# line = line.encode("utf-8")
# write_obj.write(line)
# read_obj.close()
# write_obj.close()

View File

@ -2103,7 +2103,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
user_mi = mi.get_all_user_metadata(make_copy=False)
for key in user_mi.iterkeys():
if key in self.field_metadata and \
user_mi[key]['datatype'] == self.field_metadata[key]['datatype']:
user_mi[key]['datatype'] == self.field_metadata[key]['datatype'] and \
(user_mi[key]['datatype'] != 'text' or
user_mi[key]['is_multiple'] == self.field_metadata[key]['is_multiple']):
val = mi.get(key, None)
if force_changes or val is not None:
doit(self.set_custom, id, val=val, extra=mi.get_extra(key),