mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge+RTF cleaning
This commit is contained in:
commit
ed972d487b
@ -49,6 +49,15 @@ class ANDROID(USBMS):
|
||||
0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216],
|
||||
0x70c6 : [0x226]
|
||||
},
|
||||
# Freescale
|
||||
0x15a2 : {
|
||||
0x0c01 : [0x226]
|
||||
},
|
||||
|
||||
# Alcatel
|
||||
0x05c6 : {
|
||||
0x9018 : [0x0226],
|
||||
},
|
||||
|
||||
# Sony Ericsson
|
||||
0xfce : {
|
||||
@ -139,7 +148,8 @@ class ANDROID(USBMS):
|
||||
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
|
||||
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
|
||||
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
|
||||
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON', 'VIZIO']
|
||||
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
|
||||
'VIZIO', 'GOOGLE', 'FREESCAL']
|
||||
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
|
||||
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
|
||||
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
|
||||
@ -150,7 +160,7 @@ class ANDROID(USBMS):
|
||||
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK',
|
||||
'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
|
||||
'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
|
||||
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008']
|
||||
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI']
|
||||
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
||||
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
||||
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
|
||||
|
@ -693,6 +693,8 @@ OptionRecommendation(name='sr3_replace',
|
||||
def unarchive(self, path, tdir):
|
||||
extract(path, tdir)
|
||||
files = list(walk(tdir))
|
||||
files = [f if isinstance(f, unicode) else f.decode(filesystem_encoding)
|
||||
for f in files]
|
||||
from calibre.customize.ui import available_input_formats
|
||||
fmts = available_input_formats()
|
||||
for x in ('htm', 'html', 'xhtm', 'xhtml'): fmts.remove(x)
|
||||
|
@ -1,4 +1,5 @@
|
||||
import os, tempfile, sys
|
||||
from codecs import EncodedFile
|
||||
|
||||
from calibre.ebooks.rtf2xml import copy, check_encoding
|
||||
|
||||
@ -40,6 +41,7 @@ class ConvertToTags:
|
||||
self.__run_level = run_level
|
||||
self.__write_to = tempfile.mktemp()
|
||||
self.__convert_utf = False
|
||||
self.__bad_encoding = False
|
||||
|
||||
def __initiate_values(self):
|
||||
"""
|
||||
@ -219,6 +221,7 @@ class ConvertToTags:
|
||||
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
|
||||
sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
|
||||
' hope for the best')
|
||||
self.__bad_encoding = True
|
||||
self.__new_line = 0
|
||||
self.__write_new_line()
|
||||
if self.__no_dtd:
|
||||
@ -246,7 +249,7 @@ class ConvertToTags:
|
||||
the appropriate function.
|
||||
The functions that are called:
|
||||
a text function for text
|
||||
an open funciton for open tags
|
||||
an open function for open tags
|
||||
an open with attribute function for tags with attributes
|
||||
an empty with attribute function for tags that are empty but have
|
||||
attributes.
|
||||
@ -262,20 +265,28 @@ class ConvertToTags:
|
||||
action = self.__state_dict.get(self.__token_info)
|
||||
if action is not None:
|
||||
action(line)
|
||||
self.__write_obj.close()
|
||||
#convert all encodings to UTF8 to avoid unsupported encodings in lxml
|
||||
if self.__convert_utf:
|
||||
#convert all encodings to UTF8 or ASCII to avoid unsupported encodings in lxml
|
||||
if self.__convert_utf or self.__bad_encoding:
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
copy_obj.rename(self.__write_to, self.__file)
|
||||
file_encoding = "utf-8"
|
||||
if self.__bad_encoding:
|
||||
file_encoding = "us-ascii"
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
with open(self.__write_to, 'w') as write_obj:
|
||||
file = read_obj.read()
|
||||
try:
|
||||
file = file.decode(self.__encoding)
|
||||
write_obj.write(file.encode('utf-8'))
|
||||
write_objenc = EncodedFile(write_obj, self.__encoding,
|
||||
file_encoding, 'strict')
|
||||
for line in read_obj:
|
||||
write_objenc.write(line)
|
||||
except:
|
||||
sys.stderr.write('Conversion to UTF-8 is not possible,'
|
||||
' encoding should be very carefully checked')
|
||||
if self.__convert_utf:
|
||||
sys.stderr.write('Conversion to UTF-8 is problematic,'
|
||||
' encoding should be very carefully checked')
|
||||
write_objenc = EncodedFile(write_obj, self.__encoding,
|
||||
file_encoding, 'replace')
|
||||
for line in read_obj:
|
||||
write_objenc.write(line)
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
if self.__copy:
|
||||
copy_obj.copy_file(self.__write_to, "convert_to_tags.data")
|
||||
|
@ -10,7 +10,9 @@
|
||||
# #
|
||||
# #
|
||||
#########################################################################
|
||||
import sys, os, codecs
|
||||
import sys, os
|
||||
# , codecs
|
||||
|
||||
class Output:
|
||||
"""
|
||||
Output file
|
||||
@ -19,7 +21,8 @@ class Output:
|
||||
file,
|
||||
orig_file,
|
||||
output_dir = None,
|
||||
out_file = None
|
||||
out_file = None,
|
||||
no_ask = True
|
||||
):
|
||||
"""
|
||||
Required:
|
||||
@ -33,8 +36,9 @@ class Output:
|
||||
self.__file = file
|
||||
self.__orig_file = orig_file
|
||||
self.__output_dir = output_dir
|
||||
self.__no_ask = 1
|
||||
self.__no_ask = no_ask
|
||||
self.__out_file = out_file
|
||||
|
||||
def output(self):
|
||||
"""
|
||||
Required:
|
||||
@ -45,13 +49,14 @@ class Output:
|
||||
output the line to the screen if no output file given. Otherwise, output to
|
||||
the file.
|
||||
"""
|
||||
# self.__output_xml(self.__file, self.__out_file)
|
||||
if self.__output_dir:
|
||||
self.__output_to_dir_func()
|
||||
elif self.__out_file:
|
||||
self.__output_xml(self.__file, self.__out_file)
|
||||
self.__output_to_file_func()
|
||||
# self.__output_xml(self.__file, self.__out_file)
|
||||
else:
|
||||
self.__output_to_standard_func()
|
||||
|
||||
def __output_to_dir_func(self):
|
||||
"""
|
||||
Requires:
|
||||
@ -64,32 +69,25 @@ class Output:
|
||||
"""
|
||||
base_name = os.path.basename(self.__orig_file)
|
||||
base_name, ext = os.path.splitext(base_name)
|
||||
output_file = '%s.xml' % base_name
|
||||
output_file = os.path.join(self.__output_dir, output_file)
|
||||
output_file = os.path.join(self.__output_dir, '%s.xml' % base_name)
|
||||
# change if user wants to output to a specific file
|
||||
if self.__out_file:
|
||||
output_file = os.path.join(self.__output_dir, self.__out_file)
|
||||
user_response = 'o'
|
||||
if os.path.isfile(output_file):
|
||||
if self.__no_ask:
|
||||
user_response = 'o'
|
||||
else:
|
||||
msg = 'Do you want to over-write %s?\n' % output_file
|
||||
msg += 'Type "o" to over-write.\n'
|
||||
msg += 'Type any other key to print to standard output.\n'
|
||||
sys.stderr.write(msg)
|
||||
user_response = raw_input()
|
||||
if os.path.isfile(output_file) and not self.__no_ask:
|
||||
msg = 'Do you want to overwrite %s?\n' % output_file
|
||||
msg += ('Type "o" to overwrite.\n'
|
||||
'Type any other key to print to standard output.\n')
|
||||
sys.stderr.write(msg)
|
||||
user_response = raw_input()
|
||||
if user_response == 'o':
|
||||
read_obj = open(self.__file, 'r')
|
||||
write_obj = open(output_file, 'w')
|
||||
line = 1
|
||||
while line:
|
||||
line = read_obj.readline()
|
||||
write_obj.write(line)
|
||||
read_obj.close()
|
||||
write_obj.close()
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
with open(self.output_file, 'w') as write_obj:
|
||||
for line in read_obj:
|
||||
write_obj.write(line)
|
||||
else:
|
||||
self.__output_to_standard_func()
|
||||
|
||||
def __output_to_file_func(self):
|
||||
"""
|
||||
Required:
|
||||
@ -99,14 +97,11 @@ class Output:
|
||||
Logic:
|
||||
read one line at a time. Output to the file
|
||||
"""
|
||||
read_obj = open(self.__file, 'r')
|
||||
write_obj = open(self.__out_file, 'w')
|
||||
line = 1
|
||||
while line:
|
||||
line = read_obj.readline()
|
||||
write_obj.write(line)
|
||||
read_obj.close()
|
||||
write_obj.close()
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
with open(self.__out_file, 'w') as write_obj:
|
||||
for line in read_obj:
|
||||
write_obj.write(line)
|
||||
|
||||
def __output_to_standard_func(self):
|
||||
"""
|
||||
Required:
|
||||
@ -116,26 +111,24 @@ class Output:
|
||||
Logic:
|
||||
read one line at a time. Output to standard output
|
||||
"""
|
||||
read_obj = open(self.__file, 'r')
|
||||
line = 1
|
||||
while line:
|
||||
line = read_obj.readline()
|
||||
sys.stdout.write(line)
|
||||
read_obj.close()
|
||||
def __output_xml(self, in_file, out_file):
|
||||
"""
|
||||
output the ill-formed xml file
|
||||
"""
|
||||
(utf8_encode, utf8_decode, utf8_reader, utf8_writer) = codecs.lookup("utf-8")
|
||||
write_obj = utf8_writer(open(out_file, 'w'))
|
||||
write_obj = open(out_file, 'w')
|
||||
read_obj = utf8_writer(open(in_file, 'r'))
|
||||
read_obj = open(in_file, 'r')
|
||||
line = 1
|
||||
while line:
|
||||
line = read_obj.readline()
|
||||
if isinstance(line, type(u"")):
|
||||
line = line.encode("utf-8")
|
||||
write_obj.write(line)
|
||||
read_obj.close()
|
||||
write_obj.close()
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
for line in read_obj:
|
||||
sys.stdout.write(line)
|
||||
|
||||
# def __output_xml(self, in_file, out_file):
|
||||
# """
|
||||
# output the ill-formed xml file
|
||||
# """
|
||||
# (utf8_encode, utf8_decode, utf8_reader, utf8_writer) = codecs.lookup("utf-8")
|
||||
# write_obj = utf8_writer(open(out_file, 'w'))
|
||||
# write_obj = open(out_file, 'w')
|
||||
# read_obj = utf8_writer(open(in_file, 'r'))
|
||||
# read_obj = open(in_file, 'r')
|
||||
# line = 1
|
||||
# while line:
|
||||
# line = read_obj.readline()
|
||||
# if isinstance(line, type(u"")):
|
||||
# line = line.encode("utf-8")
|
||||
# write_obj.write(line)
|
||||
# read_obj.close()
|
||||
# write_obj.close()
|
||||
|
@ -2103,7 +2103,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
||||
user_mi = mi.get_all_user_metadata(make_copy=False)
|
||||
for key in user_mi.iterkeys():
|
||||
if key in self.field_metadata and \
|
||||
user_mi[key]['datatype'] == self.field_metadata[key]['datatype']:
|
||||
user_mi[key]['datatype'] == self.field_metadata[key]['datatype'] and \
|
||||
(user_mi[key]['datatype'] != 'text' or
|
||||
user_mi[key]['is_multiple'] == self.field_metadata[key]['is_multiple']):
|
||||
val = mi.get(key, None)
|
||||
if force_changes or val is not None:
|
||||
doit(self.set_custom, id, val=val, extra=mi.get_extra(key),
|
||||
|
Loading…
x
Reference in New Issue
Block a user