mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
RTF: First modification for chinese language addition
This commit is contained in:
parent
ed972d487b
commit
0dc5004ae1
@ -14,7 +14,6 @@
|
|||||||
import sys, os, tempfile
|
import sys, os, tempfile
|
||||||
|
|
||||||
from calibre.ebooks.rtf2xml import copy, check_brackets
|
from calibre.ebooks.rtf2xml import copy, check_brackets
|
||||||
# note to self. This is the first module in which I use tempfile. A good idea?
|
|
||||||
|
|
||||||
class AddBrackets:
|
class AddBrackets:
|
||||||
"""
|
"""
|
||||||
@ -54,8 +53,8 @@ class AddBrackets:
|
|||||||
self.__state = 'before_body'
|
self.__state = 'before_body'
|
||||||
self.__inline = {}
|
self.__inline = {}
|
||||||
self.__temp_group = []
|
self.__temp_group = []
|
||||||
self.__open_bracket = 0
|
self.__open_bracket = False
|
||||||
self.__found_brackets = 0
|
self.__found_brackets = False
|
||||||
self.__accept = [
|
self.__accept = [
|
||||||
'cw<ci<bold______',
|
'cw<ci<bold______',
|
||||||
'cw<ci<annotation' ,
|
'cw<ci<annotation' ,
|
||||||
@ -102,7 +101,7 @@ class AddBrackets:
|
|||||||
)
|
)
|
||||||
self.__write_obj.write(line)
|
self.__write_obj.write(line)
|
||||||
elif self.__token_info == 'ob<nu<open-brack':
|
elif self.__token_info == 'ob<nu<open-brack':
|
||||||
self.__found_brackets = 1
|
self.__found_brackets = True
|
||||||
self.__state = 'in_ignore'
|
self.__state = 'in_ignore'
|
||||||
self.__ignore_count = self.__ob_count
|
self.__ignore_count = self.__ob_count
|
||||||
self.__write_obj.write(line)
|
self.__write_obj.write(line)
|
||||||
@ -134,7 +133,7 @@ class AddBrackets:
|
|||||||
self.__write_obj.write(
|
self.__write_obj.write(
|
||||||
'cb<nu<clos-brack<0003\n'
|
'cb<nu<clos-brack<0003\n'
|
||||||
)
|
)
|
||||||
self.__open_bracket = 0
|
self.__open_bracket = False
|
||||||
inline_string = ''
|
inline_string = ''
|
||||||
the_keys = self.__inline.keys()
|
the_keys = self.__inline.keys()
|
||||||
for the_key in the_keys:
|
for the_key in the_keys:
|
||||||
@ -144,7 +143,7 @@ class AddBrackets:
|
|||||||
if inline_string:
|
if inline_string:
|
||||||
self.__write_obj.write('ob<nu<open-brack<0003\n')
|
self.__write_obj.write('ob<nu<open-brack<0003\n')
|
||||||
self.__write_obj.write(inline_string)
|
self.__write_obj.write(inline_string)
|
||||||
self.__open_bracket = 1
|
self.__open_bracket = True
|
||||||
self.__temp_group = []
|
self.__temp_group = []
|
||||||
|
|
||||||
def __change_permanent_group(self):
|
def __change_permanent_group(self):
|
||||||
@ -172,32 +171,28 @@ class AddBrackets:
|
|||||||
(file = in_file)
|
(file = in_file)
|
||||||
good_br = self.__check_brack_obj.check_brackets()[0]
|
good_br = self.__check_brack_obj.check_brackets()[0]
|
||||||
if not good_br:
|
if not good_br:
|
||||||
return 1
|
return True
|
||||||
|
|
||||||
def add_brackets(self):
|
def add_brackets(self):
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
self.__initiate_values()
|
self.__initiate_values()
|
||||||
read_obj = open(self.__file, 'r')
|
with open(self.__file, 'r') as read_obj:
|
||||||
self.__write_obj = open(self.__write_to, 'w')
|
with open(self.__write_to, 'w') as self.__write_obj:
|
||||||
line_to_read = 1
|
for line in read_obj:
|
||||||
while line_to_read:
|
self.__token_info = line[:16]
|
||||||
line_to_read = read_obj.readline()
|
if self.__token_info == 'ob<nu<open-brack':
|
||||||
line = line_to_read
|
self.__ob_count = line[-5:-1]
|
||||||
self.__token_info = line[:16]
|
if self.__token_info == 'cb<nu<clos-brack':
|
||||||
if self.__token_info == 'ob<nu<open-brack':
|
self.__cb_count = line[-5:-1]
|
||||||
self.__ob_count = line[-5:-1]
|
action = self.__state_dict.get(self.__state)
|
||||||
if self.__token_info == 'cb<nu<clos-brack':
|
if action is None:
|
||||||
self.__cb_count = line[-5:-1]
|
sys.stderr.write(
|
||||||
action = self.__state_dict.get(self.__state)
|
'No matching state in module add_brackets.py\n'
|
||||||
if action == None:
|
'%s\n' % self.__state)
|
||||||
sys.stderr.write('No matching state in module add_brackets.py\n')
|
action(line)
|
||||||
sys.stderr.write(self.__state + '\n')
|
#Check bad brackets
|
||||||
action(line)
|
if not self.__check_brackets(self.__write_to):
|
||||||
read_obj.close()
|
|
||||||
self.__write_obj.close()
|
|
||||||
bad_brackets = self.__check_brackets(self.__write_to)
|
|
||||||
if not bad_brackets:
|
|
||||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||||
if self.__copy:
|
if self.__copy:
|
||||||
copy_obj.copy_file(self.__write_to, "add_brackets.data")
|
copy_obj.copy_file(self.__write_to, "add_brackets.data")
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -214,7 +214,7 @@ class ConvertToTags:
|
|||||||
|
|
||||||
if not check_encoding_obj.check_encoding(self.__file, verbose=False):
|
if not check_encoding_obj.check_encoding(self.__file, verbose=False):
|
||||||
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
|
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
|
||||||
elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
|
elif not check_encoding_obj.check_encoding(self.__file, self.__encoding, verbose=False):
|
||||||
self.__write_obj.write('<?xml version="1.0" encoding="UTF-8" ?>')
|
self.__write_obj.write('<?xml version="1.0" encoding="UTF-8" ?>')
|
||||||
self.__convert_utf = True
|
self.__convert_utf = True
|
||||||
else:
|
else:
|
||||||
@ -274,19 +274,10 @@ class ConvertToTags:
|
|||||||
file_encoding = "us-ascii"
|
file_encoding = "us-ascii"
|
||||||
with open(self.__file, 'r') as read_obj:
|
with open(self.__file, 'r') as read_obj:
|
||||||
with open(self.__write_to, 'w') as write_obj:
|
with open(self.__write_to, 'w') as write_obj:
|
||||||
try:
|
write_objenc = EncodedFile(write_obj, self.__encoding,
|
||||||
write_objenc = EncodedFile(write_obj, self.__encoding,
|
file_encoding, 'replace')
|
||||||
file_encoding, 'strict')
|
for line in read_obj:
|
||||||
for line in read_obj:
|
write_objenc.write(line)
|
||||||
write_objenc.write(line)
|
|
||||||
except:
|
|
||||||
if self.__convert_utf:
|
|
||||||
sys.stderr.write('Conversion to UTF-8 is problematic,'
|
|
||||||
' encoding should be very carefully checked')
|
|
||||||
write_objenc = EncodedFile(write_obj, self.__encoding,
|
|
||||||
file_encoding, 'replace')
|
|
||||||
for line in read_obj:
|
|
||||||
write_objenc.write(line)
|
|
||||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||||
if self.__copy:
|
if self.__copy:
|
||||||
copy_obj.copy_file(self.__write_to, "convert_to_tags.data")
|
copy_obj.copy_file(self.__write_to, "convert_to_tags.data")
|
||||||
|
@ -180,7 +180,7 @@ class Hex2Utf8:
|
|||||||
self.__dingbats_dict.update(dingbats_base_dict)
|
self.__dingbats_dict.update(dingbats_base_dict)
|
||||||
self.__dingbats_dict.update(ms_dingbats_dict)
|
self.__dingbats_dict.update(ms_dingbats_dict)
|
||||||
# load dictionary for caps, and make a string for the replacement
|
# load dictionary for caps, and make a string for the replacement
|
||||||
self.__caps_uni_dict = char_map_obj.get_char_map(map='caps_uni')
|
self.__caps_uni_dict = char_map_obj.get_char_map(map = 'caps_uni')
|
||||||
# # print self.__caps_uni_dict
|
# # print self.__caps_uni_dict
|
||||||
# don't think I'll need this
|
# don't think I'll need this
|
||||||
##keys = self.__caps_uni_dict.keys()
|
##keys = self.__caps_uni_dict.keys()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user