mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
RTF: First modification for chinese language addition
This commit is contained in:
parent
ed972d487b
commit
0dc5004ae1
@ -14,7 +14,6 @@
|
||||
import sys, os, tempfile
|
||||
|
||||
from calibre.ebooks.rtf2xml import copy, check_brackets
|
||||
# note to self. This is the first module in which I use tempfile. A good idea?
|
||||
|
||||
class AddBrackets:
|
||||
"""
|
||||
@ -54,8 +53,8 @@ class AddBrackets:
|
||||
self.__state = 'before_body'
|
||||
self.__inline = {}
|
||||
self.__temp_group = []
|
||||
self.__open_bracket = 0
|
||||
self.__found_brackets = 0
|
||||
self.__open_bracket = False
|
||||
self.__found_brackets = False
|
||||
self.__accept = [
|
||||
'cw<ci<bold______',
|
||||
'cw<ci<annotation' ,
|
||||
@ -102,7 +101,7 @@ class AddBrackets:
|
||||
)
|
||||
self.__write_obj.write(line)
|
||||
elif self.__token_info == 'ob<nu<open-brack':
|
||||
self.__found_brackets = 1
|
||||
self.__found_brackets = True
|
||||
self.__state = 'in_ignore'
|
||||
self.__ignore_count = self.__ob_count
|
||||
self.__write_obj.write(line)
|
||||
@ -134,7 +133,7 @@ class AddBrackets:
|
||||
self.__write_obj.write(
|
||||
'cb<nu<clos-brack<0003\n'
|
||||
)
|
||||
self.__open_bracket = 0
|
||||
self.__open_bracket = False
|
||||
inline_string = ''
|
||||
the_keys = self.__inline.keys()
|
||||
for the_key in the_keys:
|
||||
@ -144,7 +143,7 @@ class AddBrackets:
|
||||
if inline_string:
|
||||
self.__write_obj.write('ob<nu<open-brack<0003\n')
|
||||
self.__write_obj.write(inline_string)
|
||||
self.__open_bracket = 1
|
||||
self.__open_bracket = True
|
||||
self.__temp_group = []
|
||||
|
||||
def __change_permanent_group(self):
|
||||
@ -172,32 +171,28 @@ class AddBrackets:
|
||||
(file = in_file)
|
||||
good_br = self.__check_brack_obj.check_brackets()[0]
|
||||
if not good_br:
|
||||
return 1
|
||||
return True
|
||||
|
||||
def add_brackets(self):
|
||||
"""
|
||||
"""
|
||||
self.__initiate_values()
|
||||
read_obj = open(self.__file, 'r')
|
||||
self.__write_obj = open(self.__write_to, 'w')
|
||||
line_to_read = 1
|
||||
while line_to_read:
|
||||
line_to_read = read_obj.readline()
|
||||
line = line_to_read
|
||||
self.__token_info = line[:16]
|
||||
if self.__token_info == 'ob<nu<open-brack':
|
||||
self.__ob_count = line[-5:-1]
|
||||
if self.__token_info == 'cb<nu<clos-brack':
|
||||
self.__cb_count = line[-5:-1]
|
||||
action = self.__state_dict.get(self.__state)
|
||||
if action == None:
|
||||
sys.stderr.write('No matching state in module add_brackets.py\n')
|
||||
sys.stderr.write(self.__state + '\n')
|
||||
action(line)
|
||||
read_obj.close()
|
||||
self.__write_obj.close()
|
||||
bad_brackets = self.__check_brackets(self.__write_to)
|
||||
if not bad_brackets:
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
with open(self.__write_to, 'w') as self.__write_obj:
|
||||
for line in read_obj:
|
||||
self.__token_info = line[:16]
|
||||
if self.__token_info == 'ob<nu<open-brack':
|
||||
self.__ob_count = line[-5:-1]
|
||||
if self.__token_info == 'cb<nu<clos-brack':
|
||||
self.__cb_count = line[-5:-1]
|
||||
action = self.__state_dict.get(self.__state)
|
||||
if action is None:
|
||||
sys.stderr.write(
|
||||
'No matching state in module add_brackets.py\n'
|
||||
'%s\n' % self.__state)
|
||||
action(line)
|
||||
#Check bad brackets
|
||||
if not self.__check_brackets(self.__write_to):
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
if self.__copy:
|
||||
copy_obj.copy_file(self.__write_to, "add_brackets.data")
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -214,7 +214,7 @@ class ConvertToTags:
|
||||
|
||||
if not check_encoding_obj.check_encoding(self.__file, verbose=False):
|
||||
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
|
||||
elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
|
||||
elif not check_encoding_obj.check_encoding(self.__file, self.__encoding, verbose=False):
|
||||
self.__write_obj.write('<?xml version="1.0" encoding="UTF-8" ?>')
|
||||
self.__convert_utf = True
|
||||
else:
|
||||
@ -274,19 +274,10 @@ class ConvertToTags:
|
||||
file_encoding = "us-ascii"
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
with open(self.__write_to, 'w') as write_obj:
|
||||
try:
|
||||
write_objenc = EncodedFile(write_obj, self.__encoding,
|
||||
file_encoding, 'strict')
|
||||
for line in read_obj:
|
||||
write_objenc.write(line)
|
||||
except:
|
||||
if self.__convert_utf:
|
||||
sys.stderr.write('Conversion to UTF-8 is problematic,'
|
||||
' encoding should be very carefully checked')
|
||||
write_objenc = EncodedFile(write_obj, self.__encoding,
|
||||
file_encoding, 'replace')
|
||||
for line in read_obj:
|
||||
write_objenc.write(line)
|
||||
write_objenc = EncodedFile(write_obj, self.__encoding,
|
||||
file_encoding, 'replace')
|
||||
for line in read_obj:
|
||||
write_objenc.write(line)
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
if self.__copy:
|
||||
copy_obj.copy_file(self.__write_to, "convert_to_tags.data")
|
||||
|
@ -180,7 +180,7 @@ class Hex2Utf8:
|
||||
self.__dingbats_dict.update(dingbats_base_dict)
|
||||
self.__dingbats_dict.update(ms_dingbats_dict)
|
||||
# load dictionary for caps, and make a string for the replacement
|
||||
self.__caps_uni_dict = char_map_obj.get_char_map(map='caps_uni')
|
||||
self.__caps_uni_dict = char_map_obj.get_char_map(map = 'caps_uni')
|
||||
# # print self.__caps_uni_dict
|
||||
# don't think I'll need this
|
||||
##keys = self.__caps_uni_dict.keys()
|
||||
|
Loading…
x
Reference in New Issue
Block a user