mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
RTF hex_2_utf8 cleaning
This commit is contained in:
parent
12e1c5219b
commit
ba1e8510fa
@ -115,7 +115,7 @@ class Hex2Utf8:
|
|||||||
"""
|
"""
|
||||||
self.__file=file
|
self.__file=file
|
||||||
self.__copy = copy
|
self.__copy = copy
|
||||||
if area_to_convert != 'preamble' and area_to_convert != 'body':
|
if area_to_convert not in ('preamble', 'body'):
|
||||||
msg = (
|
msg = (
|
||||||
'in module "hex_2_utf8.py\n'
|
'in module "hex_2_utf8.py\n'
|
||||||
'"area_to_convert" must be "body" or "preamble"\n'
|
'"area_to_convert" must be "body" or "preamble"\n'
|
||||||
@ -143,18 +143,19 @@ class Hex2Utf8:
|
|||||||
Set values, including those for the dictionaries.
|
Set values, including those for the dictionaries.
|
||||||
The file that contains the maps is broken down into many different
|
The file that contains the maps is broken down into many different
|
||||||
sets. For example, for the Symbol font, there is the standard part for
|
sets. For example, for the Symbol font, there is the standard part for
|
||||||
hexidecimal numbers, and the the part for Microsoft charcters. Read
|
hexadecimal numbers, and the part for Microsoft characters. Read
|
||||||
each part in, and then combine them.
|
each part in, and then combine them.
|
||||||
"""
|
"""
|
||||||
# the default encoding system, the lower map for characters 0 through
|
# the default encoding system, the lower map for characters 0 through
|
||||||
# 128, and the encoding system for Microsoft characters.
|
# 128, and the encoding system for Microsoft characters.
|
||||||
# New on 2004-05-8: the self.__char_map is not in diretory with other
|
# New on 2004-05-8: the self.__char_map is not in directory with other
|
||||||
# modules
|
# modules
|
||||||
self.__char_file = cStringIO.StringIO(char_set)
|
self.__char_file = cStringIO.StringIO(char_set)
|
||||||
char_map_obj = get_char_map.GetCharMap(
|
char_map_obj = get_char_map.GetCharMap(
|
||||||
char_file = self.__char_file,
|
char_file = self.__char_file,
|
||||||
bug_handler = self.__bug_handler,
|
bug_handler = self.__bug_handler,
|
||||||
)
|
)
|
||||||
|
print self.__default_char_map
|
||||||
up_128_dict = char_map_obj.get_char_map(map=self.__default_char_map)
|
up_128_dict = char_map_obj.get_char_map(map=self.__default_char_map)
|
||||||
bt_128_dict = char_map_obj.get_char_map(map = 'bottom_128')
|
bt_128_dict = char_map_obj.get_char_map(map = 'bottom_128')
|
||||||
ms_standard_dict = char_map_obj.get_char_map(map = 'ms_standard')
|
ms_standard_dict = char_map_obj.get_char_map(map = 'ms_standard')
|
||||||
@ -195,7 +196,6 @@ class Hex2Utf8:
|
|||||||
'body' : self.__body_func,
|
'body' : self.__body_func,
|
||||||
'mi<mk<body-open_' : self.__found_body_func,
|
'mi<mk<body-open_' : self.__found_body_func,
|
||||||
'tx<hx<__________' : self.__hex_text_func,
|
'tx<hx<__________' : self.__hex_text_func,
|
||||||
# 'tx<nu<__________' : self.__text_func,
|
|
||||||
}
|
}
|
||||||
self.__body_state_dict = {
|
self.__body_state_dict = {
|
||||||
'preamble' : self.__preamble_for_body_func,
|
'preamble' : self.__preamble_for_body_func,
|
||||||
@ -235,9 +235,7 @@ class Hex2Utf8:
|
|||||||
font = self.__current_dict_name
|
font = self.__current_dict_name
|
||||||
if self.__convert_caps\
|
if self.__convert_caps\
|
||||||
and self.__caps_list[-1] == 'true'\
|
and self.__caps_list[-1] == 'true'\
|
||||||
and font != 'Symbol'\
|
and font not in ('Symbol', 'Wingdings', 'Zapf Dingbats'):
|
||||||
and font != 'Wingdings'\
|
|
||||||
and font != 'Zapf Dingbats':
|
|
||||||
converted = self.__utf_token_to_caps_func(converted)
|
converted = self.__utf_token_to_caps_func(converted)
|
||||||
self.__write_obj.write(
|
self.__write_obj.write(
|
||||||
'tx<ut<__________<%s\n' % converted
|
'tx<ut<__________<%s\n' % converted
|
||||||
@ -247,9 +245,7 @@ class Hex2Utf8:
|
|||||||
font = self.__current_dict_name
|
font = self.__current_dict_name
|
||||||
if self.__convert_caps\
|
if self.__convert_caps\
|
||||||
and self.__caps_list[-1] == 'true'\
|
and self.__caps_list[-1] == 'true'\
|
||||||
and font != 'Symbol'\
|
and font not in ('Symbol', 'Wingdings', 'Zapf Dingbats'):
|
||||||
and font != 'Wingdings'\
|
|
||||||
and font != 'Zapf Dingbats':
|
|
||||||
converted = converted.upper()
|
converted = converted.upper()
|
||||||
self.__write_obj.write(
|
self.__write_obj.write(
|
||||||
'tx<nu<__________<%s\n' % converted
|
'tx<nu<__________<%s\n' % converted
|
||||||
@ -289,7 +285,7 @@ class Hex2Utf8:
|
|||||||
|
|
||||||
def __convert_preamble(self):
|
def __convert_preamble(self):
|
||||||
self.__state = 'preamble'
|
self.__state = 'preamble'
|
||||||
self.__write_obj = open(self.__write_to, 'w')
|
with open(self.__write_to, 'w') as self.__write_obj:
|
||||||
with open(self.__file, 'r') as read_obj:
|
with open(self.__file, 'r') as read_obj:
|
||||||
for line in read_obj:
|
for line in read_obj:
|
||||||
self.__token_info = line[:16]
|
self.__token_info = line[:16]
|
||||||
@ -299,7 +295,6 @@ class Hex2Utf8:
|
|||||||
self.__state
|
self.__state
|
||||||
)
|
)
|
||||||
action(line)
|
action(line)
|
||||||
self.__write_obj.close()
|
|
||||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||||
if self.__copy:
|
if self.__copy:
|
||||||
copy_obj.copy_file(self.__write_to, "preamble_utf_convert.data")
|
copy_obj.copy_file(self.__write_to, "preamble_utf_convert.data")
|
||||||
@ -468,9 +463,9 @@ class Hex2Utf8:
|
|||||||
if len(self.__caps_list) > 1:
|
if len(self.__caps_list) > 1:
|
||||||
self.__caps_list.pop()
|
self.__caps_list.pop()
|
||||||
else:
|
else:
|
||||||
sys.stderr.write('Module is hex_2_utf8\n')
|
sys.stderr.write('Module is hex_2_utf8\n'
|
||||||
sys.stderr.write('method is __end_caps_func\n')
|
'method is __end_caps_func\n'
|
||||||
sys.stderr.write('caps list should be more than one?\n') #self.__in_caps not set
|
'caps list should be more than one?\n') #self.__in_caps not set
|
||||||
|
|
||||||
def __text_func(self, line):
|
def __text_func(self, line):
|
||||||
"""
|
"""
|
||||||
@ -493,8 +488,7 @@ class Hex2Utf8:
|
|||||||
hex_num = '\'%s' % hex_num
|
hex_num = '\'%s' % hex_num
|
||||||
converted = self.__current_dict.get(hex_num)
|
converted = self.__current_dict.get(hex_num)
|
||||||
if converted is None:
|
if converted is None:
|
||||||
sys.stderr.write('module is hex_2_ut8\n')
|
sys.stderr.write('module is hex_2_ut8\nmethod is __text_func\n')
|
||||||
sys.stderr.write('method is __text_func\n')
|
|
||||||
sys.stderr.write('no hex value for "%s"\n' % hex_num)
|
sys.stderr.write('no hex value for "%s"\n' % hex_num)
|
||||||
else:
|
else:
|
||||||
the_string += converted
|
the_string += converted
|
||||||
@ -550,7 +544,7 @@ class Hex2Utf8:
|
|||||||
def __convert_body(self):
|
def __convert_body(self):
|
||||||
self.__state = 'body'
|
self.__state = 'body'
|
||||||
with open(self.__file, 'r') as read_obj:
|
with open(self.__file, 'r') as read_obj:
|
||||||
self.__write_obj = open(self.__write_to, 'w')
|
with open(self.__write_to, 'w') as self.__write_obj:
|
||||||
for line in read_obj:
|
for line in read_obj:
|
||||||
self.__token_info = line[:16]
|
self.__token_info = line[:16]
|
||||||
action = self.__body_state_dict.get(self.__state)
|
action = self.__body_state_dict.get(self.__state)
|
||||||
@ -559,7 +553,6 @@ class Hex2Utf8:
|
|||||||
self.__state
|
self.__state
|
||||||
)
|
)
|
||||||
action(line)
|
action(line)
|
||||||
self.__write_obj.close()
|
|
||||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||||
if self.__copy:
|
if self.__copy:
|
||||||
copy_obj.copy_file(self.__write_to, "body_utf_convert.data")
|
copy_obj.copy_file(self.__write_to, "body_utf_convert.data")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user