various little modifications in rtf2xml

This commit is contained in:
Sengian 2011-01-07 07:36:20 +01:00
parent 18df9457bb
commit b2187360ec
3 changed files with 59 additions and 46 deletions

View File

@ -326,6 +326,7 @@ class ParseRtf:
invalid_rtf_handler = InvalidRtfException, invalid_rtf_handler = InvalidRtfException,
) )
hex2utf_obj.convert_hex_2_utf8() hex2utf_obj.convert_hex_2_utf8()
# raise RtfInvalidCodeException, 'stop'
self.__bracket_match('hex_2_utf_preamble') self.__bracket_match('hex_2_utf_preamble')
fonts_obj = fonts.Fonts( fonts_obj = fonts.Fonts(
in_file = self.__temp_file, in_file = self.__temp_file,
@ -381,7 +382,7 @@ class ParseRtf:
msg += 'self.__run_level is "%s"\n' % self.__run_level msg += 'self.__run_level is "%s"\n' % self.__run_level
raise RtfInvalidCodeException, msg raise RtfInvalidCodeException, msg
if self.__run_level > 1: if self.__run_level > 1:
sys.stderr.write('File could be older RTF...\n') sys.stderr.write(_('File could be older RTF...\n'))
if found_destination: if found_destination:
if self.__run_level > 1: if self.__run_level > 1:
sys.stderr.write(_( sys.stderr.write(_(

View File

@ -54,10 +54,10 @@ class Hex2Utf8:
'convert_to_caps'--wether to convert caps to utf-8 'convert_to_caps'--wether to convert caps to utf-8
Returns: Returns:
nothing nothing
""" """
self.__file = in_file self.__file = in_file
self.__copy = copy self.__copy = copy
if area_to_convert != 'preamble' and area_to_convert != 'body': if area_to_convert not in ('preamble', 'body'):
msg = ( msg = (
'Developer error! Wrong flag.\n' 'Developer error! Wrong flag.\n'
'in module "hex_2_utf8.py\n' 'in module "hex_2_utf8.py\n'
@ -79,7 +79,8 @@ class Hex2Utf8:
self.__write_to = tempfile.mktemp() self.__write_to = tempfile.mktemp()
self.__bug_handler = bug_handler self.__bug_handler = bug_handler
self.__invalid_rtf_handler = invalid_rtf_handler self.__invalid_rtf_handler = invalid_rtf_handler
def update_values( self,
def update_values(self,
file, file,
area_to_convert, area_to_convert,
char_file, char_file,
@ -132,6 +133,7 @@ class Hex2Utf8:
# self.__convert_symbol = 0 # self.__convert_symbol = 0
# self.__convert_wingdings = 0 # self.__convert_wingdings = 0
# self.__convert_zapf = 0 # self.__convert_zapf = 0
def __initiate_values(self): def __initiate_values(self):
""" """
Required: Required:
@ -191,6 +193,7 @@ class Hex2Utf8:
'body' : self.__body_func, 'body' : self.__body_func,
'mi<mk<body-open_' : self.__found_body_func, 'mi<mk<body-open_' : self.__found_body_func,
'tx<hx<__________' : self.__hex_text_func, 'tx<hx<__________' : self.__hex_text_func,
# 'tx<nu<__________' : self.__text_func,
} }
self.__body_state_dict = { self.__body_state_dict = {
'preamble' : self.__preamble_for_body_func, 'preamble' : self.__preamble_for_body_func,
@ -209,6 +212,7 @@ class Hex2Utf8:
} }
self.__caps_list = ['false'] self.__caps_list = ['false']
self.__font_list = ['not-defined'] self.__font_list = ['not-defined']
def __hex_text_func(self, line): def __hex_text_func(self, line):
""" """
Required: Required:
@ -218,12 +222,12 @@ class Hex2Utf8:
token is in the dictionary, then check if the value starts with a token is in the dictionary, then check if the value starts with a
"&". If it does, then tag the result as utf text. Otherwise, tag it "&". If it does, then tag the result as utf text. Otherwise, tag it
as normal text. as normal text.
If the nex_num is not in the dictionary, then a mistake has been If the hex_num is not in the dictionary, then a mistake has been
made. made.
""" """
hex_num = line[17:-1] hex_num = line[17:-1]
converted = self.__current_dict.get(hex_num) converted = self.__current_dict.get(hex_num)
if converted != None: if converted is not None:
# tag as utf-8 # tag as utf-8
if converted[0:1] == "&": if converted[0:1] == "&":
font = self.__current_dict_name font = self.__current_dict_name
@ -261,44 +265,45 @@ class Hex2Utf8:
# msg = 'no dictionary entry for %s\n' # msg = 'no dictionary entry for %s\n'
# msg += 'the hexidecimal num is "%s"\n' % (hex_num) # msg += 'the hexidecimal num is "%s"\n' % (hex_num)
# msg += 'dictionary is %s\n' % self.__current_dict_name # msg += 'dictionary is %s\n' % self.__current_dict_name
msg = 'Character "&#x%s;" does not appear to be valid (or is a control character)\n' % token msg = _('Character "&#x%s;" does not appear to be valid (or is a control character)\n') % token
raise self.__bug_handler, msg raise self.__bug_handler, msg
def __found_body_func(self, line): def __found_body_func(self, line):
self.__state = 'body' self.__state = 'body'
self.__write_obj.write(line) self.__write_obj.write(line)
def __body_func(self, line): def __body_func(self, line):
""" """
When parsing preamble When parsing preamble
""" """
self.__write_obj.write(line) self.__write_obj.write(line)
def __preamble_func(self, line): def __preamble_func(self, line):
action = self.__preamble_state_dict.get(self.__token_info) action = self.__preamble_state_dict.get(self.__token_info)
if action != None: if action is not None:
action(line) action(line)
else: else:
self.__write_obj.write(line) self.__write_obj.write(line)
def __convert_preamble(self): def __convert_preamble(self):
self.__state = 'preamble' self.__state = 'preamble'
read_obj = open(self.__file, 'r')
self.__write_obj = open(self.__write_to, 'w') self.__write_obj = open(self.__write_to, 'w')
line_to_read = 1 with open(self.__file, 'r') as read_obj:
while line_to_read: for line in read_obj:
line_to_read = read_obj.readline() self.__token_info = line[:16]
line = line_to_read action = self.__preamble_state_dict.get(self.__state)
self.__token_info = line[:16] if action is None:
action = self.__preamble_state_dict.get(self.__state) sys.stderr.write(_('error no state found in hex_2_utf8'),
if action == None: self.__state
sys.stderr.write('error no state found in hex_2_utf8', )
self.__state action(line)
)
action(line)
read_obj.close()
self.__write_obj.close() self.__write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "preamble_utf_convert.data") copy_obj.copy_file(self.__write_to, "preamble_utf_convert.data")
copy_obj.rename(self.__write_to, self.__file) copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to) os.remove(self.__write_to)
def __preamble_for_body_func(self, line): def __preamble_for_body_func(self, line):
""" """
Required: Required:
@ -311,6 +316,7 @@ class Hex2Utf8:
if self.__token_info == 'mi<mk<body-open_': if self.__token_info == 'mi<mk<body-open_':
self.__found_body_func(line) self.__found_body_func(line)
self.__write_obj.write(line) self.__write_obj.write(line)
def __body_for_body_func(self, line): def __body_for_body_func(self, line):
""" """
Required: Required:
@ -321,10 +327,11 @@ class Hex2Utf8:
Used when parsing the body. Used when parsing the body.
""" """
action = self.__in_body_dict.get(self.__token_info) action = self.__in_body_dict.get(self.__token_info)
if action != None: if action is not None:
action(line) action(line)
else: else:
self.__write_obj.write(line) self.__write_obj.write(line)
def __start_font_func(self, line): def __start_font_func(self, line):
""" """
Required: Required:
@ -348,6 +355,7 @@ class Hex2Utf8:
else: else:
self.__current_dict_name = 'default' self.__current_dict_name = 'default'
self.__current_dict = self.__def_dict self.__current_dict = self.__def_dict
def __end_font_func(self, line): def __end_font_func(self, line):
""" """
Required: Required:
@ -376,6 +384,7 @@ class Hex2Utf8:
else: else:
self.__current_dict_name = 'default' self.__current_dict_name = 'default'
self.__current_dict = self.__def_dict self.__current_dict = self.__def_dict
def __start_special_font_func_old(self, line): def __start_special_font_func_old(self, line):
""" """
Required: Required:
@ -398,6 +407,7 @@ class Hex2Utf8:
self.__current_dict.append(self.__dingbats_dict) self.__current_dict.append(self.__dingbats_dict)
self.__special_fonts_found += 1 self.__special_fonts_found += 1
self.__current_dict_name = 'Zapf Dingbats' self.__current_dict_name = 'Zapf Dingbats'
def __end_special_font_func(self, line): def __end_special_font_func(self, line):
""" """
Required: Required:
@ -416,6 +426,7 @@ class Hex2Utf8:
self.__current_dict.pop() self.__current_dict.pop()
self.__special_fonts_found -= 1 self.__special_fonts_found -= 1
self.__dict_name = 'default' self.__dict_name = 'default'
def __start_caps_func_old(self, line): def __start_caps_func_old(self, line):
""" """
Required: Required:
@ -427,6 +438,7 @@ class Hex2Utf8:
self.__in_caps to 1 self.__in_caps to 1
""" """
self.__in_caps = 1 self.__in_caps = 1
def __start_caps_func(self, line): def __start_caps_func(self, line):
""" """
Required: Required:
@ -440,6 +452,7 @@ class Hex2Utf8:
self.__in_caps = 1 self.__in_caps = 1
value = line[17:-1] value = line[17:-1]
self.__caps_list.append(value) self.__caps_list.append(value)
def __end_caps_func(self, line): def __end_caps_func(self, line):
""" """
Required: Required:
@ -455,7 +468,8 @@ class Hex2Utf8:
else: else:
sys.stderr.write('Module is hex_2_utf8\n') sys.stderr.write('Module is hex_2_utf8\n')
sys.stderr.write('method is __end_caps_func\n') sys.stderr.write('method is __end_caps_func\n')
sys.stderr.write('caps list should be more than one?\n') sys.stderr.write('caps list should be more than one?\n') #self.__in_caps not set
def __text_func(self, line): def __text_func(self, line):
""" """
Required: Required:
@ -466,9 +480,8 @@ class Hex2Utf8:
if in caps, convert. Otherwise, print out. if in caps, convert. Otherwise, print out.
""" """
text = line[17:-1] text = line[17:-1]
if self.__current_dict_name == 'Symbol'\ # print line
or self.__current_dict_name == 'Wingdings'\ if self.__current_dict_name in ('Symbol', 'Wingdings', 'Zapf Dingbats'):
or self.__current_dict_name == 'Zapf Dingbats':
the_string = '' the_string = ''
for letter in text: for letter in text:
hex_num = hex(ord(letter)) hex_num = hex(ord(letter))
@ -477,21 +490,21 @@ class Hex2Utf8:
hex_num = hex_num[2:] hex_num = hex_num[2:]
hex_num = '\'%s' % hex_num hex_num = '\'%s' % hex_num
converted = self.__current_dict.get(hex_num) converted = self.__current_dict.get(hex_num)
if converted == None: if converted is None:
sys.stderr.write('module is hex_2_ut8\n') sys.stderr.write('module is hex_2_ut8\n')
sys.stderr.write('method is __text_func\n') sys.stderr.write('method is __text_func\n')
sys.stderr.write('no hex value for "%s"\n' % hex_num) sys.stderr.write('no hex value for "%s"\n' % hex_num)
else: else:
the_string += converted the_string += converted
self.__write_obj.write('tx<nu<__________<%s\n' % the_string) self.__write_obj.write('tx<nu<__________<%s\n' % the_string)
# print the_string
else: else:
if self.__caps_list[-1] == 'true' \ if self.__caps_list[-1] == 'true' \
and self.__convert_caps\ and self.__convert_caps\
and self.__current_dict_name != 'Symbol'\ and self.__current_dict_name not in ('Symbol', 'Wingdings', 'Zapf Dingbats'):
and self.__current_dict_name != 'Wingdings'\
and self.__current_dict_name != 'Zapf Dingbats':
text = text.upper() text = text.upper()
self.__write_obj.write('tx<nu<__________<%s\n' % text) self.__write_obj.write('tx<nu<__________<%s\n' % text)
def __utf_to_caps_func(self, line): def __utf_to_caps_func(self, line):
""" """
Required: Required:
@ -506,6 +519,7 @@ class Hex2Utf8:
# utf_text = utf_text.upper() # utf_text = utf_text.upper()
utf_text = self.__utf_token_to_caps_func(utf_text) utf_text = self.__utf_token_to_caps_func(utf_text)
self.__write_obj.write('tx<ut<__________<%s\n' % utf_text) self.__write_obj.write('tx<ut<__________<%s\n' % utf_text)
def __utf_token_to_caps_func(self, char_entity): def __utf_token_to_caps_func(self, char_entity):
""" """
Required: Required:
@ -530,28 +544,26 @@ class Hex2Utf8:
return char_entity return char_entity
else: else:
return converted return converted
def __convert_body(self): def __convert_body(self):
self.__state = 'body' self.__state = 'body'
read_obj = open(self.__file, 'r') with open(self.__file, 'r') as read_obj:
self.__write_obj = open(self.__write_to, 'w') self.__write_obj = open(self.__write_to, 'w')
line_to_read = 1 for line in read_obj:
while line_to_read: self.__token_info = line[:16]
line_to_read = read_obj.readline() action = self.__body_state_dict.get(self.__state)
line = line_to_read if action is None:
self.__token_info = line[:16] sys.stderr.write(_('error no state found in hex_2_utf8'),
action = self.__body_state_dict.get(self.__state) self.__state
if action == None: )
sys.stderr.write('error no state found in hex_2_utf8', action(line)
self.__state
)
action(line)
read_obj.close()
self.__write_obj.close() self.__write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "body_utf_convert.data") copy_obj.copy_file(self.__write_to, "body_utf_convert.data")
copy_obj.rename(self.__write_to, self.__file) copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to) os.remove(self.__write_to)
def convert_hex_2_utf8(self): def convert_hex_2_utf8(self):
self.__initiate_values() self.__initiate_values()
if self.__area_to_convert == 'preamble': if self.__area_to_convert == 'preamble':

View File

@ -606,13 +606,13 @@ class ProcessTokens:
return 'tx<mc<__________<%s\n' % token return 'tx<mc<__________<%s\n' % token
def default_func(self, pre, token, num): def default_func(self, pre, token, num):
if num == None: if num is None:
num = 'true' num = 'true'
return 'cw<%s<%s<nu<%s\n' % (pre, token, num) return 'cw<%s<%s<nu<%s\n' % (pre, token, num)
def __list_type_func(self, pre, token, num): def __list_type_func(self, pre, token, num):
type = 'arabic' type = 'arabic'
if num == None: if num is None:
type = 'Arabic' type = 'Arabic'
else: else:
try: try: