Various RTF minor changes

This commit is contained in:
Sengian 2011-01-07 22:12:49 +01:00
parent bab204cd0f
commit 56bb15d6ff
4 changed files with 69 additions and 69 deletions

View File

@ -249,7 +249,6 @@ class ParseRtf:
else self.__file.encode('utf-8')
msg = _('File %s does not appear to be correctly encoded.\n') % file_name
raise InvalidRtfException, msg
delete_info_obj = delete_info.DeleteInfo(
in_file = self.__temp_file,
copy = self.__copy,

View File

@ -16,7 +16,9 @@
# #
#########################################################################
import sys, os, tempfile
from calibre.ebooks.rtf2xml import copy
class DeleteInfo:
"""Delelet unecessary destination groups"""
def __init__(self,
@ -29,17 +31,18 @@ class DeleteInfo:
self.__bug_handler = bug_handler
self.__copy = copy
self.__write_to = tempfile.mktemp()
self.__bracket_count=0
self.__bracket_count= 0
self.__ob_count = 0
self.__cb_count = 0
self.__after_asterisk = 0
self.__after_asterisk = False
self.__delete = 0
self.__initiate_allow()
self.__ob = 0
self.__write_cb = 0
self.__run_level = run_level
self.__found_delete = 0
self.__list = 0
self.__found_delete = False
self.__list = False
def __initiate_allow(self):
"""
Initiate a list of destination groups which should be printed out.
@ -69,6 +72,7 @@ class DeleteInfo:
'delete' : self.__delete_func,
'list' : self.__list_func,
}
def __default_func(self,line):
"""Handle lines when in no special state. Look for an asterisk to
begin a special state. Otherwise, print out line."""
@ -81,13 +85,14 @@ class DeleteInfo:
if self.__ob:
self.__write_obj.write(self.__ob)
self.__ob = line
return 0
return False
else:
# write previous bracket, since didn't fine asterisk
if self.__ob:
self.__write_obj.write(self.__ob)
self.__ob = 0
return 1
return True
def __delete_func(self,line):
"""Handle lines when in delete state. Don't print out lines
unless the state has ended."""
@ -95,13 +100,14 @@ class DeleteInfo:
self.__state = 'default'
if self.__write_cb:
self.__write_cb = 0
return 1
return 0
return True
return False
def __asterisk_func(self,line):
"""
Determine whether to delete info in group
Note on self.__cb flag.
If you find that you are in a delete group, and the preivous
If you find that you are in a delete group, and the previous
token in not an open bracket (self.__ob = 0), that means
that the delete group is nested inside another acceptable
detination group. In this case, you have alrady written
@ -110,21 +116,21 @@ class DeleteInfo:
"""
# Test for {\*}, in which case don't enter
# delete state
self.__after_asterisk = 0 # only enter this function once
self.__found_delete = 1
self.__after_asterisk = False # only enter this function once
self.__found_delete = True
if self.__token_info == 'cb<nu<clos-brack':
if self.__delete_count == self.__cb_count:
self.__state = 'default'
self.__ob = 0
# changed this because haven't printed out start
return 0
return False
else:
# not sure what happens here!
# believe I have a '{\*}
if self.__run_level > 3:
msg = 'flag problem\n'
raise self.__bug_handler, msg
return 1
return True
elif self.__token_info in self.__allowable :
if self.__ob:
self.__write_obj.write(self.__ob)
@ -132,7 +138,7 @@ class DeleteInfo:
self.__state = 'default'
else:
pass
return 1
return True
elif self.__token_info == 'cw<ls<list______':
self.__ob = 0
self.__found_list_func(line)
@ -142,75 +148,74 @@ class DeleteInfo:
self.__ob = 0
self.__state = 'delete'
self.__cb_count = 0
return 0
return False
else:
if self.__run_level > 5:
msg = 'After an asterisk, and found neither an allowable or non-allowble token\n'
msg += 'token is "%s"\n' % self.__token_info
msg = _('After an asterisk, and found neither an allowable or non-allowble token\n\
token is "%s"\n') % self.__token_info
raise self.__bug_handler
if not self.__ob:
self.__write_cb = 1
self.__ob = 0
self.__state = 'delete'
self.__cb_count = 0
return 0
return False
def __found_list_func(self, line):
"""
Switch the state machine into the 'list' state.

Invoked from the branch that handles the 'cw<ls<list______' token.
The state dictionary maps 'list' to __list_func, so the following
lines are judged by that method until the state changes again.

line -- the current token line; accepted but not used here.

Returns None; the only effect is the change of self.__state.
"""
self.__state = 'list'
def __list_func(self, line):
    """
    Decide whether a line is written while in the 'list' state.

    The group is considered finished when the stored delete count
    equals the current close-bracket count and the current token is a
    closing bracket. In that case the state goes back to 'default' and
    the closing bracket is written only if self.__write_cb was set
    (the flag is cleared as it is consumed).

    line -- the current token line.

    Return True for all control words (lines starting with 'cw') and
    for a closing bracket that must be written.
    Return False otherwise.
    """
    # Keep the count comparison first, as in the original condition.
    if self.__delete_count == self.__cb_count and \
            self.__token_info == 'cb<nu<clos-brack':
        self.__state = 'default'
        if self.__write_cb:
            self.__write_cb = 0
            return True
        return False
    # Still inside the list group: only control words are kept.
    return line[0:2] == 'cw'
def delete_info(self):
"""Main method for handling other methods. Read one line in at
a time, and determine wheter to print the line based on the state."""
line_to_read = 'dummy'
read_obj = open(self.__file, 'r')
self.__write_obj = open(self.__write_to, 'w')
while line_to_read:
#ob<nu<open-brack<0001
to_print =1
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16]
if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1]
action = self.__state_dict.get(self.__state)
if not action:
sys.stderr.write('No action in dictionary state is "%s" \n'
% self.__state)
to_print = action(line)
"""
if self.__after_asterisk:
to_print = self.__asterisk_func(line)
elif self.__list:
self.__in_list_func(line)
elif self.__delete:
to_print = self.__delete_func(line)
else:
to_print = self.__default_func(line)
"""
if to_print:
self.__write_obj.write(line)
with open(self.__file, 'r') as read_obj:
for line in read_obj:
#ob<nu<open-brack<0001
to_print = True
self.__token_info = line[:16]
if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1]
action = self.__state_dict.get(self.__state)
if not action:
sys.stderr.write(_('No action in dictionary state is "%s" \n')
% self.__state)
to_print = action(line)
"""
if self.__after_asterisk:
to_print = self.__asterisk_func(line)
elif self.__list:
self.__in_list_func(line)
elif self.__delete:
to_print = self.__delete_func(line)
else:
to_print = self.__default_func(line)
"""
if to_print:
self.__write_obj.write(line)
self.__write_obj.close()
read_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "delete_info.data")

View File

@ -622,7 +622,7 @@ class ProcessTokens:
msg = _('Number "%s" cannot be converted to integer\n') % num
raise self.__bug_handler, msg
type = self.__number_type_dict.get(num)
if type == None:
if type is None:
if self.__run_level > 3:
msg = _('No type for "%s" in self.__number_type_dict\n')
raise self.__bug_handler
@ -634,7 +634,7 @@ class ProcessTokens:
if not lang_name:
lang_name = "not defined"
if self.__run_level > 3:
msg = 'No entry for number "%s"' % num
msg = _('No entry for number "%s"') % num
raise self.__bug_handler, msg
return 'cw<%s<%s<nu<%s\n' % (pre, token, lang_name)
@ -686,9 +686,7 @@ class ProcessTokens:
return 'cw<%s<%s<nu<false\n' % (pre, token)
##return 'cw<nu<nu<nu<%s>false<%s\n' % (token, token)
else:
msg = 'boolean should have some value module process tokens\n'
msg += 'token is ' + token + "\n"
msg += "'" + num + "'" + "\n"
msg = _("boolean should have some value module process tokens\ntoken is %s\n'%s'\n") % (token, num)
raise self.__bug_handler, msg
def __no_sup_sub_func(self, pre, token, num):
@ -702,11 +700,9 @@ class ProcessTokens:
numerator = float(re.search('[0-9.\-]+', numerator).group())
except TypeError, msg:
if self.__run_level > 3:
msg = 'no number to process?\n'
msg += 'this indicates that the token '
msg += ' \(\\li\) should have a number and does not\n'
msg += 'numerator is "%s"\n' % numerator
msg += 'denominator is "%s"\n' % denominator
msg = _('No number to process?\nthis indicates that the token \(\\li\) \
should have a number and does not\nnumerator is \
"%s"\ndenominator is "%s"\n') % (numerator, denominator)
raise self.__bug_handler, msg
if 5 > self.__return_code:
self.__return_code = 5
@ -720,17 +716,17 @@ class ProcessTokens:
def split_let_num(self, token):
    """
    Split a token into two parts using the pattern self.__num_exp.

    token -- the string to split.

    Returns a 2-tuple:
      (group 1, group 2) when the pattern matches and group 2 is
                         non-empty;
      (group 1, 0)       when the pattern matches but group 2 is empty;
      (token, 0)         when the pattern does not match at all.
    NOTE(review): the name suggests group 1 is the letter part and
    group 2 the numeric part, but the pattern is defined outside this
    view -- confirm against self.__num_exp.

    Raises self.__bug_handler (with a message naming the token) instead
    of returning a fallback when self.__run_level is greater than 3.
    """
    match_obj = re.search(self.__num_exp, token)
    if match_obj is None:
        if self.__run_level > 3:
            msg = _("token is '%s' \n") % token
            # The message used to be built and then dropped here.
            raise self.__bug_handler(msg)
        return token, 0
    first = match_obj.group(1)
    second = match_obj.group(2)
    if not second:
        if self.__run_level > 3:
            msg = _("token is '%s' \n") % token
            raise self.__bug_handler(msg)
        return first, 0
    return first, second

View File

@ -117,7 +117,7 @@ class Tokenize:
input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
#remove \n in bin data
input_file = self.__bin_exp.sub(lambda x: \
x.group().replace('\n', '') +'\n', input_file)
x.group().replace('\n', '') + '\n', input_file)
#split
tokens = re.split(self.__splitexp, input_file)
#remove empty tokens and \n