mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Various RTF minor changes
This commit is contained in:
parent
bab204cd0f
commit
56bb15d6ff
@ -249,7 +249,6 @@ class ParseRtf:
|
||||
else self.__file.encode('utf-8')
|
||||
msg = _('File %s does not appear to be correctly encoded.\n') % file_name
|
||||
raise InvalidRtfException, msg
|
||||
|
||||
delete_info_obj = delete_info.DeleteInfo(
|
||||
in_file = self.__temp_file,
|
||||
copy = self.__copy,
|
||||
|
@ -16,7 +16,9 @@
|
||||
# #
|
||||
#########################################################################
|
||||
import sys, os, tempfile
|
||||
|
||||
from calibre.ebooks.rtf2xml import copy
|
||||
|
||||
class DeleteInfo:
|
||||
"""Delete unnecessary destination groups"""
|
||||
def __init__(self,
|
||||
@ -29,17 +31,18 @@ class DeleteInfo:
|
||||
self.__bug_handler = bug_handler
|
||||
self.__copy = copy
|
||||
self.__write_to = tempfile.mktemp()
|
||||
self.__bracket_count=0
|
||||
self.__bracket_count= 0
|
||||
self.__ob_count = 0
|
||||
self.__cb_count = 0
|
||||
self.__after_asterisk = 0
|
||||
self.__after_asterisk = False
|
||||
self.__delete = 0
|
||||
self.__initiate_allow()
|
||||
self.__ob = 0
|
||||
self.__write_cb = 0
|
||||
self.__run_level = run_level
|
||||
self.__found_delete = 0
|
||||
self.__list = 0
|
||||
self.__found_delete = False
|
||||
self.__list = False
|
||||
|
||||
def __initiate_allow(self):
|
||||
"""
|
||||
Initiate a list of destination groups which should be printed out.
|
||||
@ -69,6 +72,7 @@ class DeleteInfo:
|
||||
'delete' : self.__delete_func,
|
||||
'list' : self.__list_func,
|
||||
}
|
||||
|
||||
def __default_func(self,line):
|
||||
"""Handle lines when in no special state. Look for an asterisk to
|
||||
begin a special state. Otherwise, print out line."""
|
||||
@ -81,13 +85,14 @@ class DeleteInfo:
|
||||
if self.__ob:
|
||||
self.__write_obj.write(self.__ob)
|
||||
self.__ob = line
|
||||
return 0
|
||||
return False
|
||||
else:
|
||||
# write previous bracket, since didn't find asterisk
|
||||
if self.__ob:
|
||||
self.__write_obj.write(self.__ob)
|
||||
self.__ob = 0
|
||||
return 1
|
||||
return True
|
||||
|
||||
def __delete_func(self,line):
|
||||
"""Handle lines when in delete state. Don't print out lines
|
||||
unless the state has ended."""
|
||||
@ -95,13 +100,14 @@ class DeleteInfo:
|
||||
self.__state = 'default'
|
||||
if self.__write_cb:
|
||||
self.__write_cb = 0
|
||||
return 1
|
||||
return 0
|
||||
return True
|
||||
return False
|
||||
|
||||
def __asterisk_func(self,line):
|
||||
"""
|
||||
Determine whether to delete info in group
|
||||
Note on self.__cb flag.
|
||||
If you find that you are in a delete group, and the preivous
|
||||
If you find that you are in a delete group, and the previous
|
||||
token is not an open bracket (self.__ob = 0), that means
|
||||
that the delete group is nested inside another acceptable
|
||||
destination group. In this case, you have already written
|
||||
@ -110,21 +116,21 @@ class DeleteInfo:
|
||||
"""
|
||||
# Test for {\*}, in which case don't enter
|
||||
# delete state
|
||||
self.__after_asterisk = 0 # only enter this function once
|
||||
self.__found_delete = 1
|
||||
self.__after_asterisk = False # only enter this function once
|
||||
self.__found_delete = True
|
||||
if self.__token_info == 'cb<nu<clos-brack':
|
||||
if self.__delete_count == self.__cb_count:
|
||||
self.__state = 'default'
|
||||
self.__ob = 0
|
||||
# changed this because haven't printed out start
|
||||
return 0
|
||||
return False
|
||||
else:
|
||||
# not sure what happens here!
|
||||
# believe I have a '{\*}
|
||||
if self.__run_level > 3:
|
||||
msg = 'flag problem\n'
|
||||
raise self.__bug_handler, msg
|
||||
return 1
|
||||
return True
|
||||
elif self.__token_info in self.__allowable :
|
||||
if self.__ob:
|
||||
self.__write_obj.write(self.__ob)
|
||||
@ -132,7 +138,7 @@ class DeleteInfo:
|
||||
self.__state = 'default'
|
||||
else:
|
||||
pass
|
||||
return 1
|
||||
return True
|
||||
elif self.__token_info == 'cw<ls<list______':
|
||||
self.__ob = 0
|
||||
self.__found_list_func(line)
|
||||
@ -142,75 +148,74 @@ class DeleteInfo:
|
||||
self.__ob = 0
|
||||
self.__state = 'delete'
|
||||
self.__cb_count = 0
|
||||
return 0
|
||||
return False
|
||||
else:
|
||||
if self.__run_level > 5:
|
||||
msg = 'After an asterisk, and found neither an allowable or non-allowble token\n'
|
||||
msg += 'token is "%s"\n' % self.__token_info
|
||||
msg = _('After an asterisk, and found neither an allowable or non-allowble token\n\
|
||||
token is "%s"\n') % self.__token_info
|
||||
raise self.__bug_handler
|
||||
if not self.__ob:
|
||||
self.__write_cb = 1
|
||||
self.__ob = 0
|
||||
self.__state = 'delete'
|
||||
self.__cb_count = 0
|
||||
return 0
|
||||
return False
|
||||
|
||||
def __found_list_func(self, line):
|
||||
"""
|
||||
print out control words in this group
|
||||
"""
|
||||
self.__state = 'list'
|
||||
|
||||
def __list_func(self, line):
|
||||
"""
|
||||
Check to see if the group has ended.
|
||||
Return 1 for all control words.
|
||||
Return 0 otherwise.
|
||||
Return True for all control words.
|
||||
Return False otherwise.
|
||||
"""
|
||||
if self.__delete_count == self.__cb_count and self.__token_info ==\
|
||||
'cb<nu<clos-brack':
|
||||
self.__state = 'default'
|
||||
if self.__write_cb:
|
||||
self.__write_cb = 0
|
||||
return 1
|
||||
return 0
|
||||
return True
|
||||
return False
|
||||
elif line[0:2] == 'cw':
|
||||
return 1
|
||||
return True
|
||||
else:
|
||||
return 0
|
||||
return False
|
||||
|
||||
def delete_info(self):
|
||||
"""Main method for handling other methods. Read one line in at
|
||||
a time, and determine whether to print the line based on the state."""
|
||||
line_to_read = 'dummy'
|
||||
read_obj = open(self.__file, 'r')
|
||||
self.__write_obj = open(self.__write_to, 'w')
|
||||
while line_to_read:
|
||||
#ob<nu<open-brack<0001
|
||||
to_print =1
|
||||
line_to_read = read_obj.readline()
|
||||
line = line_to_read
|
||||
self.__token_info = line[:16]
|
||||
if self.__token_info == 'ob<nu<open-brack':
|
||||
self.__ob_count = line[-5:-1]
|
||||
if self.__token_info == 'cb<nu<clos-brack':
|
||||
self.__cb_count = line[-5:-1]
|
||||
action = self.__state_dict.get(self.__state)
|
||||
if not action:
|
||||
sys.stderr.write('No action in dictionary state is "%s" \n'
|
||||
% self.__state)
|
||||
to_print = action(line)
|
||||
"""
|
||||
if self.__after_asterisk:
|
||||
to_print = self.__asterisk_func(line)
|
||||
elif self.__list:
|
||||
self.__in_list_func(line)
|
||||
elif self.__delete:
|
||||
to_print = self.__delete_func(line)
|
||||
else:
|
||||
to_print = self.__default_func(line)
|
||||
"""
|
||||
if to_print:
|
||||
self.__write_obj.write(line)
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
for line in read_obj:
|
||||
#ob<nu<open-brack<0001
|
||||
to_print = True
|
||||
self.__token_info = line[:16]
|
||||
if self.__token_info == 'ob<nu<open-brack':
|
||||
self.__ob_count = line[-5:-1]
|
||||
if self.__token_info == 'cb<nu<clos-brack':
|
||||
self.__cb_count = line[-5:-1]
|
||||
action = self.__state_dict.get(self.__state)
|
||||
if not action:
|
||||
sys.stderr.write(_('No action in dictionary state is "%s" \n')
|
||||
% self.__state)
|
||||
to_print = action(line)
|
||||
"""
|
||||
if self.__after_asterisk:
|
||||
to_print = self.__asterisk_func(line)
|
||||
elif self.__list:
|
||||
self.__in_list_func(line)
|
||||
elif self.__delete:
|
||||
to_print = self.__delete_func(line)
|
||||
else:
|
||||
to_print = self.__default_func(line)
|
||||
"""
|
||||
if to_print:
|
||||
self.__write_obj.write(line)
|
||||
self.__write_obj.close()
|
||||
read_obj.close()
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
if self.__copy:
|
||||
copy_obj.copy_file(self.__write_to, "delete_info.data")
|
||||
|
@ -622,7 +622,7 @@ class ProcessTokens:
|
||||
msg = _('Number "%s" cannot be converted to integer\n') % num
|
||||
raise self.__bug_handler, msg
|
||||
type = self.__number_type_dict.get(num)
|
||||
if type == None:
|
||||
if type is None:
|
||||
if self.__run_level > 3:
|
||||
msg = _('No type for "%s" in self.__number_type_dict\n')
|
||||
raise self.__bug_handler
|
||||
@ -634,7 +634,7 @@ class ProcessTokens:
|
||||
if not lang_name:
|
||||
lang_name = "not defined"
|
||||
if self.__run_level > 3:
|
||||
msg = 'No entry for number "%s"' % num
|
||||
msg = _('No entry for number "%s"') % num
|
||||
raise self.__bug_handler, msg
|
||||
return 'cw<%s<%s<nu<%s\n' % (pre, token, lang_name)
|
||||
|
||||
@ -686,9 +686,7 @@ class ProcessTokens:
|
||||
return 'cw<%s<%s<nu<false\n' % (pre, token)
|
||||
##return 'cw<nu<nu<nu<%s>false<%s\n' % (token, token)
|
||||
else:
|
||||
msg = 'boolean should have some value module process tokens\n'
|
||||
msg += 'token is ' + token + "\n"
|
||||
msg += "'" + num + "'" + "\n"
|
||||
msg = _("boolean should have some value module process tokens\ntoken is %s\n'%s'\n") % (token, num)
|
||||
raise self.__bug_handler, msg
|
||||
|
||||
def __no_sup_sub_func(self, pre, token, num):
|
||||
@ -702,11 +700,9 @@ class ProcessTokens:
|
||||
numerator = float(re.search('[0-9.\-]+', numerator).group())
|
||||
except TypeError, msg:
|
||||
if self.__run_level > 3:
|
||||
msg = 'no number to process?\n'
|
||||
msg += 'this indicates that the token '
|
||||
msg += ' \(\\li\) should have a number and does not\n'
|
||||
msg += 'numerator is "%s"\n' % numerator
|
||||
msg += 'denominator is "%s"\n' % denominator
|
||||
msg = _('No number to process?\nthis indicates that the token \(\\li\) \
|
||||
should have a number and does not\nnumerator is \
|
||||
"%s"\ndenominator is "%s"\n') % (numerator, denominator)
|
||||
raise self.__bug_handler, msg
|
||||
if 5 > self.__return_code:
|
||||
self.__return_code = 5
|
||||
@ -720,17 +716,17 @@ class ProcessTokens:
|
||||
|
||||
def split_let_num(self, token):
|
||||
match_obj = re.search(self.__num_exp,token)
|
||||
if match_obj != None:
|
||||
if match_obj is not None:
|
||||
first = match_obj.group(1)
|
||||
second = match_obj.group(2)
|
||||
if not second:
|
||||
if self.__run_level > 3:
|
||||
msg = "token is '%s' \n" % token
|
||||
msg = _("token is '%s' \n") % token
|
||||
raise self.__bug_handler, msg
|
||||
return first, 0
|
||||
else:
|
||||
if self.__run_level > 3:
|
||||
msg = "token is '%s' \n" % token
|
||||
msg = _("token is '%s' \n") % token
|
||||
raise self.__bug_handler
|
||||
return token, 0
|
||||
return first, second
|
||||
|
@ -117,7 +117,7 @@ class Tokenize:
|
||||
input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
|
||||
#remove \n in bin data
|
||||
input_file = self.__bin_exp.sub(lambda x: \
|
||||
x.group().replace('\n', '') +'\n', input_file)
|
||||
x.group().replace('\n', '') + '\n', input_file)
|
||||
#split
|
||||
tokens = re.split(self.__splitexp, input_file)
|
||||
#remove empty tokens and \n
|
||||
|
Loading…
x
Reference in New Issue
Block a user