Handle improper \*\csN in body without braces

This commit is contained in:
Sengian 2011-01-16 00:47:01 +01:00
parent 2e033022b7
commit fc42efda42
4 changed files with 39 additions and 24 deletions

View File

@ -375,7 +375,7 @@ class ParseRtf:
old_rtf = old_rtf_obj.check_if_old_rtf() old_rtf = old_rtf_obj.check_if_old_rtf()
if old_rtf: if old_rtf:
if self.__run_level > 5: if self.__run_level > 5:
msg = 'older RTF\n' msg = 'Older RTF\n'
msg += 'self.__run_level is "%s"\n' % self.__run_level msg += 'self.__run_level is "%s"\n' % self.__run_level
raise RtfInvalidCodeException, msg raise RtfInvalidCodeException, msg
if self.__run_level > 1: if self.__run_level > 1:

View File

@ -48,6 +48,7 @@ class DeleteInfo:
'cw<it<listtable_', 'cw<it<listtable_',
'cw<it<revi-table', 'cw<it<revi-table',
'cw<ls<list-lev-d', 'cw<ls<list-lev-d',
# Field allowed
'cw<fd<field-inst', 'cw<fd<field-inst',
'cw<an<book-mk-st', 'cw<an<book-mk-st',
'cw<an<book-mk-en', 'cw<an<book-mk-en',
@ -86,7 +87,7 @@ class DeleteInfo:
self.__ob = line self.__ob = line
return False return False
else: else:
# write previous bracket, since didn't fine asterisk # write previous bracket, since didn't find asterisk
if self.__ob: if self.__ob:
self.__write_obj.write(self.__ob) self.__write_obj.write(self.__ob)
self.__ob = 0 self.__ob = 0
@ -109,7 +110,7 @@ class DeleteInfo:
If you find that you are in a delete group, and the previous If you find that you are in a delete group, and the previous
token in not an open bracket (self.__ob = 0), that means token in not an open bracket (self.__ob = 0), that means
that the delete group is nested inside another acceptable that the delete group is nested inside another acceptable
detination group. In this case, you have alrady written detination group. In this case, you have already written
the open bracket, so you will need to write the closed one the open bracket, so you will need to write the closed one
as well. as well.
""" """

View File

@ -16,7 +16,9 @@
# # # #
######################################################################### #########################################################################
import sys, os, tempfile, re import sys, os, tempfile, re
from calibre.ebooks.rtf2xml import field_strings, copy from calibre.ebooks.rtf2xml import field_strings, copy
class FieldsSmall: class FieldsSmall:
""" """
================= =================
@ -24,7 +26,7 @@ Purpose
================= =================
Write tags for bookmarks, index and toc entry fields in a tokenized file. Write tags for bookmarks, index and toc entry fields in a tokenized file.
This module does not handle toc or index tables. (This module won't be any This module does not handle toc or index tables. (This module won't be any
use to use to you unless you use it as part of the other modules.) use to you unless you use it as part of the other modules.)
----------- -----------
Method Method
----------- -----------
@ -55,6 +57,7 @@ file.
self.__copy = copy self.__copy = copy
self.__write_to = tempfile.mktemp() self.__write_to = tempfile.mktemp()
self.__run_level = run_level self.__run_level = run_level
def __initiate_values(self): def __initiate_values(self):
""" """
Initiate all values. Initiate all values.
@ -81,6 +84,7 @@ file.
tx = 'tx<nu<__________<(.*?)' tx = 'tx<nu<__________<(.*?)'
reg_st = ob + bk_st + tx + cb reg_st = ob + bk_st + tx + cb
self.__book_start = re.compile(r'%s' % reg_st) self.__book_start = re.compile(r'%s' % reg_st)
def __before_body_func(self, line): def __before_body_func(self, line):
""" """
Requires: Requires:
@ -94,6 +98,7 @@ file.
if self.__token_info == 'mi<mk<body-open_': if self.__token_info == 'mi<mk<body-open_':
self.__state = 'body' self.__state = 'body'
self.__write_obj.write(line) self.__write_obj.write(line)
def __body_func(self, line): def __body_func(self, line):
""" """
Requires: Requires:
@ -110,6 +115,7 @@ file.
action(line, tag) action(line, tag)
else: else:
self.__write_obj.write(line) self.__write_obj.write(line)
def __found_bookmark_func(self, line, tag): def __found_bookmark_func(self, line, tag):
""" """
Requires: Requires:
@ -125,6 +131,7 @@ file.
self.__cb_count = 0 self.__cb_count = 0
self.__state = 'bookmark' self.__state = 'bookmark'
self.__type_of_bookmark = tag self.__type_of_bookmark = tag
def __bookmark_func(self, line): def __bookmark_func(self, line):
""" """
Requires: Requires:
@ -153,6 +160,7 @@ file.
self.__write_obj.write(line) self.__write_obj.write(line)
elif line[0:2] == 'tx': elif line[0:2] == 'tx':
self.__text_string += line[17:-1] self.__text_string += line[17:-1]
def __parse_index_func(self, my_string): def __parse_index_func(self, my_string):
""" """
Requires: Requires:
@ -201,6 +209,7 @@ file.
my_changed_string += '<sub-entry>%s' % sub_entry my_changed_string += '<sub-entry>%s' % sub_entry
my_changed_string += '\n' my_changed_string += '\n'
return my_changed_string return my_changed_string
def __index_see_func(self, my_string): def __index_see_func(self, my_string):
in_see = 0 in_see = 0
bracket_count = 0 bracket_count = 0
@ -226,6 +235,7 @@ file.
in_see = 1 in_see = 1
changed_string += '%s\n' % line changed_string += '%s\n' % line
return changed_string, see_string return changed_string, see_string
def __index_bookmark_func(self, my_string): def __index_bookmark_func(self, my_string):
""" """
Requries: Requries:
@ -262,6 +272,7 @@ file.
in_bookmark = 1 in_bookmark = 1
index_string += '%s\n' % line index_string += '%s\n' % line
return index_string, bookmark_string return index_string, bookmark_string
def __index__format_func(self, my_string): def __index__format_func(self, my_string):
italics = 0 italics = 0
bold =0 bold =0
@ -273,6 +284,7 @@ file.
if token_info == 'cw<in<index-ital': if token_info == 'cw<in<index-ital':
italics = 1 italics = 1
return italics, bold return italics, bold
def __parse_toc_func(self, my_string): def __parse_toc_func(self, my_string):
""" """
Requires: Requires:
@ -308,6 +320,7 @@ file.
my_changed_string += '<main-entry>%s' % main_entry my_changed_string += '<main-entry>%s' % main_entry
my_changed_string += '\n' my_changed_string += '\n'
return my_changed_string return my_changed_string
def __parse_bookmark_for_toc(self, my_string): def __parse_bookmark_for_toc(self, my_string):
""" """
Requires: Requires:
@ -353,6 +366,7 @@ file.
in_bookmark = 1 in_bookmark = 1
toc_string += '%s\n' % line toc_string += '%s\n' % line
return toc_string, book_start_string, book_end_string return toc_string, book_start_string, book_end_string
def __parse_bookmark_func(self, my_string, type): def __parse_bookmark_func(self, my_string, type):
""" """
Requires: Requires:
@ -367,6 +381,7 @@ file.
my_changed_string = ('mi<tg<empty-att_<field<type>%s' my_changed_string = ('mi<tg<empty-att_<field<type>%s'
'<number>%s<update>none\n' % (type, my_string)) '<number>%s<update>none\n' % (type, my_string))
return my_changed_string return my_changed_string
def __found_toc_index_func(self, line, tag): def __found_toc_index_func(self, line, tag):
""" """
Requires: Requires:
@ -382,6 +397,7 @@ file.
self.__cb_count = 0 self.__cb_count = 0
self.__state = 'toc_index' self.__state = 'toc_index'
self.__tag = tag self.__tag = tag
def __toc_index_func(self, line): def __toc_index_func(self, line):
""" """
Requires: Requires:
@ -409,6 +425,7 @@ file.
self.__write_obj.write(line) self.__write_obj.write(line)
else: else:
self.__text_string += line self.__text_string += line
def fix_fields(self): def fix_fields(self):
""" """
Requires: Requires:
@ -423,24 +440,19 @@ file.
bookmark. bookmark.
""" """
self.__initiate_values() self.__initiate_values()
read_obj = open(self.__file) with open(self.__file, 'r') as read_obj:
self.__write_obj = open(self.__write_to, 'w') with open(self.__write_to, 'w') as self.__write_obj:
line_to_read = '1' for line in read_obj:
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16] self.__token_info = line[:16]
if self.__token_info == 'ob<nu<open-brack': if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1] self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack': if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1] self.__cb_count = line[-5:-1]
action = self.__state_dict.get(self.__state) action = self.__state_dict.get(self.__state)
if action == None: if action is None:
sys.stderr.write('no no matching state in module fields_small.py\n') sys.stderr.write('No matching state in module fields_small.py\n')
sys.stderr.write(self.__state + '\n') sys.stderr.write(self.__state + '\n')
action(line) action(line)
read_obj.close()
self.__write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "fields_small.data") copy_obj.copy_file(self.__write_to, "fields_small.data")

View File

@ -115,8 +115,8 @@ class Tokenize:
def __sub_reg_split(self,input_file): def __sub_reg_split(self,input_file):
input_file = self.__replace_spchar.mreplace(input_file) input_file = self.__replace_spchar.mreplace(input_file)
# this is for older RTF
input_file = self.__par_exp.sub('\n\\par \n', input_file) input_file = self.__par_exp.sub('\n\\par \n', input_file)
input_file = self.__cs_ast.sub("\g<1>", input_file)
input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file) input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file) input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
#remove \n in bin data #remove \n in bin data
@ -172,6 +172,8 @@ class Tokenize:
self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)") self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
#this is for old RTF #this is for old RTF
self.__par_exp = re.compile(r'\\\n+') self.__par_exp = re.compile(r'\\\n+')
#handle improper cs char-style with \* before without {
self.__cs_ast = re.compile(r'\\\*([\n ]*\\cs\d+[\n \\]+)')
# self.__par_exp = re.compile(r'\\$') # self.__par_exp = re.compile(r'\\$')
#self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}") #self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
#self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})") #self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")