mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Handle improper \*\csN in body without braces
This commit is contained in:
parent
2e033022b7
commit
fc42efda42
@ -375,7 +375,7 @@ class ParseRtf:
|
|||||||
old_rtf = old_rtf_obj.check_if_old_rtf()
|
old_rtf = old_rtf_obj.check_if_old_rtf()
|
||||||
if old_rtf:
|
if old_rtf:
|
||||||
if self.__run_level > 5:
|
if self.__run_level > 5:
|
||||||
msg = 'older RTF\n'
|
msg = 'Older RTF\n'
|
||||||
msg += 'self.__run_level is "%s"\n' % self.__run_level
|
msg += 'self.__run_level is "%s"\n' % self.__run_level
|
||||||
raise RtfInvalidCodeException, msg
|
raise RtfInvalidCodeException, msg
|
||||||
if self.__run_level > 1:
|
if self.__run_level > 1:
|
||||||
|
@ -48,6 +48,7 @@ class DeleteInfo:
|
|||||||
'cw<it<listtable_',
|
'cw<it<listtable_',
|
||||||
'cw<it<revi-table',
|
'cw<it<revi-table',
|
||||||
'cw<ls<list-lev-d',
|
'cw<ls<list-lev-d',
|
||||||
|
# Field allowed
|
||||||
'cw<fd<field-inst',
|
'cw<fd<field-inst',
|
||||||
'cw<an<book-mk-st',
|
'cw<an<book-mk-st',
|
||||||
'cw<an<book-mk-en',
|
'cw<an<book-mk-en',
|
||||||
@ -86,7 +87,7 @@ class DeleteInfo:
|
|||||||
self.__ob = line
|
self.__ob = line
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
# write previous bracket, since didn't fine asterisk
|
# write previous bracket, since didn't find asterisk
|
||||||
if self.__ob:
|
if self.__ob:
|
||||||
self.__write_obj.write(self.__ob)
|
self.__write_obj.write(self.__ob)
|
||||||
self.__ob = 0
|
self.__ob = 0
|
||||||
@ -109,7 +110,7 @@ class DeleteInfo:
|
|||||||
If you find that you are in a delete group, and the previous
|
If you find that you are in a delete group, and the previous
|
||||||
token is not an open bracket (self.__ob = 0), that means
|
token is not an open bracket (self.__ob = 0), that means
|
||||||
that the delete group is nested inside another acceptable
|
that the delete group is nested inside another acceptable
|
||||||
destination group. In this case, you have alrady written
|
destination group. In this case, you have already written
|
||||||
the open bracket, so you will need to write the closed one
|
the open bracket, so you will need to write the closed one
|
||||||
as well.
|
as well.
|
||||||
"""
|
"""
|
||||||
|
@ -15,8 +15,10 @@
|
|||||||
# #
|
# #
|
||||||
# #
|
# #
|
||||||
#########################################################################
|
#########################################################################
|
||||||
import sys, os, tempfile, re
|
import sys, os, tempfile, re
|
||||||
|
|
||||||
from calibre.ebooks.rtf2xml import field_strings, copy
|
from calibre.ebooks.rtf2xml import field_strings, copy
|
||||||
|
|
||||||
class FieldsSmall:
|
class FieldsSmall:
|
||||||
"""
|
"""
|
||||||
=================
|
=================
|
||||||
@ -24,7 +26,7 @@ Purpose
|
|||||||
=================
|
=================
|
||||||
Write tags for bookmarks, index and toc entry fields in a tokenized file.
|
Write tags for bookmarks, index and toc entry fields in a tokenized file.
|
||||||
This module does not handle toc or index tables. (This module won't be any
|
This module does not handle toc or index tables. (This module won't be any
|
||||||
use to use to you unless you use it as part of the other modules.)
|
use to you unless you use it as part of the other modules.)
|
||||||
-----------
|
-----------
|
||||||
Method
|
Method
|
||||||
-----------
|
-----------
|
||||||
@ -55,6 +57,7 @@ file.
|
|||||||
self.__copy = copy
|
self.__copy = copy
|
||||||
self.__write_to = tempfile.mktemp()
|
self.__write_to = tempfile.mktemp()
|
||||||
self.__run_level = run_level
|
self.__run_level = run_level
|
||||||
|
|
||||||
def __initiate_values(self):
|
def __initiate_values(self):
|
||||||
"""
|
"""
|
||||||
Initiate all values.
|
Initiate all values.
|
||||||
@ -81,6 +84,7 @@ file.
|
|||||||
tx = 'tx<nu<__________<(.*?)'
|
tx = 'tx<nu<__________<(.*?)'
|
||||||
reg_st = ob + bk_st + tx + cb
|
reg_st = ob + bk_st + tx + cb
|
||||||
self.__book_start = re.compile(r'%s' % reg_st)
|
self.__book_start = re.compile(r'%s' % reg_st)
|
||||||
|
|
||||||
def __before_body_func(self, line):
|
def __before_body_func(self, line):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -94,6 +98,7 @@ file.
|
|||||||
if self.__token_info == 'mi<mk<body-open_':
|
if self.__token_info == 'mi<mk<body-open_':
|
||||||
self.__state = 'body'
|
self.__state = 'body'
|
||||||
self.__write_obj.write(line)
|
self.__write_obj.write(line)
|
||||||
|
|
||||||
def __body_func(self, line):
|
def __body_func(self, line):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -110,6 +115,7 @@ file.
|
|||||||
action(line, tag)
|
action(line, tag)
|
||||||
else:
|
else:
|
||||||
self.__write_obj.write(line)
|
self.__write_obj.write(line)
|
||||||
|
|
||||||
def __found_bookmark_func(self, line, tag):
|
def __found_bookmark_func(self, line, tag):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -125,6 +131,7 @@ file.
|
|||||||
self.__cb_count = 0
|
self.__cb_count = 0
|
||||||
self.__state = 'bookmark'
|
self.__state = 'bookmark'
|
||||||
self.__type_of_bookmark = tag
|
self.__type_of_bookmark = tag
|
||||||
|
|
||||||
def __bookmark_func(self, line):
|
def __bookmark_func(self, line):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -153,6 +160,7 @@ file.
|
|||||||
self.__write_obj.write(line)
|
self.__write_obj.write(line)
|
||||||
elif line[0:2] == 'tx':
|
elif line[0:2] == 'tx':
|
||||||
self.__text_string += line[17:-1]
|
self.__text_string += line[17:-1]
|
||||||
|
|
||||||
def __parse_index_func(self, my_string):
|
def __parse_index_func(self, my_string):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -201,6 +209,7 @@ file.
|
|||||||
my_changed_string += '<sub-entry>%s' % sub_entry
|
my_changed_string += '<sub-entry>%s' % sub_entry
|
||||||
my_changed_string += '\n'
|
my_changed_string += '\n'
|
||||||
return my_changed_string
|
return my_changed_string
|
||||||
|
|
||||||
def __index_see_func(self, my_string):
|
def __index_see_func(self, my_string):
|
||||||
in_see = 0
|
in_see = 0
|
||||||
bracket_count = 0
|
bracket_count = 0
|
||||||
@ -226,6 +235,7 @@ file.
|
|||||||
in_see = 1
|
in_see = 1
|
||||||
changed_string += '%s\n' % line
|
changed_string += '%s\n' % line
|
||||||
return changed_string, see_string
|
return changed_string, see_string
|
||||||
|
|
||||||
def __index_bookmark_func(self, my_string):
|
def __index_bookmark_func(self, my_string):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -262,6 +272,7 @@ file.
|
|||||||
in_bookmark = 1
|
in_bookmark = 1
|
||||||
index_string += '%s\n' % line
|
index_string += '%s\n' % line
|
||||||
return index_string, bookmark_string
|
return index_string, bookmark_string
|
||||||
|
|
||||||
def __index__format_func(self, my_string):
|
def __index__format_func(self, my_string):
|
||||||
italics = 0
|
italics = 0
|
||||||
bold =0
|
bold =0
|
||||||
@ -273,6 +284,7 @@ file.
|
|||||||
if token_info == 'cw<in<index-ital':
|
if token_info == 'cw<in<index-ital':
|
||||||
italics = 1
|
italics = 1
|
||||||
return italics, bold
|
return italics, bold
|
||||||
|
|
||||||
def __parse_toc_func(self, my_string):
|
def __parse_toc_func(self, my_string):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -308,6 +320,7 @@ file.
|
|||||||
my_changed_string += '<main-entry>%s' % main_entry
|
my_changed_string += '<main-entry>%s' % main_entry
|
||||||
my_changed_string += '\n'
|
my_changed_string += '\n'
|
||||||
return my_changed_string
|
return my_changed_string
|
||||||
|
|
||||||
def __parse_bookmark_for_toc(self, my_string):
|
def __parse_bookmark_for_toc(self, my_string):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -353,6 +366,7 @@ file.
|
|||||||
in_bookmark = 1
|
in_bookmark = 1
|
||||||
toc_string += '%s\n' % line
|
toc_string += '%s\n' % line
|
||||||
return toc_string, book_start_string, book_end_string
|
return toc_string, book_start_string, book_end_string
|
||||||
|
|
||||||
def __parse_bookmark_func(self, my_string, type):
|
def __parse_bookmark_func(self, my_string, type):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -367,6 +381,7 @@ file.
|
|||||||
my_changed_string = ('mi<tg<empty-att_<field<type>%s'
|
my_changed_string = ('mi<tg<empty-att_<field<type>%s'
|
||||||
'<number>%s<update>none\n' % (type, my_string))
|
'<number>%s<update>none\n' % (type, my_string))
|
||||||
return my_changed_string
|
return my_changed_string
|
||||||
|
|
||||||
def __found_toc_index_func(self, line, tag):
|
def __found_toc_index_func(self, line, tag):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -382,6 +397,7 @@ file.
|
|||||||
self.__cb_count = 0
|
self.__cb_count = 0
|
||||||
self.__state = 'toc_index'
|
self.__state = 'toc_index'
|
||||||
self.__tag = tag
|
self.__tag = tag
|
||||||
|
|
||||||
def __toc_index_func(self, line):
|
def __toc_index_func(self, line):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -409,6 +425,7 @@ file.
|
|||||||
self.__write_obj.write(line)
|
self.__write_obj.write(line)
|
||||||
else:
|
else:
|
||||||
self.__text_string += line
|
self.__text_string += line
|
||||||
|
|
||||||
def fix_fields(self):
|
def fix_fields(self):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -423,24 +440,19 @@ file.
|
|||||||
bookmark.
|
bookmark.
|
||||||
"""
|
"""
|
||||||
self.__initiate_values()
|
self.__initiate_values()
|
||||||
read_obj = open(self.__file)
|
with open(self.__file, 'r') as read_obj:
|
||||||
self.__write_obj = open(self.__write_to, 'w')
|
with open(self.__write_to, 'w') as self.__write_obj:
|
||||||
line_to_read = '1'
|
for line in read_obj:
|
||||||
while line_to_read:
|
self.__token_info = line[:16]
|
||||||
line_to_read = read_obj.readline()
|
if self.__token_info == 'ob<nu<open-brack':
|
||||||
line = line_to_read
|
self.__ob_count = line[-5:-1]
|
||||||
self.__token_info = line[:16]
|
if self.__token_info == 'cb<nu<clos-brack':
|
||||||
if self.__token_info == 'ob<nu<open-brack':
|
self.__cb_count = line[-5:-1]
|
||||||
self.__ob_count = line[-5:-1]
|
action = self.__state_dict.get(self.__state)
|
||||||
if self.__token_info == 'cb<nu<clos-brack':
|
if action is None:
|
||||||
self.__cb_count = line[-5:-1]
|
sys.stderr.write('No matching state in module fields_small.py\n')
|
||||||
action = self.__state_dict.get(self.__state)
|
sys.stderr.write(self.__state + '\n')
|
||||||
if action == None:
|
action(line)
|
||||||
sys.stderr.write('no no matching state in module fields_small.py\n')
|
|
||||||
sys.stderr.write(self.__state + '\n')
|
|
||||||
action(line)
|
|
||||||
read_obj.close()
|
|
||||||
self.__write_obj.close()
|
|
||||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||||
if self.__copy:
|
if self.__copy:
|
||||||
copy_obj.copy_file(self.__write_to, "fields_small.data")
|
copy_obj.copy_file(self.__write_to, "fields_small.data")
|
||||||
|
@ -115,8 +115,8 @@ class Tokenize:
|
|||||||
|
|
||||||
def __sub_reg_split(self,input_file):
|
def __sub_reg_split(self,input_file):
|
||||||
input_file = self.__replace_spchar.mreplace(input_file)
|
input_file = self.__replace_spchar.mreplace(input_file)
|
||||||
# this is for older RTF
|
|
||||||
input_file = self.__par_exp.sub('\n\\par \n', input_file)
|
input_file = self.__par_exp.sub('\n\\par \n', input_file)
|
||||||
|
input_file = self.__cs_ast.sub("\g<1>", input_file)
|
||||||
input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
|
input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
|
||||||
input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
|
input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
|
||||||
#remove \n in bin data
|
#remove \n in bin data
|
||||||
@ -172,6 +172,8 @@ class Tokenize:
|
|||||||
self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
|
self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
|
||||||
#this is for old RTF
|
#this is for old RTF
|
||||||
self.__par_exp = re.compile(r'\\\n+')
|
self.__par_exp = re.compile(r'\\\n+')
|
||||||
|
# handle an improper \cs char-style that is preceded by \* but not enclosed in {
|
||||||
|
self.__cs_ast = re.compile(r'\\\*([\n ]*\\cs\d+[\n \\]+)')
|
||||||
# self.__par_exp = re.compile(r'\\$')
|
# self.__par_exp = re.compile(r'\\$')
|
||||||
#self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
|
#self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
|
||||||
#self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
|
#self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user