mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Handle improper \*\csN in body without braces
This commit is contained in:
parent
2e033022b7
commit
fc42efda42
@ -375,7 +375,7 @@ class ParseRtf:
|
||||
old_rtf = old_rtf_obj.check_if_old_rtf()
|
||||
if old_rtf:
|
||||
if self.__run_level > 5:
|
||||
msg = 'older RTF\n'
|
||||
msg = 'Older RTF\n'
|
||||
msg += 'self.__run_level is "%s"\n' % self.__run_level
|
||||
raise RtfInvalidCodeException, msg
|
||||
if self.__run_level > 1:
|
||||
|
@ -48,6 +48,7 @@ class DeleteInfo:
|
||||
'cw<it<listtable_',
|
||||
'cw<it<revi-table',
|
||||
'cw<ls<list-lev-d',
|
||||
# Field allowed
|
||||
'cw<fd<field-inst',
|
||||
'cw<an<book-mk-st',
|
||||
'cw<an<book-mk-en',
|
||||
@ -86,7 +87,7 @@ class DeleteInfo:
|
||||
self.__ob = line
|
||||
return False
|
||||
else:
|
||||
# write previous bracket, since didn't fine asterisk
|
||||
# write previous bracket, since didn't find asterisk
|
||||
if self.__ob:
|
||||
self.__write_obj.write(self.__ob)
|
||||
self.__ob = 0
|
||||
@ -109,7 +110,7 @@ class DeleteInfo:
|
||||
If you find that you are in a delete group, and the previous
|
||||
token is not an open bracket (self.__ob = 0), that means
|
||||
that the delete group is nested inside another acceptable
|
||||
detination group. In this case, you have alrady written
|
||||
destination group. In this case, you have already written
|
||||
the open bracket, so you will need to write the closed one
|
||||
as well.
|
||||
"""
|
||||
|
@ -15,8 +15,10 @@
|
||||
# #
|
||||
# #
|
||||
#########################################################################
|
||||
import sys, os, tempfile, re
|
||||
import sys, os, tempfile, re
|
||||
|
||||
from calibre.ebooks.rtf2xml import field_strings, copy
|
||||
|
||||
class FieldsSmall:
|
||||
"""
|
||||
=================
|
||||
@ -24,7 +26,7 @@ Purpose
|
||||
=================
|
||||
Write tags for bookmarks, index and toc entry fields in a tokenized file.
|
||||
This module does not handle toc or index tables. (This module won't be any
|
||||
use to use to you unless you use it as part of the other modules.)
|
||||
use to you unless you use it as part of the other modules.)
|
||||
-----------
|
||||
Method
|
||||
-----------
|
||||
@ -55,6 +57,7 @@ file.
|
||||
self.__copy = copy
|
||||
self.__write_to = tempfile.mktemp()
|
||||
self.__run_level = run_level
|
||||
|
||||
def __initiate_values(self):
|
||||
"""
|
||||
Initiate all values.
|
||||
@ -81,6 +84,7 @@ file.
|
||||
tx = 'tx<nu<__________<(.*?)'
|
||||
reg_st = ob + bk_st + tx + cb
|
||||
self.__book_start = re.compile(r'%s' % reg_st)
|
||||
|
||||
def __before_body_func(self, line):
|
||||
"""
|
||||
Requires:
|
||||
@ -94,6 +98,7 @@ file.
|
||||
if self.__token_info == 'mi<mk<body-open_':
|
||||
self.__state = 'body'
|
||||
self.__write_obj.write(line)
|
||||
|
||||
def __body_func(self, line):
|
||||
"""
|
||||
Requires:
|
||||
@ -110,6 +115,7 @@ file.
|
||||
action(line, tag)
|
||||
else:
|
||||
self.__write_obj.write(line)
|
||||
|
||||
def __found_bookmark_func(self, line, tag):
|
||||
"""
|
||||
Requires:
|
||||
@ -125,6 +131,7 @@ file.
|
||||
self.__cb_count = 0
|
||||
self.__state = 'bookmark'
|
||||
self.__type_of_bookmark = tag
|
||||
|
||||
def __bookmark_func(self, line):
|
||||
"""
|
||||
Requires:
|
||||
@ -153,6 +160,7 @@ file.
|
||||
self.__write_obj.write(line)
|
||||
elif line[0:2] == 'tx':
|
||||
self.__text_string += line[17:-1]
|
||||
|
||||
def __parse_index_func(self, my_string):
|
||||
"""
|
||||
Requires:
|
||||
@ -201,6 +209,7 @@ file.
|
||||
my_changed_string += '<sub-entry>%s' % sub_entry
|
||||
my_changed_string += '\n'
|
||||
return my_changed_string
|
||||
|
||||
def __index_see_func(self, my_string):
|
||||
in_see = 0
|
||||
bracket_count = 0
|
||||
@ -226,6 +235,7 @@ file.
|
||||
in_see = 1
|
||||
changed_string += '%s\n' % line
|
||||
return changed_string, see_string
|
||||
|
||||
def __index_bookmark_func(self, my_string):
|
||||
"""
|
||||
Requires:
|
||||
@ -262,6 +272,7 @@ file.
|
||||
in_bookmark = 1
|
||||
index_string += '%s\n' % line
|
||||
return index_string, bookmark_string
|
||||
|
||||
def __index__format_func(self, my_string):
|
||||
italics = 0
|
||||
bold =0
|
||||
@ -273,6 +284,7 @@ file.
|
||||
if token_info == 'cw<in<index-ital':
|
||||
italics = 1
|
||||
return italics, bold
|
||||
|
||||
def __parse_toc_func(self, my_string):
|
||||
"""
|
||||
Requires:
|
||||
@ -308,6 +320,7 @@ file.
|
||||
my_changed_string += '<main-entry>%s' % main_entry
|
||||
my_changed_string += '\n'
|
||||
return my_changed_string
|
||||
|
||||
def __parse_bookmark_for_toc(self, my_string):
|
||||
"""
|
||||
Requires:
|
||||
@ -353,6 +366,7 @@ file.
|
||||
in_bookmark = 1
|
||||
toc_string += '%s\n' % line
|
||||
return toc_string, book_start_string, book_end_string
|
||||
|
||||
def __parse_bookmark_func(self, my_string, type):
|
||||
"""
|
||||
Requires:
|
||||
@ -367,6 +381,7 @@ file.
|
||||
my_changed_string = ('mi<tg<empty-att_<field<type>%s'
|
||||
'<number>%s<update>none\n' % (type, my_string))
|
||||
return my_changed_string
|
||||
|
||||
def __found_toc_index_func(self, line, tag):
|
||||
"""
|
||||
Requires:
|
||||
@ -382,6 +397,7 @@ file.
|
||||
self.__cb_count = 0
|
||||
self.__state = 'toc_index'
|
||||
self.__tag = tag
|
||||
|
||||
def __toc_index_func(self, line):
|
||||
"""
|
||||
Requires:
|
||||
@ -409,6 +425,7 @@ file.
|
||||
self.__write_obj.write(line)
|
||||
else:
|
||||
self.__text_string += line
|
||||
|
||||
def fix_fields(self):
|
||||
"""
|
||||
Requires:
|
||||
@ -423,24 +440,19 @@ file.
|
||||
bookmark.
|
||||
"""
|
||||
self.__initiate_values()
|
||||
read_obj = open(self.__file)
|
||||
self.__write_obj = open(self.__write_to, 'w')
|
||||
line_to_read = '1'
|
||||
while line_to_read:
|
||||
line_to_read = read_obj.readline()
|
||||
line = line_to_read
|
||||
self.__token_info = line[:16]
|
||||
if self.__token_info == 'ob<nu<open-brack':
|
||||
self.__ob_count = line[-5:-1]
|
||||
if self.__token_info == 'cb<nu<clos-brack':
|
||||
self.__cb_count = line[-5:-1]
|
||||
action = self.__state_dict.get(self.__state)
|
||||
if action == None:
|
||||
sys.stderr.write('no no matching state in module fields_small.py\n')
|
||||
sys.stderr.write(self.__state + '\n')
|
||||
action(line)
|
||||
read_obj.close()
|
||||
self.__write_obj.close()
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
with open(self.__write_to, 'w') as self.__write_obj:
|
||||
for line in read_obj:
|
||||
self.__token_info = line[:16]
|
||||
if self.__token_info == 'ob<nu<open-brack':
|
||||
self.__ob_count = line[-5:-1]
|
||||
if self.__token_info == 'cb<nu<clos-brack':
|
||||
self.__cb_count = line[-5:-1]
|
||||
action = self.__state_dict.get(self.__state)
|
||||
if action is None:
|
||||
sys.stderr.write('No matching state in module fields_small.py\n')
|
||||
sys.stderr.write(self.__state + '\n')
|
||||
action(line)
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
if self.__copy:
|
||||
copy_obj.copy_file(self.__write_to, "fields_small.data")
|
||||
|
@ -115,8 +115,8 @@ class Tokenize:
|
||||
|
||||
def __sub_reg_split(self,input_file):
|
||||
input_file = self.__replace_spchar.mreplace(input_file)
|
||||
# this is for older RTF
|
||||
input_file = self.__par_exp.sub('\n\\par \n', input_file)
|
||||
input_file = self.__cs_ast.sub("\g<1>", input_file)
|
||||
input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
|
||||
input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
|
||||
#remove \n in bin data
|
||||
@ -172,6 +172,8 @@ class Tokenize:
|
||||
self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
|
||||
#this is for old RTF
|
||||
self.__par_exp = re.compile(r'\\\n+')
|
||||
# handle an improper \cs char-style that is preceded by \* but not enclosed in a { group
|
||||
self.__cs_ast = re.compile(r'\\\*([\n ]*\\cs\d+[\n \\]+)')
|
||||
# self.__par_exp = re.compile(r'\\$')
|
||||
#self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
|
||||
#self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
|
||||
|
Loading…
x
Reference in New Issue
Block a user