mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Still old paragraph format
This commit is contained in:
parent
056f97c700
commit
ccf856539a
@ -78,7 +78,6 @@ class RTFInput(InputFormatPlugin):
|
||||
from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
|
||||
ofile = 'dataxml.xml'
|
||||
run_lev, debug_dir, indent_out = 1, None, 0
|
||||
#just to check if the debug process is launched; the directory itself is not actually needed
|
||||
if getattr(self.opts, 'debug_pipeline', None) is not None:
|
||||
try:
|
||||
os.mkdir('rtfdebug')
|
||||
@ -322,5 +321,5 @@ class RTFInput(InputFormatPlugin):
|
||||
return os.path.abspath('metadata.opf')
|
||||
|
||||
#ebook-convert "bad.rtf" test.epub -v -d "E:\Mes eBooks\Developpement\debug"
|
||||
# os.makedirs('E:\\Mes eBooks\\Developpement\\rtfdebug')
|
||||
# debug_dir = 'E:\\Mes eBooks\\Developpement\\rtfdebug'
|
||||
# os.makedirs("E:\\Mes eBooks\\Developpement\\rtfdebug")
|
||||
# debug_dir = "E:\\Mes eBooks\\Developpement\\rtfdebug"
|
||||
|
@ -226,7 +226,7 @@ class ParseRtf:
|
||||
try:
|
||||
return_value = process_tokens_obj.process_tokens()
|
||||
except InvalidRtfException, msg:
|
||||
#Check to see if the file is correctly encoded
|
||||
# Check to see if the file is correctly encoded
|
||||
encode_obj = default_encoding.DefaultEncoding(
|
||||
in_file = self.__temp_file,
|
||||
run_level = self.__run_level,
|
||||
@ -237,14 +237,14 @@ class ParseRtf:
|
||||
check_encoding_obj = check_encoding.CheckEncoding(
|
||||
bug_handler = RtfInvalidCodeException,
|
||||
)
|
||||
enc = 'cp' + encode_obj.get_codepage()
|
||||
if enc == 'cp10000':
|
||||
enc = 'mac_roman'
|
||||
msg = 'Exception in token processing'
|
||||
enc = encode_obj.get_codepage()
|
||||
if enc != 'mac_roman':
|
||||
enc = 'cp' + enc
|
||||
msg = '%s\nException in token processing' % str(msg)
|
||||
if check_encoding_obj.check_encoding(self.__file, enc):
|
||||
file_name = self.__file if isinstance(self.__file, str) \
|
||||
else self.__file.encode('utf-8')
|
||||
msg = 'File %s does not appear to be correctly encoded.\n' % file_name
|
||||
msg +='\nFile %s does not appear to be correctly encoded.\n' % file_name
|
||||
try:
|
||||
os.remove(self.__temp_file)
|
||||
except OSError:
|
||||
|
@ -786,21 +786,23 @@ class ProcessTokens:
|
||||
token = line.replace("\n","")
|
||||
line_count += 1
|
||||
if line_count == 1 and token != '\\{':
|
||||
msg = 'Invalid RTF: document doesn\'t start with {\n'
|
||||
msg = '\nInvalid RTF: document doesn\'t start with {\n'
|
||||
raise self.__exception_handler, msg
|
||||
elif line_count == 2 and token[0:4] != '\\rtf':
|
||||
msg = 'Invalid RTF: document doesn\'t start with \\rtf \n'
|
||||
msg = '\nInvalid RTF: document doesn\'t start with \\rtf \n'
|
||||
raise self.__exception_handler, msg
|
||||
|
||||
the_index = token.find('\\ ')
|
||||
if token is not None and the_index > -1:
|
||||
msg = 'Invalid RTF: token "\\ " not valid.\n'
|
||||
msg = '\nInvalid RTF: token "\\ " not valid.\nError at line %d'\
|
||||
% line_count
|
||||
raise self.__exception_handler, msg
|
||||
elif token[:1] == "\\":
|
||||
try:
|
||||
token.decode('us-ascii')
|
||||
except UnicodeError, msg:
|
||||
msg = 'Invalid RTF: Tokens not ascii encoded.\n%s' % str(msg)
|
||||
msg = '\nInvalid RTF: Tokens not ascii encoded.\n%s\nError at line %d'\
|
||||
% (str(msg), line_count)
|
||||
raise self.__exception_handler, msg
|
||||
line = self.process_cw(token)
|
||||
if line is not None:
|
||||
@ -816,7 +818,7 @@ class ProcessTokens:
|
||||
write_obj.write('tx<nu<__________<%s\n' % field)
|
||||
|
||||
if not line_count:
|
||||
msg = 'Invalid RTF: file appears to be empty.\n'
|
||||
msg = '\nInvalid RTF: file appears to be empty.\n'
|
||||
raise self.__exception_handler, msg
|
||||
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
@ -827,7 +829,7 @@ class ProcessTokens:
|
||||
|
||||
bad_brackets = self.__check_brackets(self.__file)
|
||||
if bad_brackets:
|
||||
msg = 'Invalid RTF: document does not have matching brackets.\n'
|
||||
msg = '\nInvalid RTF: document does not have matching brackets.\n'
|
||||
raise self.__exception_handler, msg
|
||||
else:
|
||||
return self.__return_code
|
@ -141,17 +141,17 @@ class Tokenize:
|
||||
"\\_": "\\_ ",
|
||||
"\\:": "\\: ",
|
||||
"\\-": "\\- ",
|
||||
# turn into a generic token to eliminate special
|
||||
# cases and make processing easier
|
||||
#turn into a generic token to eliminate special
|
||||
#cases and make processing easier
|
||||
"\\{": "\\ob ",
|
||||
# turn into a generic token to eliminate special
|
||||
# cases and make processing easier
|
||||
#turn into a generic token to eliminate special
|
||||
#cases and make processing easier
|
||||
"\\}": "\\cb ",
|
||||
# put a backslash in front of to eliminate special cases and
|
||||
# make processing easier
|
||||
#put a backslash in front of to eliminate special cases and
|
||||
#make processing easier
|
||||
"{": "\\{",
|
||||
# put a backslash in front of to eliminate special cases and
|
||||
# make processing easier
|
||||
#put a backslash in front of to eliminate special cases and
|
||||
#make processing easier
|
||||
"}": "\\}",
|
||||
}
|
||||
self.__replace_spchar = MReplace(SIMPLE_RPL)
|
||||
@ -167,16 +167,11 @@ class Tokenize:
|
||||
#remove \n from endline char
|
||||
self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
|
||||
#this is for old RTF
|
||||
self.__par_exp = re.compile(r'\\\n+')
|
||||
self.__par_exp = re.compile(r'(\\\n+|\\ )')
|
||||
#handle improper cs char-style that is preceded by \* but not enclosed in {
|
||||
self.__cs_ast = re.compile(r'\\\*([\n ]*\\cs\d+[\n \\]+)')
|
||||
#handle cw using a digit as argument and without space as delimiter
|
||||
self.__cwdigit_exp = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)")
|
||||
#self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
|
||||
#self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
|
||||
#self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
|
||||
#self.__remove_line = re.compile(r'\n+')
|
||||
##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
|
||||
|
||||
def tokenize(self):
|
||||
"""Main class for handling other methods. Reads the file \
|
||||
|
Loading…
x
Reference in New Issue
Block a user