diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py index 40945be975..30305b27a7 100755 --- a/src/calibre/ebooks/rtf2xml/ParseRtf.py +++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py @@ -372,8 +372,8 @@ class ParseRtf: old_rtf = old_rtf_obj.check_if_old_rtf() if old_rtf: if self.__run_level > 5: - msg = 'Older RTF\n' - msg += 'self.__run_level is "%s"\n' % self.__run_level + msg = 'Older RTF\n' \ + 'self.__run_level is "%s"\n' % self.__run_level raise RtfInvalidCodeException, msg if self.__run_level > 1: sys.stderr.write('File could be older RTF...\n') @@ -381,7 +381,7 @@ class ParseRtf: if self.__run_level > 1: sys.stderr.write( 'File also has newer RTF.\n' - 'Will do the best to convert.\n' + 'Will do the best to convert...\n' ) add_brackets_obj = add_brackets.AddBrackets( in_file = self.__temp_file, diff --git a/src/calibre/ebooks/rtf2xml/add_brackets.py b/src/calibre/ebooks/rtf2xml/add_brackets.py index f82fc40d66..9bef9756f2 100755 --- a/src/calibre/ebooks/rtf2xml/add_brackets.py +++ b/src/calibre/ebooks/rtf2xml/add_brackets.py @@ -20,6 +20,9 @@ class AddBrackets: """ Add brackets for old RTF. Logic: + When control words without their own brackets are encountered + and in the list of allowed words, this will add brackets + to facilitate the treatment of the file """ def __init__(self, in_file, bug_handler, @@ -41,50 +44,51 @@ class AddBrackets: self.__copy = copy self.__write_to = better_mktemp() self.__run_level = run_level - - def __initiate_values(self): - """ - """ self.__state_dict = { 'before_body' : self.__before_body_func, 'in_body' : self.__in_body_func, 'after_control_word' : self.__after_control_word_func, 'in_ignore' : self.__ignore_func, } + self.__accept = [ + 'cw%s\n' % (type) ) else: - sys.stderr.write('module is header\n') - sys.stderr.write('method is __found_header\n') - sys.stderr.write('no dict entry\n') - sys.stderr.write('line is %s' % line) + sys.stderr.write( + 'module is header\n' \ + 'method is __found_header\n' \ + 'no dict entry\n' \ + 'line is %s' % line) self.__write_to_head_obj.write( 'minone\n' ) + def __default_sep(self, line): - """Handle all tokens that are not header tokens""" + """ + Handle all tokens that are not header tokens + """ if self.__token_info[3:5] == 'hf': self.__found_header(line) self.__write_obj.write(line) + def __initiate_sep_values(self): """ initiate counters for separate_footnotes method. @@ -89,7 +95,7 @@ class Header: self.__ob_count = 0 self.__cb_count = 0 self.__header_bracket_count = 0 - self.__in_header = 0 + self.__in_header = False self.__header_count = 0 self.__head_dict = { 'head-left_' : ('header-left'), @@ -101,6 +107,7 @@ class Header: 'header____' : ('header' ), 'footer____' : ('footer' ), } + def separate_headers(self): """ Separate all the footnotes in an RTF file and put them at the bottom, @@ -110,53 +117,47 @@ class Header: bottom of the main file. """ self.__initiate_sep_values() - read_obj = open(self.__file) - self.__write_obj = open(self.__write_to, 'w') self.__header_holder = better_mktemp() - self.__write_to_head_obj = open(self.__header_holder, 'w') - line_to_read = 1 - while line_to_read: - line_to_read = read_obj.readline() - line = line_to_read - self.__token_info = line[:16] - # keep track of opening and closing brackets - if self.__token_info == 'ob 3: + sys.stderr.write( + 'Old rtf construction %s (bracket %s, line %s)\n' + % (self.__inline_info, str(self.__ob_group), line_num) + ) + return True + self.__previous_token = line[6:16] + return False diff --git a/src/calibre/ebooks/rtf2xml/paragraphs.py b/src/calibre/ebooks/rtf2xml/paragraphs.py index cab1a7ffa8..ea6a482fca 100755 --- a/src/calibre/ebooks/rtf2xml/paragraphs.py +++ b/src/calibre/ebooks/rtf2xml/paragraphs.py @@ -11,31 +11,32 @@ # # ######################################################################### import sys, os + from calibre.ebooks.rtf2xml import copy from calibre.ptempfile import better_mktemp class Paragraphs: """ -================= -Purpose -================= -Write paragraph tags for a tokenized file. (This module won't be any use to use -to you unless you use it as part of the other modules.) -------------- -Method -------------- -RTF does not tell you when a paragraph begins. It only tells you when the -paragraph ends. -In order to make paragraphs out of this limited info, the parser starts in the -body of the documents and assumes it is not in a paragraph. It looks for clues -to begin a paragraph. Text starts a paragraph; so does an inline field or -list-text. If an end of paragraph marker (\par) is found, then this indicates -a blank paragraph. -Once a paragraph is found, the state changes to 'paragraph.' In this state, -clues are looked to for the end of a paragraph. The end of a paragraph marker -(\par) marks the end of a paragraph. So does the end of a footnote or heading; -a paragraph definintion; the end of a field-block; and the beginning of a -section. (How about the end of a section or the end of a field-block?) + ================= + Purpose + ================= + Write paragraph tags for a tokenized file. (This module won't be any use to use + to you unless you use it as part of the other modules.) + ------------- + Method + ------------- + RTF does not tell you when a paragraph begins. It only tells you when the + paragraph ends. + In order to make paragraphs out of this limited info, the parser starts in the + body of the documents and assumes it is not in a paragraph. It looks for clues + to begin a paragraph. Text starts a paragraph; so does an inline field or + list-text. If an end of paragraph marker (\par) is found, then this indicates + a blank paragraph. + Once a paragraph is found, the state changes to 'paragraph.' In this state, + clues are looked to for the end of a paragraph. The end of a paragraph marker + (\par) marks the end of a paragraph. So does the end of a footnote or heading; + a paragraph definition; the end of a field-block; and the beginning of a + section. (How about the end of a section or the end of a field-block?) """ def __init__(self, in_file, @@ -60,6 +61,7 @@ section. (How about the end of a section or the end of a field-block?) self.__write_empty_para = write_empty_para self.__run_level = run_level self.__write_to = better_mktemp() + def __initiate_values(self): """ Initiate all values. @@ -77,7 +79,7 @@ section. (How about the end of a section or the end of a field-block?) self.__paragraph_dict = { 'cw%s\n' % (self.__default_font, self.__code_page, self.__platform) ) + def __found_list_table_func(self, line): self.__state = 'list_table' + def __list_table_func(self, line): if self.__token_info == 'mi