Correction of hard line breaking bug in RTF (introduce a <br/> for \line)

This commit is contained in:
Sengian 2010-07-26 01:26:39 +02:00
parent c1776406e5
commit 8cb95f343b
4 changed files with 48 additions and 35 deletions

View File

@ -413,6 +413,10 @@
</xsl:element> </xsl:element>
</xsl:template> </xsl:template>
<xsl:template match="rtf:hardline-break">
<xsl:element name="br"/>
</xsl:template>
<xsl:template match="rtf:rtf-definition|rtf:font-table|rtf:color-table|rtf:style-table|rtf:page-definition|rtf:list-table|rtf:override-table|rtf:override-list|rtf:list-text"/> <xsl:template match="rtf:rtf-definition|rtf:font-table|rtf:color-table|rtf:style-table|rtf:page-definition|rtf:list-table|rtf:override-table|rtf:override-list|rtf:list-text"/>
<xsl:template match="rtf:body"> <xsl:template match="rtf:body">

View File

@ -90,7 +90,7 @@ class ParseRtf:
out_file = '', out_file = '',
out_dir = None, out_dir = None,
dtd = '', dtd = '',
debug = 0, #debug = 0, #why? calibre
deb_dir = None, deb_dir = None,
convert_symbol = None, convert_symbol = None,
convert_wingdings = None, convert_wingdings = None,
@ -132,7 +132,7 @@ class ParseRtf:
self.__dtd_path = dtd self.__dtd_path = dtd
self.__check_file(in_file,"file_to_parse") self.__check_file(in_file,"file_to_parse")
self.__char_data = char_data self.__char_data = char_data
self.__debug_dir = debug self.__debug_dir = deb_dir #self.__debug_dir = debug calibre
self.__check_dir(self.__temp_dir) self.__check_dir(self.__temp_dir)
self.__copy = self.__check_dir(self.__debug_dir) self.__copy = self.__check_dir(self.__debug_dir)
self.__convert_caps = convert_caps self.__convert_caps = convert_caps

View File

@ -51,6 +51,7 @@ class Inline:
'tx<ut<__________' : self.__found_text_func, 'tx<ut<__________' : self.__found_text_func,
'mi<mk<inline-fld' : self.__found_text_func, 'mi<mk<inline-fld' : self.__found_text_func,
'text' : self.__found_text_func, 'text' : self.__found_text_func,
'cw<nu<hard-lineb' : self.__found_text_func, #calibre
'cb<nu<clos-brack' : self.__close_bracket_func, 'cb<nu<clos-brack' : self.__close_bracket_func,
'mi<mk<par-end___' : self.__end_para_func, 'mi<mk<par-end___' : self.__end_para_func,
'mi<mk<footnt-ope' : self.__end_para_func, 'mi<mk<footnt-ope' : self.__end_para_func,
@ -62,6 +63,7 @@ class Inline:
'tx<hx<__________' : self.__found_text_func, 'tx<hx<__________' : self.__found_text_func,
'tx<ut<__________' : self.__found_text_func, 'tx<ut<__________' : self.__found_text_func,
'text' : self.__found_text_func, 'text' : self.__found_text_func,
'cw<nu<hard-lineb' : self.__found_text_func, #calibre
'mi<mk<inline-fld' : self.__found_text_func, 'mi<mk<inline-fld' : self.__found_text_func,
'ob<nu<open-brack': self.__found_open_bracket_func, 'ob<nu<open-brack': self.__found_open_bracket_func,
'mi<mk<par-end___' : self.__end_para_func, 'mi<mk<par-end___' : self.__end_para_func,
@ -133,10 +135,12 @@ class Inline:
Returns: Returns:
nothing nothing
Logic: Logic:
Write the line unless the token is a hard line break
""" """
action = self.__default_dict.get(self.__token_info) action = self.__default_dict.get(self.__token_info)
if action: if action:
action(line) action(line)
if self.__token_info != 'cw<nu<hard-lineb': #calibre
self.__write_obj.write(line) self.__write_obj.write(line)
def __found_open_bracket_func(self, line): def __found_open_bracket_func(self, line):
""" """
@ -164,7 +168,7 @@ class Inline:
Use the dictionary to get the appropriate function. Use the dictionary to get the appropriate function.
Always print out the line. Always print out the line.
""" """
if line[0:2] == 'cw': if line[0:5] == 'cw<ci': #calibre: bug in original function no diff between cw<ci and cw<pf
self.__handle_control_word(line) self.__handle_control_word(line)
else: else:
action = self.__after_open_bracket_dict.get(self.__token_info) action = self.__after_open_bracket_dict.get(self.__token_info)
@ -247,12 +251,13 @@ class Inline:
Return: Return:
nothing nothing
Logic: Logic:
Two cases: Three cases:
1. in a list. Simply write inline 1. in a list. Simply write inline
2. Not in a list 2. Not in a list
Text can mark the start of a paragraph. Text can mark the start of a paragraph.
If already in a paragraph, check to see if any groups are waiting If already in a paragraph, check to see if any groups are waiting
to be added. If so, use another method to write these groups. to be added. If so, use another method to write these groups.
3. Otherwise, if the token is a hard line break, write an empty hardline-break tag
""" """
if self.__place == 'in_list': if self.__place == 'in_list':
self.__write_inline() self.__write_inline()
@ -261,8 +266,11 @@ class Inline:
self.__in_para = 1 self.__in_para = 1
self.__start_para_func(line) self.__start_para_func(line)
else: else:
if self.__token_info == 'cw<nu<hard-lineb': #calibre
self.__write_obj.write('mi<tg<empty_____<hardline-break\n')
if self.__groups_in_waiting[0] != 0: if self.__groups_in_waiting[0] != 0:
self.__write_inline() self.__write_inline()
def __write_inline(self): def __write_inline(self):
""" """
Required: Required:
@ -279,7 +287,7 @@ class Inline:
Get the keys in each dictionary. If 'font-style' is in the keys, Get the keys in each dictionary. If 'font-style' is in the keys,
write a marker tag. (I will use this marker tag later when converting write a marker tag. (I will use this marker tag later when converting
hex text to utf8.) hex text to utf8.)
Write a tag for the inline vaues. Write a tag for the inline values.
""" """
if self.__groups_in_waiting[0] != 0: if self.__groups_in_waiting[0] != 0:
last_index = -1 * self.__groups_in_waiting[0] last_index = -1 * self.__groups_in_waiting[0]

View File

@ -73,7 +73,8 @@ class ProcessTokens:
'backslash' : ('nu', '\\', self.text_func), 'backslash' : ('nu', '\\', self.text_func),
'ob' : ('nu', '{', self.text_func), 'ob' : ('nu', '{', self.text_func),
'cb' : ('nu', '}', self.text_func), 'cb' : ('nu', '}', self.text_func),
'line' : ('nu', ' ', self.text_func), 'line' : ('nu', 'hard-lineb', self.default_func), #calibre
#'line' : ('nu', ' ', self.text_func), calibre
# paragraph formatting => pf # paragraph formatting => pf
'page' : ('pf', 'page-break', self.default_func), 'page' : ('pf', 'page-break', self.default_func),
'par' : ('pf', 'par-end___', self.default_func), 'par' : ('pf', 'par-end___', self.default_func),