mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix bug #8641: Retain HTML headings as RTF headings when converting to RTF. Rewrite and simplify large parts of the markup output.
This commit is contained in:
parent
20a8d6ecd3
commit
25129f9a71
@ -24,15 +24,15 @@ from calibre.utils.magick.draw import save_cover_data_to, identify_data
|
|||||||
TAGS = {
|
TAGS = {
|
||||||
'b': '\\b',
|
'b': '\\b',
|
||||||
'del': '\\deleted',
|
'del': '\\deleted',
|
||||||
'h1': '\\b \\par \\pard \\hyphpar',
|
'h1': '\\s1 \\afs32',
|
||||||
'h2': '\\b \\par \\pard \\hyphpar',
|
'h2': '\\s2 \\afs28',
|
||||||
'h3': '\\b \\par \\pard \\hyphpar',
|
'h3': '\\s3 \\afs28',
|
||||||
'h4': '\\b \\par \\pard \\hyphpar',
|
'h4': '\\s4 \\afs23',
|
||||||
'h5': '\\b \\par \\pard \\hyphpar',
|
'h5': '\\s5 \\afs23',
|
||||||
'h6': '\\b \\par \\pard \\hyphpar',
|
'h6': '\\s6 \\afs21',
|
||||||
'i': '\\i',
|
'i': '\\i',
|
||||||
'li': '\\par \\pard \\hyphpar \t',
|
'li': '\t',
|
||||||
'p': '\\par \\pard \\hyphpar \t',
|
'p': '\t',
|
||||||
'sub': '\\sub',
|
'sub': '\\sub',
|
||||||
'sup': '\\super',
|
'sup': '\\super',
|
||||||
'u': '\\ul',
|
'u': '\\ul',
|
||||||
@ -40,15 +40,9 @@ TAGS = {
|
|||||||
|
|
||||||
SINGLE_TAGS = {
|
SINGLE_TAGS = {
|
||||||
'br': '\n{\\line }\n',
|
'br': '\n{\\line }\n',
|
||||||
'div': '\n{\\line }\n',
|
|
||||||
}
|
|
||||||
|
|
||||||
SINGLE_TAGS_END = {
|
|
||||||
'div': '\n{\\line }\n',
|
|
||||||
}
|
}
|
||||||
|
|
||||||
STYLES = [
|
STYLES = [
|
||||||
('display', {'block': '\\par \\pard \\hyphpar'}),
|
|
||||||
('font-weight', {'bold': '\\b', 'bolder': '\\b'}),
|
('font-weight', {'bold': '\\b', 'bolder': '\\b'}),
|
||||||
('font-style', {'italic': '\\i'}),
|
('font-style', {'italic': '\\i'}),
|
||||||
('text-align', {'center': '\\qc', 'left': '\\ql', 'right': '\\qr'}),
|
('text-align', {'center': '\\qc', 'left': '\\ql', 'right': '\\qr'}),
|
||||||
@ -56,6 +50,7 @@ STYLES = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
BLOCK_TAGS = [
|
BLOCK_TAGS = [
|
||||||
|
'div',
|
||||||
'p',
|
'p',
|
||||||
'h1',
|
'h1',
|
||||||
'h2',
|
'h2',
|
||||||
@ -113,7 +108,7 @@ class RTFMLizer(object):
|
|||||||
stylizer = Stylizer(item.data, item.href, self.oeb_book,
|
stylizer = Stylizer(item.data, item.href, self.oeb_book,
|
||||||
self.opts, self.opts.output_profile)
|
self.opts, self.opts.output_profile)
|
||||||
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
||||||
output += '{\\page } '
|
output += '{\\page }'
|
||||||
for item in self.oeb_book.spine:
|
for item in self.oeb_book.spine:
|
||||||
self.log.debug('Converting %s to RTF markup...' % item.href)
|
self.log.debug('Converting %s to RTF markup...' % item.href)
|
||||||
content = unicode(etree.tostring(item.data, encoding=unicode))
|
content = unicode(etree.tostring(item.data, encoding=unicode))
|
||||||
@ -122,6 +117,7 @@ class RTFMLizer(object):
|
|||||||
content = etree.fromstring(content)
|
content = etree.fromstring(content)
|
||||||
stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||||
output += self.dump_text(content.find(XHTML('body')), stylizer)
|
output += self.dump_text(content.find(XHTML('body')), stylizer)
|
||||||
|
output += '{\\page }'
|
||||||
output += self.footer()
|
output += self.footer()
|
||||||
output = self.insert_images(output)
|
output = self.insert_images(output)
|
||||||
output = self.clean_text(output)
|
output = self.clean_text(output)
|
||||||
@ -143,7 +139,16 @@ class RTFMLizer(object):
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
def header(self):
|
def header(self):
|
||||||
return u'{\\rtf1{\\info{\\title %s}{\\author %s}}\\ansi\\ansicpg1252\\deff0\\deflang1033' % (self.oeb_book.metadata.title[0].value, authors_to_string([x.value for x in self.oeb_book.metadata.creator]))
|
header = u'{\\rtf1{\\info{\\title %s}{\\author %s}}\\ansi\\ansicpg1252\\deff0\\deflang1033\n' % (self.oeb_book.metadata.title[0].value, authors_to_string([x.value for x in self.oeb_book.metadata.creator]))
|
||||||
|
return header + \
|
||||||
|
'{\\fonttbl{\\f0\\froman\\fprq2\\fcharset128 Times New Roman;}{\\f1\\froman\\fprq2\\fcharset128 Times New Roman;}{\\f2\\fswiss\\fprq2\\fcharset128 Arial;}{\\f3\\fnil\\fprq2\\fcharset128 Arial;}{\\f4\\fnil\\fprq2\\fcharset128 MS Mincho;}{\\f5\\fnil\\fprq2\\fcharset128 Tahoma;}{\\f6\\fnil\\fprq0\\fcharset128 Tahoma;}}\n' \
|
||||||
|
'{\\stylesheet{\\ql \\li0\\ri0\\nowidctlpar\\wrapdefault\\faauto\\rin0\\lin0\\itap0 \\rtlch\\fcs1 \\af25\\afs24\\alang1033 \\ltrch\\fcs0 \\fs24\\lang1033\\langfe255\\cgrid\\langnp1033\\langfenp255 \\snext0 Normal;}\n' \
|
||||||
|
'{\\s1\\ql \\li0\\ri0\\sb240\\sa120\\keepn\\nowidctlpar\\wrapdefault\\faauto\\outlinelevel0\\rin0\\lin0\\itap0 \\rtlch\\fcs1 \\ab\\af0\\afs32\\alang1033 \\ltrch\\fcs0 \\b\\fs32\\lang1033\\langfe255\\loch\\f1\\hich\\af1\\dbch\\af26\\cgrid\\langnp1033\\langfenp255 \\sbasedon15 \\snext16 \\slink21 heading 1;}\n' \
|
||||||
|
'{\\s2\\ql \\li0\\ri0\\sb240\\sa120\\keepn\\nowidctlpar\\wrapdefault\\faauto\\outlinelevel1\\rin0\\lin0\\itap0 \\rtlch\\fcs1 \\ab\\ai\\af0\\afs28\\alang1033 \\ltrch\\fcs0 \\b\\i\\fs28\\lang1033\\langfe255\\loch\\f1\\hich\\af1\\dbch\\af26\\cgrid\\langnp1033\\langfenp255 \\sbasedon15 \\snext16 \\slink22 heading 2;}\n' \
|
||||||
|
'{\\s3\\ql \\li0\\ri0\\sb240\\sa120\\keepn\\nowidctlpar\\wrapdefault\\faauto\\outlinelevel2\\rin0\\lin0\\itap0 \\rtlch\\fcs1 \\ab\\af0\\afs28\\alang1033 \\ltrch\\fcs0 \\b\\fs28\\lang1033\\langfe255\\loch\\f1\\hich\\af1\\dbch\\af26\\cgrid\\langnp1033\\langfenp255 \\sbasedon15 \\snext16 \\slink23 heading 3;}\n' \
|
||||||
|
'{\\s4\\ql \\li0\\ri0\\sb240\\sa120\\keepn\\nowidctlpar\\wrapdefault\\faauto\\outlinelevel3\\rin0\\lin0\\itap0 \\rtlch\\fcs1 \\ab\\ai\\af0\\afs23\\alang1033 \\ltrch\\fcs0\\b\\i\\fs23\\lang1033\\langfe255\\loch\\f1\\hich\\af1\\dbch\\af26\\cgrid\\langnp1033\\langfenp255 \\sbasedon15 \\snext16 \\slink24 heading 4;}\n' \
|
||||||
|
'{\\s5\\ql \\li0\\ri0\\sb240\\sa120\\keepn\\nowidctlpar\\wrapdefault\\faauto\\outlinelevel4\\rin0\\lin0\\itap0 \\rtlch\\fcs1 \\ab\\af0\\afs23\\alang1033 \\ltrch\\fcs0 \\b\\fs23\\lang1033\\langfe255\\loch\\f1\\hich\\af1\\dbch\\af26\\cgrid\\langnp1033\\langfenp255 \\sbasedon15 \\snext16 \\slink25 heading 5;}\n' \
|
||||||
|
'{\\s6\\ql \\li0\\ri0\\sb240\\sa120\\keepn\\nowidctlpar\\wrapdefault\\faauto\\outlinelevel5\\rin0\\lin0\\itap0 \\rtlch\\fcs1 \\ab\\af0\\afs21\\alang1033 \\ltrch\\fcs0 \\b\\fs21\\lang1033\\langfe255\\loch\\f1\\hich\\af1\\dbch\\af26\\cgrid\\langnp1033\\langfenp255 \\sbasedon15 \\snext16 \\slink26 heading 6;}}\n'
|
||||||
|
|
||||||
def footer(self):
|
def footer(self):
|
||||||
return ' }'
|
return ' }'
|
||||||
@ -184,6 +189,7 @@ class RTFMLizer(object):
|
|||||||
# Remove excessive spaces
|
# Remove excessive spaces
|
||||||
text = re.sub('[ ]{2,}', ' ', text)
|
text = re.sub('[ ]{2,}', ' ', text)
|
||||||
text = re.sub('\t{2,}', '\t', text)
|
text = re.sub('\t{2,}', '\t', text)
|
||||||
|
text = re.sub('\t ', '\t', text)
|
||||||
|
|
||||||
# Remove excessive line breaks
|
# Remove excessive line breaks
|
||||||
text = re.sub(r'(\{\\line \}\s*){3,}', r'{\\line }{\\line }', text)
|
text = re.sub(r'(\{\\line \}\s*){3,}', r'{\\line }{\\line }', text)
|
||||||
@ -226,7 +232,7 @@ class RTFMLizer(object):
|
|||||||
block_start = ''
|
block_start = ''
|
||||||
block_end = ''
|
block_end = ''
|
||||||
if 'block' not in tag_stack:
|
if 'block' not in tag_stack:
|
||||||
block_start = '{\\par \\pard \\hyphpar '
|
block_start = '{\\par\\pard\\hyphpar '
|
||||||
block_end = '}'
|
block_end = '}'
|
||||||
text += '%s SPECIAL_IMAGE-%s-REPLACE_ME %s' % (block_start, src, block_end)
|
text += '%s SPECIAL_IMAGE-%s-REPLACE_ME %s' % (block_start, src, block_end)
|
||||||
|
|
||||||
@ -258,16 +264,15 @@ class RTFMLizer(object):
|
|||||||
for i in range(0, tag_count):
|
for i in range(0, tag_count):
|
||||||
end_tag = tag_stack.pop()
|
end_tag = tag_stack.pop()
|
||||||
if end_tag != 'block':
|
if end_tag != 'block':
|
||||||
text += u'}'
|
if tag in BLOCK_TAGS:
|
||||||
|
text += u'\\par\\pard\\plain\\hyphpar}'
|
||||||
single_tag_end = SINGLE_TAGS_END.get(tag, None)
|
else:
|
||||||
if single_tag_end:
|
text += u'}'
|
||||||
text += single_tag_end
|
|
||||||
|
|
||||||
if hasattr(elem, 'tail') and elem.tail:
|
if hasattr(elem, 'tail') and elem.tail:
|
||||||
if 'block' in tag_stack:
|
if 'block' in tag_stack:
|
||||||
text += '%s' % txt2rtf(elem.tail)
|
text += '%s' % txt2rtf(elem.tail)
|
||||||
else:
|
else:
|
||||||
text += '{\\par \\pard \\hyphpar %s}' % txt2rtf(elem.tail)
|
text += '{\\par\\pard\\hyphpar %s}' % txt2rtf(elem.tail)
|
||||||
|
|
||||||
return text
|
return text
|
||||||
|
Loading…
x
Reference in New Issue
Block a user