Fix #8640 (Convert books from Epub to RTF removes spaces before and after Italics)

This commit is contained in:
Kovid Goyal 2011-01-29 11:21:51 -07:00
commit 6b3dd16e1f

View File

@ -30,6 +30,7 @@ TAGS = {
'h4': '\\b \\par \\pard \\hyphpar',
'h5': '\\b \\par \\pard \\hyphpar',
'h6': '\\b \\par \\pard \\hyphpar',
'i': '\\i',
'li': '\\par \\pard \\hyphpar \t',
'p': '\\par \\pard \\hyphpar \t',
'sub': '\\sub',
@ -117,6 +118,7 @@ class RTFMLizer(object):
self.log.debug('Converting %s to RTF markup...' % item.href)
content = unicode(etree.tostring(item.data, encoding=unicode))
content = self.remove_newlines(content)
content = self.remove_tabs(content)
content = etree.fromstring(content)
stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
output += self.dump_text(content.find(XHTML('body')), stylizer)
@ -134,6 +136,12 @@ class RTFMLizer(object):
return text
def remove_tabs(self, text):
    """Return ``text`` with every tab character replaced by one space.

    A debug message is emitted through ``self.log`` before the
    replacement is performed.

    :param text: the string to process.
    :return: a copy of ``text`` in which each tab is a single space.
    """
    # The original literal began with the invalid escape sequence "\R",
    # which logged a stray backslash and triggers an invalid-escape
    # warning on modern Python versions.
    self.log.debug('Replace tabs with space for processing...')
    return text.replace('\t', ' ')
def header(self):
    """Return the opening portion of the RTF document.

    The returned string opens the top-level rtf1 group (the outer brace
    is left unclosed here), embeds an info group carrying the title and
    author, and ends with the ansi, ansicpg1252, deff0 and deflang1033
    control words.

    The title is the value of the first entry in the book metadata's
    title list; the author string is produced by ``authors_to_string``
    from the values of all creator entries. Both are inserted as-is.
    """
    metadata = self.oeb_book.metadata
    book_title = metadata.title[0].value
    creator_names = [creator.value for creator in metadata.creator]
    book_authors = authors_to_string(creator_names)
    template = u'{\\rtf1{\\info{\\title %s}{\\author %s}}\\ansi\\ansicpg1252\\deff0\\deflang1033'
    return template % (book_title, book_authors)
@ -170,19 +178,15 @@ class RTFMLizer(object):
return (hex_string, width, height)
def clean_text(self, text):
# Remove excess spaces at beginning and end of lines
text = re.sub('(?m)^[ ]+', '', text)
text = re.sub('(?m)[ ]+$', '', text)
# Remove excessive newlines
#text = re.sub('%s{1,1}' % os.linesep, '%s%s' % (os.linesep, os.linesep), text)
text = re.sub('%s{3,}' % os.linesep, '%s%s' % (os.linesep, os.linesep), text)
# Remove excessive spaces
text = re.sub('[ ]{2,}', ' ', text)
text = re.sub('\t{2,}', '\t', text)
# Remove excessive line breaks
text = re.sub(r'(\{\\line \}\s*){3,}', r'{\\line }{\\line }', text)
#text = re.compile(r'(\{\\line \}\s*)+(?P<brackets>}*)\s*\{\\par').sub(lambda mo: r'%s{\\par' % mo.group('brackets'), text)
# Remove non-breaking spaces
text = text.replace(u'\xa0', ' ')
@ -245,7 +249,7 @@ class RTFMLizer(object):
tag_stack.append(style_tag)
# Process tags that contain text.
if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
if hasattr(elem, 'text') and elem.text:
text += txt2rtf(elem.text)
for item in elem:
@ -260,7 +264,7 @@ class RTFMLizer(object):
if single_tag_end:
text += single_tag_end
if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
if hasattr(elem, 'tail') and elem.tail:
if 'block' in tag_stack:
text += '%s' % txt2rtf(elem.tail)
else: