diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index 515bdee9df..dedfe963f6 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -71,19 +71,28 @@ class FB2MLizer(object):
return u'' + output
def clean_text(self, text):
+ # Condense empty paragraphs into a line break.
+ text = re.sub(r'(?miu)(
\s*
\s*){3,}', '
', text)
+ # Remove empty paragraphs.
text = re.sub(r'(?miu)\s*
', '', text)
+ # Clean up paragraph endings.
text = re.sub(r'(?miu)\s*', '', text)
+ # Put paragraphs following a paragraph on a separate line.
text = re.sub(r'(?miu)\s*', '
\n\n', text)
+ # Remove empty title elements.
text = re.sub(r'(?miu)
\s*', '', text)
text = re.sub(r'(?miu)\s+', '', text)
+ # Remove empty sections.
text = re.sub(r'(?miu)', '', text)
+ # Clean up section starts and ends.
text = re.sub(r'(?miu)\s*', '\n', text)
text = re.sub(r'(?miu)\s*', '\n\n', text)
text = re.sub(r'(?miu)\s*', '\n', text)
text = re.sub(r'(?miu)\s*', '\n', text)
- text = re.sub(r'(?miu)\n\n', text)
+ # Put sections followed by sections on a separate line.
+ text = re.sub(r'(?miu)\s*\n\n', text)
if self.opts.insert_blank_line:
text = re.sub(r'(?miu)', '', text)
@@ -338,6 +347,11 @@ class FB2MLizer(object):
tags = []
# First tag in tree
tag = barename(elem_tree.tag)
+ # Number of blank lines above tag
+ try:
+ ems = int(round((float(style.marginTop) / style.fontSize) - 1))
+ except:
+ ems = 0
# Convert TOC entries to <title>s and add <section>s
if self.opts.sectionize == 'toc':
@@ -370,7 +384,9 @@ class FB2MLizer(object):
fb2_out.append('')
self.section_level += 1
- # Process the XHTML tag if it needs to be converted to an FB2 tag.
+ # Process the XHTML tag and its styles, converting them to FB2 tags.
+ # Use individual if statements rather than an if/elif chain: there can
+ # be only one XHTML tag, but it can have multiple styles.
if tag == 'img':
if elem_tree.attrib.get('src', None):
# Only write the image tag if it is in the manifest.
@@ -381,7 +397,11 @@ class FB2MLizer(object):
fb2_out += p_txt
tags += p_tag
fb2_out.append('' % self.image_hrefs[page.abshref(elem_tree.attrib['src'])])
- elif tag == 'br':
+ if tag in ('br', 'hr') or ems:
+ if not ems:
+ multiplier = 1
+ else:
+ multiplier = ems
if self.in_p:
closed_tags = []
open_tags = tag_stack+tags
@@ -391,52 +411,38 @@ class FB2MLizer(object):
closed_tags.append(t)
if t == 'p':
break
- fb2_out.append('')
+ fb2_out.append('' * multiplier)
closed_tags.reverse()
for t in closed_tags:
fb2_out.append('<%s>' % t)
else:
- fb2_out.append('')
- elif tag in ('div', 'li', 'p'):
+ fb2_out.append('' * multiplier)
+ if tag in ('div', 'li', 'p'):
p_text, added_p = self.close_open_p(tag_stack+tags)
fb2_out += p_text
if added_p:
tags.append('p')
- elif tag == 'b':
+ if tag == 'b' or style['font-weight'] in ('bold', 'bolder'):
s_out, s_tags = self.handle_simple_tag('strong', tag_stack+tags)
fb2_out += s_out
tags += s_tags
- elif tag == 'i':
+ if tag == 'i' or style['font-style'] == 'italic':
s_out, s_tags = self.handle_simple_tag('emphasis', tag_stack+tags)
fb2_out += s_out
tags += s_tags
- elif tag in ('del', 'strike'):
+ if tag in ('del', 'strike') or style['text-decoration'] == 'line-through':
s_out, s_tags = self.handle_simple_tag('strikethrough', tag_stack+tags)
fb2_out += s_out
tags += s_tags
- elif tag == 'sub':
+ if tag == 'sub':
s_out, s_tags = self.handle_simple_tag('sub', tag_stack+tags)
fb2_out += s_out
tags += s_tags
- elif tag == 'sup':
+ if tag == 'sup':
s_out, s_tags = self.handle_simple_tag('sup', tag_stack+tags)
fb2_out += s_out
tags += s_tags
- # Processes style information.
- if style['font-style'] == 'italic':
- s_out, s_tags = self.handle_simple_tag('emphasis', tag_stack+tags)
- fb2_out += s_out
- tags += s_tags
- elif style['font-weight'] in ('bold', 'bolder'):
- s_out, s_tags = self.handle_simple_tag('strong', tag_stack+tags)
- fb2_out += s_out
- tags += s_tags
- elif style['text-decoration'] == 'line-through':
- s_out, s_tags = self.handle_simple_tag('strikethrough', tag_stack+tags)
- fb2_out += s_out
- tags += s_tags
-
# Process element text.
if hasattr(elem_tree, 'text') and elem_tree.text:
if not self.in_p:
diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py
index 40b82514c1..abad5afcb3 100644
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@@ -633,7 +633,7 @@ class Style(object):
def lineHeight(self):
if self._lineHeight is None:
result = None
- parent = self._getparent()
+ #parent = self._getparent()
if 'line-height' in self._style:
lineh = self._style['line-height']
if lineh == 'normal':
@@ -642,9 +642,9 @@ class Style(object):
result = float(lineh) * self.fontSize
except ValueError:
result = self._unit_convert(lineh, base=self.fontSize)
- elif parent is not None:
- # TODO: proper inheritance
- result = parent.lineHeight
+ #elif parent is not None:
+ # # TODO: proper inheritance
+ # result = parent.lineHeight
else:
result = 1.2 * self.fontSize
self._lineHeight = result
diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py
index 660fd9d38a..6654e70475 100644
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@@ -67,10 +67,11 @@ class TXTMLizer(object):
output.append(self.get_toc())
for item in self.oeb_book.spine:
self.log.debug('Converting %s to TXT...' % item.href)
- stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
- content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
+ content = unicode(etree.tostring(item.data, encoding=unicode))
content = self.remove_newlines(content)
- output += self.dump_text(etree.fromstring(content), stylizer, item)
+ content = etree.fromstring(content)
+ stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
+ output += self.dump_text(content.find(XHTML('body')), stylizer, item)
output += '\n\n\n\n\n\n'
output = u''.join(output)
output = u'\n'.join(l.rstrip() for l in output.splitlines())
@@ -219,11 +220,16 @@ class TXTMLizer(object):
if tag in SPACE_TAGS:
text.append(u' ')
- # Scene breaks.
+ # Hard scene breaks.
if tag == 'hr':
text.append('\n\n* * *\n\n')
- elif style['margin-top']:
- text.append('\n\n' + '\n' * round(style['margin-top']))
+ # Soft scene breaks.
+ try:
+ ems = int(round((float(style.marginTop) / style.fontSize) - 1))
+ if ems:
+ text.append('\n' * ems)
+ except:
+ pass
# Process tags that contain text.
if hasattr(elem, 'text') and elem.text: