diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 515bdee9df..dedfe963f6 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -71,19 +71,28 @@ class FB2MLizer(object): return u'' + output def clean_text(self, text): + # Condense empty paragraphs into a line break. + text = re.sub(r'(?miu)(

\s*

\s*){3,}', '

', text) + # Remove empty paragraphs. text = re.sub(r'(?miu)

\s*

', '', text) + # Clean up paragraph endings. text = re.sub(r'(?miu)\s*

', '

', text) + # Put paragraphs following a paragraph on a separate line. text = re.sub(r'(?miu)

\s*

', '

\n\n

', text) + # Remove empty title elements. text = re.sub(r'(?miu)\s*', '', text) text = re.sub(r'(?miu)\s+', '', text) + # Remove empty sections. text = re.sub(r'(?miu)

\s*
', '', text) + # Clean up sections start and ends. text = re.sub(r'(?miu)\s*', '\n', text) text = re.sub(r'(?miu)\s*', '\n\n', text) text = re.sub(r'(?miu)\s*
', '\n
', text) text = re.sub(r'(?miu)
\s*', '
\n', text) - text = re.sub(r'(?miu)
', '
\n\n
', text) + # Put sections followed by sections on a separate line. + text = re.sub(r'(?miu)
\s*
', '
\n\n
', text) if self.opts.insert_blank_line: text = re.sub(r'(?miu)

', '

', text) @@ -338,6 +347,11 @@ class FB2MLizer(object): tags = [] # First tag in tree tag = barename(elem_tree.tag) + # Number of blank lines above tag + try: + ems = int(round((float(style.marginTop) / style.fontSize) - 1)) + except: + ems = 0 # Convert TOC entries to s and add <section>s if self.opts.sectionize == 'toc': @@ -370,7 +384,9 @@ class FB2MLizer(object): fb2_out.append('<section>') self.section_level += 1 - # Process the XHTML tag if it needs to be converted to an FB2 tag. + # Process the XHTML tag and styles. Converted to an FB2 tag. + # Use individual if statement not if else. There can be + # only one XHTML tag but it can have multiple styles. if tag == 'img': if elem_tree.attrib.get('src', None): # Only write the image tag if it is in the manifest. @@ -381,7 +397,11 @@ class FB2MLizer(object): fb2_out += p_txt tags += p_tag fb2_out.append('<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem_tree.attrib['src'])]) - elif tag == 'br': + if tag in ('br', 'hr') or ems: + if not ems: + multiplier = 1 + else: + multiplier = ems if self.in_p: closed_tags = [] open_tags = tag_stack+tags @@ -391,52 +411,38 @@ class FB2MLizer(object): closed_tags.append(t) if t == 'p': break - fb2_out.append('<empty-line />') + fb2_out.append('<empty-line />' * multiplier) closed_tags.reverse() for t in closed_tags: fb2_out.append('<%s>' % t) else: - fb2_out.append('<empty-line />') - elif tag in ('div', 'li', 'p'): + fb2_out.append('<empty-line />' * multiplier) + if tag in ('div', 'li', 'p'): p_text, added_p = self.close_open_p(tag_stack+tags) fb2_out += p_text if added_p: tags.append('p') - elif tag == 'b': + if tag == 'b' or style['font-weight'] in ('bold', 'bolder'): s_out, s_tags = self.handle_simple_tag('strong', tag_stack+tags) fb2_out += s_out tags += s_tags - elif tag == 'i': + if tag == 'i' or style['font-style'] == 'italic': s_out, s_tags = self.handle_simple_tag('emphasis', tag_stack+tags) fb2_out += s_out tags += s_tags - elif tag in ('del', 
'strike'): + if tag in ('del', 'strike') or style['text-decoration'] == 'line-through': s_out, s_tags = self.handle_simple_tag('strikethrough', tag_stack+tags) fb2_out += s_out tags += s_tags - elif tag == 'sub': + if tag == 'sub': s_out, s_tags = self.handle_simple_tag('sub', tag_stack+tags) fb2_out += s_out tags += s_tags - elif tag == 'sup': + if tag == 'sup': s_out, s_tags = self.handle_simple_tag('sup', tag_stack+tags) fb2_out += s_out tags += s_tags - # Processes style information. - if style['font-style'] == 'italic': - s_out, s_tags = self.handle_simple_tag('emphasis', tag_stack+tags) - fb2_out += s_out - tags += s_tags - elif style['font-weight'] in ('bold', 'bolder'): - s_out, s_tags = self.handle_simple_tag('strong', tag_stack+tags) - fb2_out += s_out - tags += s_tags - elif style['text-decoration'] == 'line-through': - s_out, s_tags = self.handle_simple_tag('strikethrough', tag_stack+tags) - fb2_out += s_out - tags += s_tags - # Process element text. if hasattr(elem_tree, 'text') and elem_tree.text: if not self.in_p: diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py index 40b82514c1..abad5afcb3 100644 --- a/src/calibre/ebooks/oeb/stylizer.py +++ b/src/calibre/ebooks/oeb/stylizer.py @@ -633,7 +633,7 @@ class Style(object): def lineHeight(self): if self._lineHeight is None: result = None - parent = self._getparent() + #parent = self._getparent() if 'line-height' in self._style: lineh = self._style['line-height'] if lineh == 'normal': @@ -642,9 +642,9 @@ class Style(object): result = float(lineh) * self.fontSize except ValueError: result = self._unit_convert(lineh, base=self.fontSize) - elif parent is not None: - # TODO: proper inheritance - result = parent.lineHeight + #elif parent is not None: + # # TODO: proper inheritance + # result = parent.lineHeight else: result = 1.2 * self.fontSize self._lineHeight = result diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py index 660fd9d38a..6654e70475 
100644 --- a/src/calibre/ebooks/txt/txtml.py +++ b/src/calibre/ebooks/txt/txtml.py @@ -67,10 +67,11 @@ class TXTMLizer(object): output.append(self.get_toc()) for item in self.oeb_book.spine: self.log.debug('Converting %s to TXT...' % item.href) - stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile) - content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode)) + content = unicode(etree.tostring(item.data, encoding=unicode)) content = self.remove_newlines(content) - output += self.dump_text(etree.fromstring(content), stylizer, item) + content = etree.fromstring(content) + stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile) + output += self.dump_text(content.find(XHTML('body')), stylizer, item) output += '\n\n\n\n\n\n' output = u''.join(output) output = u'\n'.join(l.rstrip() for l in output.splitlines()) @@ -219,11 +220,16 @@ class TXTMLizer(object): if tag in SPACE_TAGS: text.append(u' ') - # Scene breaks. + # Hard scene breaks. if tag == 'hr': text.append('\n\n* * *\n\n') - elif style['margin-top']: - text.append('\n\n' + '\n' * round(style['margin-top'])) + # Soft scene breaks. + try: + ems = int(round((float(style.marginTop) / style.fontSize) - 1)) + if ems: + text.append('\n' * ems) + except: + pass # Process tags that contain text. if hasattr(elem, 'text') and elem.text: