diff --git a/src/calibre/ebooks/txt/markdownml.py b/src/calibre/ebooks/txt/markdownml.py index 2f7960e94c..116561f355 100644 --- a/src/calibre/ebooks/txt/markdownml.py +++ b/src/calibre/ebooks/txt/markdownml.py @@ -28,16 +28,36 @@ class MarkdownMLizer(object): def mlize_spine(self): output = [u''] + for item in self.oeb_book.spine: self.log.debug('Converting %s to Markdown formatted TXT...' % item.href) + html = unicode(etree.tostring(item.data, encoding=unicode)) + if not self.opts.keep_links: html = re.sub(r'<\s*a[^>]*>', '', html) html = re.sub(r'<\s*/\s*a\s*>', '', html) if not self.opts.keep_image_references: html = re.sub(r'<\s*img[^>]*>', '', html) html = re.sub(r'<\s*img\s*>', '', html) - output += html2text(html) + + text = html2text(html) + + # Ensure the section ends with at least two new line characters. + # This is to prevent the last paragraph from a section being + # combined into the fist paragraph of the next. + end_chars = text[-4:] + # Convert all newlines to \n + end_chars = end_chars.replace('\r\n', '\n') + end_chars = end_chars.replace('\r', '\n') + end_chars = end_chars[-2:] + if not end_chars[1] == '\n': + text += '\n\n' + if end_chars[1] == '\n' and not end_chars[0] == '\n': + text += '\n' + + output += text + output = u''.join(output) return output