TXT Output: Markdown output, ensure separation between sections.

2025-07-09 03:04:10 -04:00 · 2010-12-02 07:37:38 -05:00 · 2010-12-02 07:37:38 -05:00 · 8032890d0f
commit 8032890d0f
parent d744fb698b
1 changed files with 21 additions and 1 deletions
--- a/src/calibre/ebooks/txt/markdownml.py
+++ b/src/calibre/ebooks/txt/markdownml.py
@ -28,16 +28,36 @@ class MarkdownMLizer(object):
    def mlize_spine(self):
        '''
        Convert every item in the book's spine to Markdown formatted text.

        Each spine item is serialized to HTML, stripped of anchor tags when
        ``self.opts.keep_links`` is false and of image tags when
        ``self.opts.keep_image_references`` is false, and then passed through
        ``html2text``. The text of every section is padded so that it ends
        with at least two newlines before the sections are concatenated.

        Returns the concatenated text of all sections as a single string.
        '''
        output = [u'']
        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to Markdown formatted TXT...' % item.href)
            html = unicode(etree.tostring(item.data, encoding=unicode))
            if not self.opts.keep_links:
                # The negative lookahead restricts the match to the <a> tag
                # itself; without it the opening tags of <abbr>, <address>,
                # <article>, ... would be stripped as well, leaving their
                # closing tags orphaned.
                html = re.sub(r'<\s*a(?![\w:.-])[^>]*>', '', html)
                html = re.sub(r'<\s*/\s*a\s*>', '', html)
            if not self.opts.keep_image_references:
                # [^>]* also matches the empty string, so a bare <img> is
                # covered by this single pattern.
                html = re.sub(r'<\s*img[^>]*>', '', html)

            text = html2text(html)
            # Ensure the section ends with at least two newline characters.
            # This prevents the last paragraph of a section from being
            # combined into the first paragraph of the next.
            # Only the tail is inspected, with every newline style
            # normalised to \n first. Counting trailing newlines (instead of
            # indexing fixed positions) also copes with sections shorter
            # than two characters, including an empty one.
            tail = text[-4:].replace('\r\n', '\n').replace('\r', '\n')
            trailing_newlines = len(tail) - len(tail.rstrip('\n'))
            if trailing_newlines < 2:
                text += '\n' * (2 - trailing_newlines)
            output.append(text)
        return u''.join(output)