mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
TXT Output: Markdown output, ensure separation between sections.
This commit is contained in:
parent
d744fb698b
commit
8032890d0f
@ -28,16 +28,36 @@ class MarkdownMLizer(object):
|
|||||||
|
|
||||||
def mlize_spine(self):
|
def mlize_spine(self):
|
||||||
output = [u'']
|
output = [u'']
|
||||||
|
|
||||||
for item in self.oeb_book.spine:
|
for item in self.oeb_book.spine:
|
||||||
self.log.debug('Converting %s to Markdown formatted TXT...' % item.href)
|
self.log.debug('Converting %s to Markdown formatted TXT...' % item.href)
|
||||||
|
|
||||||
html = unicode(etree.tostring(item.data, encoding=unicode))
|
html = unicode(etree.tostring(item.data, encoding=unicode))
|
||||||
|
|
||||||
if not self.opts.keep_links:
|
if not self.opts.keep_links:
|
||||||
html = re.sub(r'<\s*a[^>]*>', '', html)
|
html = re.sub(r'<\s*a[^>]*>', '', html)
|
||||||
html = re.sub(r'<\s*/\s*a\s*>', '', html)
|
html = re.sub(r'<\s*/\s*a\s*>', '', html)
|
||||||
if not self.opts.keep_image_references:
|
if not self.opts.keep_image_references:
|
||||||
html = re.sub(r'<\s*img[^>]*>', '', html)
|
html = re.sub(r'<\s*img[^>]*>', '', html)
|
||||||
html = re.sub(r'<\s*img\s*>', '', html)
|
html = re.sub(r'<\s*img\s*>', '', html)
|
||||||
output += html2text(html)
|
|
||||||
|
text = html2text(html)
|
||||||
|
|
||||||
|
# Ensure the section ends with at least two new line characters.
|
||||||
|
# This is to prevent the last paragraph from a section being
|
||||||
|
# combined into the first paragraph of the next.
|
||||||
|
end_chars = text[-4:]
|
||||||
|
# Convert all newlines to \n
|
||||||
|
end_chars = end_chars.replace('\r\n', '\n')
|
||||||
|
end_chars = end_chars.replace('\r', '\n')
|
||||||
|
end_chars = end_chars[-2:]
|
||||||
|
if not end_chars[1] == '\n':
|
||||||
|
text += '\n\n'
|
||||||
|
if end_chars[1] == '\n' and not end_chars[0] == '\n':
|
||||||
|
text += '\n'
|
||||||
|
|
||||||
|
output += text
|
||||||
|
|
||||||
output = u''.join(output)
|
output = u''.join(output)
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
Loading…
x
Reference in New Issue
Block a user