class MarkdownMLizer(object):
    '''
    Convert the spine of an OEB book into Markdown formatted plain text.

    Each spine item is serialized to markup with ``etree.tostring`` and then
    passed through ``html2text``. When ``opts.remove_links`` is true the
    anchor tags are stripped from the markup first (their inner text is
    kept).
    '''

    # Opening anchor tag. The lookahead requires the character after the tag
    # name to be whitespace, ``/`` or ``>`` so that other elements whose
    # names merely start with "a" (abbr, address, article, ...) are left
    # untouched.
    _ANCHOR_OPEN = re.compile(r'<\s*a(?=[\s/>])[^>]*>')
    # Closing anchor tag.
    _ANCHOR_CLOSE = re.compile(r'<\s*/\s*a\s*>')

    def __init__(self, log):
        '''
        :param log: Logger object; its ``info`` and ``debug`` methods are
                    used to report progress.
        '''
        self.log = log

    def extract_content(self, oeb_book, opts):
        '''
        Convert ``oeb_book`` to Markdown formatted text.

        :param oeb_book: Book whose ``spine`` items are converted.
        :param opts: Conversion options; ``remove_links`` is read for every
                     spine item.
        :return: The Markdown text of all spine items, concatenated.
        '''
        self.log.info('Converting XHTML to Markdown formatted TXT...')
        self.oeb_book = oeb_book
        self.opts = opts

        return self.mlize_spine()

    @classmethod
    def _strip_links(cls, html):
        '''
        Return ``html`` with opening and closing ``a`` tags removed.

        Only the tags themselves are dropped; the text between them stays.
        '''
        html = cls._ANCHOR_OPEN.sub('', html)
        return cls._ANCHOR_CLOSE.sub('', html)

    def mlize_spine(self):
        '''
        Convert every item in the book's spine and join the results.

        Relies on ``self.oeb_book`` and ``self.opts`` having been set by
        :meth:`extract_content`.

        :return: A single unicode string (empty when the spine is empty).
        '''
        output = []
        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to Markdown formatted TXT...' % item.href)
            html = unicode(etree.tostring(item.data, encoding=unicode))
            if self.opts.remove_links:
                html = self._strip_links(html)
            # append, not +=: extending a list with a string would add it
            # one character at a time.
            output.append(html2text(html))

        return u''.join(output)