Add markdownml.py. TXT Output: Remove links option to make markdown output cleaner.

This commit is contained in:
John Schember 2010-12-01 20:33:52 -05:00
parent 04e3ba0e81
commit 98a0970f02
2 changed files with 45 additions and 0 deletions

View File

@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
'''
Transform OEB content into Markdown formatted plain text
'''
import re
from lxml import etree
from calibre.utils.html2text import html2text
class MarkdownMLizer(object):
    '''
    Convert the spine of an OEB book into Markdown formatted text.

    Every spine item is serialized to markup, optionally stripped of its
    anchor tags, and then run through html2text. The per-item results are
    concatenated in spine order.
    '''

    # Opening (or self-closing) anchor tags only. The tag name has to be
    # followed by whitespace, "/" or ">" so that other elements whose names
    # merely start with "a" (abbr, address, article, ...) are left intact.
    _ANCHOR_OPEN_RE = re.compile(r'<\s*a(?:\s[^>]*)?/?>')
    # Closing anchor tags.
    _ANCHOR_CLOSE_RE = re.compile(r'<\s*/\s*a\s*>')

    def __init__(self, log):
        '''
        :param log: Logger used for info and debug messages.
        '''
        self.log = log

    def extract_content(self, oeb_book, opts):
        '''
        Convert ``oeb_book`` to Markdown formatted text.

        :param oeb_book: Book whose ``spine`` items are converted.
        :param opts: Conversion options; ``opts.remove_links`` is consulted.
        :return: The converted text of the whole spine.
        '''
        self.log.info('Converting XHTML to Markdown formatted TXT...')
        self.oeb_book = oeb_book
        self.opts = opts

        return self.mlize_spine()

    @staticmethod
    def _remove_links(html):
        '''
        Return ``html`` with anchor tags removed and their content kept.
        '''
        html = MarkdownMLizer._ANCHOR_OPEN_RE.sub('', html)
        return MarkdownMLizer._ANCHOR_CLOSE_RE.sub('', html)

    def mlize_spine(self):
        '''
        Convert each spine item in turn and join the results.

        :return: The concatenated converted text.
        '''
        # Seeded with an empty unicode string so the joined result is
        # unicode even when the spine is empty.
        output = [u'']
        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to Markdown formatted TXT...' % item.href)
            html = unicode(etree.tostring(item.data, encoding=unicode))
            if self.opts.remove_links:
                html = self._remove_links(html)
            # append, not +=: extending a list with a string would store
            # the text one character per element.
            output.append(html2text(html))
        return u''.join(output)

View File

@ -48,6 +48,11 @@ class TXTOutput(OutputFormatPlugin):
OptionRecommendation(name='markdown_format', OptionRecommendation(name='markdown_format',
recommended_value=False, level=OptionRecommendation.LOW, recommended_value=False, level=OptionRecommendation.LOW,
help=_('Produce Markdown formatted text.')), help=_('Produce Markdown formatted text.')),
# Option that strips links from the document before Markdown conversion.
OptionRecommendation(name='remove_links',
    recommended_value=False, level=OptionRecommendation.LOW,
    help=_('Remove links within the document. This is only '
    'useful when paired with the markdown-format option because '
    'links are removed with plain text output.')),
]) ])
def convert(self, oeb_book, output_path, input_plugin, opts, log): def convert(self, oeb_book, output_path, input_plugin, opts, log):