mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add markdownml.py. TXT Output: Remove links option to make markdown output cleaner.
This commit is contained in:
parent
04e3ba0e81
commit
98a0970f02
40
src/calibre/ebooks/txt/markdownml.py
Normal file
40
src/calibre/ebooks/txt/markdownml.py
Normal file
@ -0,0 +1,40 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Transform OEB content into Markdown formatted plain text
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre.utils.html2text import html2text
|
||||
|
||||
class MarkdownMLizer(object):
    '''
    Transform the spine of an OEB book into Markdown formatted plain text.

    Each spine item is serialized to (X)HTML, optionally stripped of its
    anchor tags, and then handed to ``html2text`` for the Markdown
    conversion.
    '''

    # Opening (or self-closing) anchor tag. The lookahead requires the tag
    # name to be exactly "a", so tags that merely start with that letter
    # (<abbr>, <address>, <article>, <aside>, <audio>, <area>) are kept.
    _OPEN_ANCHOR_RE = re.compile(r'<\s*a(?=[\s/>])[^>]*>')
    # Closing anchor tag, tolerating whitespace around the slash and name.
    _CLOSE_ANCHOR_RE = re.compile(r'<\s*/\s*a\s*>')

    def __init__(self, log):
        '''
        :param log: Logger object; its ``info`` and ``debug`` methods are used.
        '''
        self.log = log

    def extract_content(self, oeb_book, opts):
        '''
        Convert ``oeb_book`` to Markdown formatted text.

        :param oeb_book: Book whose ``spine`` items are converted in order.
        :param opts: Conversion options; ``opts.remove_links`` is read.
        :return: The Markdown text of all spine items joined together.
        '''
        self.log.info('Converting XHTML to Markdown formatted TXT...')
        self.oeb_book = oeb_book
        self.opts = opts

        return self.mlize_spine()

    @staticmethod
    def _strip_links(html):
        '''
        Return ``html`` with every anchor tag removed, keeping the link text.
        '''
        html = MarkdownMLizer._OPEN_ANCHOR_RE.sub('', html)
        return MarkdownMLizer._CLOSE_ANCHOR_RE.sub('', html)

    def mlize_spine(self):
        '''
        Convert every item in the book's spine and concatenate the results.

        :return: A single string holding the Markdown for the whole spine.
        '''
        output = []
        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to Markdown formatted TXT...' % item.href)
            html = unicode(etree.tostring(item.data, encoding=unicode))
            if self.opts.remove_links:
                html = self._strip_links(html)
            # Append the converted text as one chunk; extending the list with
            # a string would split it into one element per character.
            output.append(html2text(html))

        return u''.join(output)
|
@ -48,6 +48,11 @@ class TXTOutput(OutputFormatPlugin):
|
||||
OptionRecommendation(name='markdown_format',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Produce Markdown formatted text.')),
|
||||
OptionRecommendation(name='remove_links',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Remove links within the document. This is only ' \
|
||||
'useful when paired with the markdown-format option because ' \
|
||||
'links are removed with plain text output.')),
|
||||
])
|
||||
|
||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||
|
Loading…
x
Reference in New Issue
Block a user