Implement bug #3359: Make markdown processing of text files optional.

This commit is contained in:
John Schember 2009-08-31 21:03:00 -04:00
parent b8166db5cc
commit 13a4379063
5 changed files with 44 additions and 22 deletions

View File

@ -934,7 +934,7 @@ class Manifest(object):
self.oeb.log.debug('Converting', self.href, '...')
from calibre.ebooks.txt.processor import txt_to_markdown
from calibre.ebooks.txt.processor import convert_markdown
title = self.oeb.metadata.title
if title:
@ -942,7 +942,7 @@ class Manifest(object):
else:
title = _('Unknown')
return self._parse_xhtml(txt_to_markdown(data, title))
return self._parse_xhtml(convert_markdown(data, title))
def _parse_css(self, data):

View File

@ -13,8 +13,8 @@ import struct
from calibre.ebooks.compression.palmdoc import decompress_doc
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.txt.processor import opf_writer
from calibre.ebooks.txt.processor import txt_to_markdown
from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs, \
opf_writer
class HeaderRecord(object):
'''
@ -62,7 +62,9 @@ class Reader(FormatReader):
txt += self.decompress_text(i)
self.log.info('Converting text to OEB...')
html = txt_to_markdown(txt, single_line_paras=self.single_line_paras)
if self.single_line_paras:
txt = separate_paragraphs(txt)
html = convert_basic(txt)
with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
index.write(html.encode('utf-8'))

View File

@ -12,7 +12,8 @@ import os, struct, zlib
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.pdb.ztxt import zTXTError
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs, \
opf_writer
SUPPORTED_VERSION = (1, 40)
@ -77,7 +78,9 @@ class Reader(FormatReader):
txt += self.decompress_text(i)
self.log.info('Converting text to OEB...')
html = txt_to_markdown(txt, single_line_paras=self.single_line_paras)
if self.single_line_paras:
txt = separate_paragraphs(txt)
html = convert_basic(txt)
with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
index.write(html.encode('utf-8'))

View File

@ -7,7 +7,8 @@ __docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.ebooks.txt.processor import txt_to_markdown
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
separate_paragraphs
class TXTInput(InputFormatPlugin):
@ -21,6 +22,8 @@ class TXTInput(InputFormatPlugin):
help=_('Normally calibre treats blank lines as paragraph markers. '
'With this option it will assume that every line represents '
'a paragraph instead.')),
OptionRecommendation(name='markdown', recommended_value=False,
help=_('Run the text input though the markdown processor.')),
])
def convert(self, stream, options, file_ext, log,
@ -31,12 +34,18 @@ class TXTInput(InputFormatPlugin):
log.debug('Reading text from file...')
txt = stream.read().decode(ienc, 'replace')
if options.single_line_paras:
txt = separate_paragraphs(txt)
if options.markdown:
log.debug('Running text though markdown conversion...')
try:
html = txt_to_markdown(txt, single_line_paras=options.single_line_paras)
html = convert_markdown(txt)
except RuntimeError:
raise ValueError('This txt file has malformed markup, it cannot be'
'converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
else:
html = convert_basic(txt)
from calibre.customize.ui import plugin_for_input_format
html_input = plugin_for_input_format('html')

View File

@ -5,6 +5,7 @@ Read content from txt file.
'''
import os
import re
from calibre.ebooks.markdown import markdown
from calibre.ebooks.metadata.opf2 import OPFCreator
@ -13,18 +14,25 @@ __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
def txt_to_markdown(txt, title='', single_line_paras=False):
if single_line_paras:
txt = txt.replace('\r\n', '\n')
txt = txt.replace('\r', '\n')
txt = txt.replace('\n', '\n\n')
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>%s</body></html>'


def convert_basic(txt, title=''):
    """Wrap plain text in a minimal HTML document, one ``<p>`` per line.

    Every line returned by ``txt.splitlines()`` becomes its own paragraph
    element (blank lines therefore yield empty paragraphs).  The paragraphs
    are joined with newlines and inserted, together with *title*, into
    ``HTML_TEMPLATE``.

    The input is plain text, not markup, so ``&``, ``<`` and ``>`` in both
    the body lines and the title are escaped; otherwise a line such as
    ``a < b`` would produce malformed HTML.

    :param txt: The plain text to convert.
    :param title: Text placed in the document's ``<title>`` element.
    :return: The complete HTML document as a string.
    """
    # Function-scope import so this block does not depend on a new
    # module-level import being added elsewhere in the file.
    from xml.sax.saxutils import escape

    paragraphs = ['<p>%s</p>' % escape(line) for line in txt.splitlines()]
    return HTML_TEMPLATE % (escape(title), '\n'.join(paragraphs))
def convert_markdown(txt, title=''):
    """Run *txt* through the markdown processor and wrap it in HTML.

    A ``markdown.Markdown`` instance is created with the ``footnotes``,
    ``tables`` and ``toc`` extensions and ``safe_mode`` disabled.  The
    converted body and *title* are inserted into ``HTML_TEMPLATE``.

    :param txt: Markdown-formatted text to convert.
    :param title: Text placed in the document's ``<title>`` element; it is
        inserted as given, without escaping.
    :return: The complete HTML document as a string.
    """
    md = markdown.Markdown(
        extensions=['footnotes', 'tables', 'toc'],
        safe_mode=False,)
    # Convert exactly once and return the templated result directly; the
    # previous text also built an unused ``html`` local and carried an
    # unreachable second ``return``.
    return HTML_TEMPLATE % (title, md.convert(txt))
def separate_paragraphs(txt):
    """Turn every single line break into a blank-line paragraph break.

    Line endings are first normalised to ``\\n`` (``\\r\\n`` first, then any
    remaining ``\\r``).  After that, each newline that sits directly between
    two non-newline characters is doubled, so consecutive text lines end up
    separated by an empty line.  Newlines that already border another
    newline, or the start or end of the text, are left alone.

    :param txt: The text to process.
    :return: The text with normalised line endings and doubled single
        line breaks.
    """
    unix_text = txt.replace('\r\n', '\n').replace('\r', '\n')
    # Lookbehind/lookahead on "." match only when both neighbours are
    # ordinary (non-newline) characters.
    lone_break = re.compile(u'(?<=.)\n(?=.)')
    return lone_break.sub(u'\n\n', unix_text)
def opf_writer(path, opf_name, manifest, spine, mi):
opf = OPFCreator(path, mi)