diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 3f9e6a4d4a..2e06fffe4e 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -934,7 +934,7 @@ class Manifest(object): self.oeb.log.debug('Converting', self.href, '...') - from calibre.ebooks.txt.processor import txt_to_markdown + from calibre.ebooks.txt.processor import convert_markdown title = self.oeb.metadata.title if title: @@ -942,7 +942,7 @@ class Manifest(object): else: title = _('Unknown') - return self._parse_xhtml(txt_to_markdown(data, title)) + return self._parse_xhtml(convert_markdown(data, title)) def _parse_css(self, data): diff --git a/src/calibre/ebooks/pdb/palmdoc/reader.py b/src/calibre/ebooks/pdb/palmdoc/reader.py index e1935db566..8992382597 100644 --- a/src/calibre/ebooks/pdb/palmdoc/reader.py +++ b/src/calibre/ebooks/pdb/palmdoc/reader.py @@ -13,8 +13,8 @@ import struct from calibre.ebooks.compression.palmdoc import decompress_doc from calibre.ebooks.pdb.formatreader import FormatReader -from calibre.ebooks.txt.processor import opf_writer -from calibre.ebooks.txt.processor import txt_to_markdown +from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs, \ + opf_writer class HeaderRecord(object): ''' @@ -62,7 +62,9 @@ class Reader(FormatReader): txt += self.decompress_text(i) self.log.info('Converting text to OEB...') - html = txt_to_markdown(txt, single_line_paras=self.single_line_paras) + if self.single_line_paras: + txt = separate_paragraphs(txt) + html = convert_basic(txt) with open(os.path.join(output_dir, 'index.html'), 'wb') as index: index.write(html.encode('utf-8')) diff --git a/src/calibre/ebooks/pdb/ztxt/reader.py b/src/calibre/ebooks/pdb/ztxt/reader.py index 86c5abfe82..664f498bee 100644 --- a/src/calibre/ebooks/pdb/ztxt/reader.py +++ b/src/calibre/ebooks/pdb/ztxt/reader.py @@ -12,7 +12,8 @@ import os, struct, zlib from calibre.ebooks.pdb.formatreader import FormatReader from calibre.ebooks.pdb.ztxt import zTXTError -from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer +from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs, \ + opf_writer SUPPORTED_VERSION = (1, 40) @@ -77,7 +78,9 @@ class Reader(FormatReader): txt += self.decompress_text(i) self.log.info('Converting text to OEB...') - html = txt_to_markdown(txt, single_line_paras=self.single_line_paras) + if self.single_line_paras: + txt = separate_paragraphs(txt) + html = convert_basic(txt) with open(os.path.join(output_dir, 'index.html'), 'wb') as index: index.write(html.encode('utf-8')) diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index 5d84a1bde1..2b0245c98b 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -7,7 +7,8 @@ __docformat__ = 'restructuredtext en' import os from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation -from calibre.ebooks.txt.processor import txt_to_markdown +from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \ + separate_paragraphs class TXTInput(InputFormatPlugin): @@ -21,6 +22,8 @@ class TXTInput(InputFormatPlugin): help=_('Normally calibre treats blank lines as paragraph markers. ' 'With this option it will assume that every line represents ' 'a paragraph instead.')), + OptionRecommendation(name='markdown', recommended_value=False, + help=_('Run the text input though the markdown processor.')), ]) def convert(self, stream, options, file_ext, log, @@ -31,12 +34,18 @@ class TXTInput(InputFormatPlugin): log.debug('Reading text from file...') txt = stream.read().decode(ienc, 'replace') - log.debug('Running text though markdown conversion...') - try: - html = txt_to_markdown(txt, single_line_paras=options.single_line_paras) - except RuntimeError: - raise ValueError('This txt file has malformed markup, it cannot be' - 'converted by calibre. See http://daringfireball.net/projects/markdown/syntax') + if options.single_line_paras: + txt = separate_paragraphs(txt) + + if options.markdown: + log.debug('Running text though markdown conversion...') + try: + html = convert_markdown(txt) + except RuntimeError: + raise ValueError('This txt file has malformed markup, it cannot be' + 'converted by calibre. See http://daringfireball.net/projects/markdown/syntax') + else: + html = convert_basic(txt) from calibre.customize.ui import plugin_for_input_format html_input = plugin_for_input_format('html') diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index 3005d633b8..94df216616 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -5,6 +5,7 @@ Read content from txt file. ''' import os +import re from calibre.ebooks.markdown import markdown from calibre.ebooks.metadata.opf2 import OPFCreator @@ -13,18 +14,25 @@ __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' __docformat__ = 'restructuredtext en' -def txt_to_markdown(txt, title='', single_line_paras=False): - if single_line_paras: - txt = txt.replace('\r\n', '\n') - txt = txt.replace('\r', '\n') - txt = txt.replace('\n', '\n\n') +HTML_TEMPLATE = u'%s%s' + +def convert_basic(txt, title=''): + lines = [] + for line in txt.splitlines(): + lines.append('

%s

' % line) + return HTML_TEMPLATE % (title, '\n'.join(lines)) + +def convert_markdown(txt, title=''): md = markdown.Markdown( extensions=['footnotes', 'tables', 'toc'], safe_mode=False,) - html = u'%s%s' % (title, - md.convert(txt)) + return HTML_TEMPLATE % (title, md.convert(txt)) - return html +def separate_paragraphs(txt): + txt = txt.replace('\r\n', '\n') + txt = txt.replace('\r', '\n') + txt = re.sub(u'(?<=.)\n(?=.)', u'\n\n', txt) + return txt def opf_writer(path, opf_name, manifest, spine, mi): opf = OPFCreator(path, mi)