Implement bug #3359: Make markdown processing of text files optional.

2025-07-09 03:04:10 -04:00 · 2009-08-31 21:03:00 -04:00 · 2009-08-31 21:03:00 -04:00 · 13a4379063
commit 13a4379063
parent b8166db5cc
5 changed files with 44 additions and 22 deletions
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -934,7 +934,7 @@ class Manifest(object):

            self.oeb.log.debug('Converting', self.href, '...')

-            from calibre.ebooks.txt.processor import txt_to_markdown
+            from calibre.ebooks.txt.processor import convert_markdown

            title = self.oeb.metadata.title
            if title:
@ -942,7 +942,7 @@ class Manifest(object):
            else:
                title = _('Unknown')

-            return self._parse_xhtml(txt_to_markdown(data, title))
+            return self._parse_xhtml(convert_markdown(data, title))


        def _parse_css(self, data):
--- a/src/calibre/ebooks/pdb/palmdoc/reader.py
+++ b/src/calibre/ebooks/pdb/palmdoc/reader.py
@ -13,8 +13,8 @@ import struct

 from calibre.ebooks.compression.palmdoc import decompress_doc
 from calibre.ebooks.pdb.formatreader import FormatReader
-from calibre.ebooks.txt.processor import opf_writer
-from calibre.ebooks.txt.processor import txt_to_markdown
+from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs, \
+    opf_writer

 class HeaderRecord(object):
    '''
@ -62,7 +62,9 @@ class Reader(FormatReader):
            txt += self.decompress_text(i)

        self.log.info('Converting text to OEB...')
-        html = txt_to_markdown(txt, single_line_paras=self.single_line_paras)
+        if self.single_line_paras:
+            txt = separate_paragraphs(txt)
+        html = convert_basic(txt)
        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
            index.write(html.encode('utf-8'))

--- a/src/calibre/ebooks/pdb/ztxt/reader.py
+++ b/src/calibre/ebooks/pdb/ztxt/reader.py
@ -12,7 +12,8 @@ import os, struct, zlib

 from calibre.ebooks.pdb.formatreader import FormatReader
 from calibre.ebooks.pdb.ztxt import zTXTError
-from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
+from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs, \
+    opf_writer

 SUPPORTED_VERSION = (1, 40)

@ -77,7 +78,9 @@ class Reader(FormatReader):
            txt += self.decompress_text(i)

        self.log.info('Converting text to OEB...')
-        html = txt_to_markdown(txt, single_line_paras=self.single_line_paras)
+        if self.single_line_paras:
+            txt = separate_paragraphs(txt)
+        html = convert_basic(txt)
        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
            index.write(html.encode('utf-8'))
                        
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@ -7,7 +7,8 @@ __docformat__ = 'restructuredtext en'
 import os

 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
-from calibre.ebooks.txt.processor import txt_to_markdown
+from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
+    separate_paragraphs

 class TXTInput(InputFormatPlugin):

@ -21,6 +22,8 @@ class TXTInput(InputFormatPlugin):
            help=_('Normally calibre treats blank lines as paragraph markers. '
                'With this option it will assume that every line represents '
                'a paragraph instead.')),
+        OptionRecommendation(name='markdown', recommended_value=False,
+            help=_('Run the text input though the markdown processor.')),
    ])

    def convert(self, stream, options, file_ext, log,
@ -31,12 +34,18 @@ class TXTInput(InputFormatPlugin):
        log.debug('Reading text from file...')
        txt = stream.read().decode(ienc, 'replace')

+        if options.single_line_paras:
+            txt = separate_paragraphs(txt)
+
+        if options.markdown:
            log.debug('Running text though markdown conversion...')
            try:
-            html = txt_to_markdown(txt, single_line_paras=options.single_line_paras)
+                html = convert_markdown(txt)
            except RuntimeError:
                raise ValueError('This txt file has malformed markup, it cannot be'
                    'converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
+        else:
+            html = convert_basic(txt)

        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@ -5,6 +5,7 @@ Read content from txt file.
 '''

 import os
+import re

 from calibre.ebooks.markdown import markdown
 from calibre.ebooks.metadata.opf2 import OPFCreator
@ -13,18 +14,25 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

-def txt_to_markdown(txt, title='', single_line_paras=False):
-    if single_line_paras:
-        txt = txt.replace('\r\n', '\n')
-        txt = txt.replace('\r', '\n')
-        txt = txt.replace('\n', '\n\n')
+HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>%s</body></html>'
+
+def convert_basic(txt, title=''):
+    lines = []
+    for line in txt.splitlines():
+        lines.append('<p>%s</p>' % line)
+    return HTML_TEMPLATE % (title, '\n'.join(lines))
+
+def convert_markdown(txt, title=''):
    md = markdown.Markdown(
        extensions=['footnotes', 'tables', 'toc'],
        safe_mode=False,)
-    html = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>%s</body></html>' % (title,
-        md.convert(txt))
+    return HTML_TEMPLATE % (title, md.convert(txt))

-    return html
+def separate_paragraphs(txt):
+    txt = txt.replace('\r\n', '\n')
+    txt = txt.replace('\r', '\n')
+    txt = re.sub(u'(?<=.)\n(?=.)', u'\n\n', txt)
+    return txt

 def opf_writer(path, opf_name, manifest, spine, mi):
    opf = OPFCreator(path, mi)