TXT Input: Add support for markdown extensions

TXT Input: Allow enabling various markdown extensions, which provide
additional features when converting markdown-formatted txt files. See
http://pythonhosted.org/Markdown/extensions/index.html for details.

Merge branch 'markdown_extensions' of https://github.com/user-none/calibre
This commit is contained in:
Kovid Goyal 2013-08-05 10:12:59 +05:30
commit 7d26a22a46
4 changed files with 80 additions and 31 deletions

View File

@ -9,6 +9,18 @@ import os
from calibre import _ent_pat, walk, xml_entity_to_unicode from calibre import _ent_pat, walk, xml_entity_to_unicode
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
# Markdown extensions the user is allowed to enable for TXT input.
# Maps each extension name (as written in the comma separated
# ``markdown_extensions`` option) to a short human-readable description.
# Descriptions are wrapped in _() -- presumably calibre's translation
# builtin; confirm against the rest of the module.
MD_EXTENSIONS = {
    'abbr': _('Abbreviations'),
    'def_list': _('Definition lists'),
    'fenced_code': _('Alternative code block syntax'),
    'footnotes': _('Footnotes'),
    'headerid': _('Allow ids as part of a header'),
    'meta': _('Metadata in the document'),
    'tables': _('Support tables'),
    'toc': _('Generate a table of contents'),
    'wikilinks': _('Wiki style links'),
}
class TXTInput(InputFormatPlugin): class TXTInput(InputFormatPlugin):
name = 'TXT Input' name = 'TXT Input'
@ -47,8 +59,12 @@ class TXTInput(InputFormatPlugin):
OptionRecommendation(name='txt_in_remove_indents', recommended_value=False, OptionRecommendation(name='txt_in_remove_indents', recommended_value=False,
help=_('Normally extra space at the beginning of lines is retained. ' help=_('Normally extra space at the beginning of lines is retained. '
'With this option they will be removed.')), 'With this option they will be removed.')),
OptionRecommendation(name="markdown_disable_toc", recommended_value=False, OptionRecommendation(name="markdown_extensions", recommended_value='footnotes, tables, toc',
help=_('Do not insert a Table of Contents into the output text.')), help=_('Enable extensions to markdown syntax. Extensions are formatting that is not part '
'of the standard markdown format. The extensions enabled by default: %default.\n'
'To learn more about markdown extensions, see http://pythonhosted.org/Markdown/extensions/index.html\n'
'This should be a comma separated list of extensions to enable:\n') +
'\n'.join('* %s: %s' % (k, MD_EXTENSIONS[k]) for k in sorted(MD_EXTENSIONS))),
]) ])
def convert(self, stream, options, file_ext, log, def convert(self, stream, options, file_ext, log,
@ -178,7 +194,7 @@ class TXTInput(InputFormatPlugin):
if options.formatting_type == 'markdown': if options.formatting_type == 'markdown':
log.debug('Running text through markdown conversion...') log.debug('Running text through markdown conversion...')
try: try:
html = convert_markdown(txt, disable_toc=options.markdown_disable_toc) html = convert_markdown(txt, extensions=[x.strip() for x in options.markdown_extensions.split(',') if x.strip()])
except RuntimeError: except RuntimeError:
raise ValueError('This txt file has malformed markup, it cannot be' raise ValueError('This txt file has malformed markup, it cannot be'
' converted by calibre. See http://daringfireball.net/projects/markdown/syntax') ' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')

View File

@ -40,7 +40,7 @@ def clean_txt(txt):
txt = re.sub('(?<=.)\s+$', '', txt) txt = re.sub('(?<=.)\s+$', '', txt)
# Remove excessive line breaks. # Remove excessive line breaks.
txt = re.sub('\n{5,}', '\n\n\n\n', txt) txt = re.sub('\n{5,}', '\n\n\n\n', txt)
#remove ASCII invalid chars : 0 to 8 and 11-14 to 24 # remove ASCII invalid chars : 0 to 8 and 11-14 to 24
txt = clean_ascii_chars(txt) txt = clean_ascii_chars(txt)
return txt return txt
@ -55,14 +55,14 @@ def split_txt(txt, epub_split_size_kb=0):
to accomidate the EPUB file size limitation to accomidate the EPUB file size limitation
and will fail. and will fail.
''' '''
#Takes care if there is no point to split # Takes care if there is no point to split
if epub_split_size_kb > 0: if epub_split_size_kb > 0:
if isinstance(txt, unicode): if isinstance(txt, unicode):
txt = txt.encode('utf-8') txt = txt.encode('utf-8')
length_byte = len(txt) length_byte = len(txt)
#Calculating the average chunk value for easy splitting as EPUB (+2 as a safe margin) # Calculating the average chunk value for easy splitting as EPUB (+2 as a safe margin)
chunk_size = long(length_byte / (int(length_byte / (epub_split_size_kb * 1024) ) + 2 )) chunk_size = long(length_byte / (int(length_byte / (epub_split_size_kb * 1024)) + 2))
#if there are chunks with a superior size then go and break # if there are chunks with a superior size then go and break
if (len(filter(lambda x: len(x) > chunk_size, txt.split('\n\n')))) : if (len(filter(lambda x: len(x) > chunk_size, txt.split('\n\n')))) :
txt = '\n\n'.join([split_string_separator(line, chunk_size) txt = '\n\n'.join([split_string_separator(line, chunk_size)
for line in txt.split('\n\n')]) for line in txt.split('\n\n')])
@ -95,11 +95,10 @@ def convert_basic(txt, title='', epub_split_size_kb=0):
return HTML_TEMPLATE % (title, u'\n'.join(lines)) return HTML_TEMPLATE % (title, u'\n'.join(lines))
def convert_markdown(txt, title='', disable_toc=False): def convert_markdown(txt, title='', extensions=('footnotes', 'tables', 'toc')):
from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
from calibre.ebooks.markdown import markdown from calibre.ebooks.markdown import markdown
extensions=['footnotes', 'tables'] extensions = [x.lower() for x in extensions if x.lower() in MD_EXTENSIONS]
if not disable_toc:
extensions.append('toc')
md = markdown.Markdown( md = markdown.Markdown(
extensions, extensions,
safe_mode=False) safe_mode=False)

View File

@ -4,8 +4,11 @@ __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from PyQt4.Qt import QListWidgetItem, Qt
from calibre.gui2.convert.txt_input_ui import Ui_Form from calibre.gui2.convert.txt_input_ui import Ui_Form
from calibre.gui2.convert import Widget from calibre.gui2.convert import Widget
from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
class PluginWidget(Widget, Ui_Form): class PluginWidget(Widget, Ui_Form):
@ -16,11 +19,42 @@ class PluginWidget(Widget, Ui_Form):
def __init__(self, parent, get_option, get_help, db=None, book_id=None): def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent, Widget.__init__(self, parent,
['paragraph_type', 'formatting_type', 'markdown_disable_toc', ['paragraph_type', 'formatting_type', 'markdown_extensions',
'preserve_spaces', 'txt_in_remove_indents']) 'preserve_spaces', 'txt_in_remove_indents'])
self.db, self.book_id = db, book_id self.db, self.book_id = db, book_id
for x in get_option('paragraph_type').option.choices: for x in get_option('paragraph_type').option.choices:
self.opt_paragraph_type.addItem(x) self.opt_paragraph_type.addItem(x)
for x in get_option('formatting_type').option.choices: for x in get_option('formatting_type').option.choices:
self.opt_formatting_type.addItem(x) self.opt_formatting_type.addItem(x)
self.md_map = {}
for name, text in MD_EXTENSIONS.iteritems():
i = QListWidgetItem('%s - %s' % (name, text), self.opt_markdown_extensions)
i.setFlags(Qt.ItemIsUserCheckable | Qt.ItemIsEnabled)
i.setData(Qt.UserRole, name)
self.md_map[name] = i
self.initialize_options(get_option, get_help, db, book_id) self.initialize_options(get_option, get_help, db, book_id)
def setup_widget_help(self, g):
    '''
    Install a fixed help message on widget ``g`` and then let the base
    ``Widget`` implementation finish setting up its help.

    :param g: The GUI object whose ``_help`` attribute is overwritten.
    :return: Whatever ``Widget.setup_widget_help`` returns for ``g``.
    '''
    help_text = _('Specify which markdown extensions to enable')
    g._help = help_text
    return Widget.setup_widget_help(self, g)
def set_value_handler(self, g, val):
    '''
    Apply a comma separated list of extension names to the markdown
    extensions list widget.

    :param g: The GUI object being set. Only ``self.opt_markdown_extensions``
              is handled here.
    :param val: Comma separated extension names; whitespace around each
                name is ignored and names not present in ``self.md_map``
                are skipped.
    :return: ``True`` when ``g`` was handled, otherwise ``None``.
    '''
    if g is not self.opt_markdown_extensions:
        # Not ours: returning None signals that nothing was handled here.
        return

    # Start from a clean slate so stale check marks never survive.
    for item in self.md_map.itervalues():
        item.setCheckState(Qt.Unchecked)

    requested = (part.strip() for part in val.split(','))
    for ext_name in requested:
        if ext_name in self.md_map:
            self.md_map[ext_name].setCheckState(Qt.Checked)
    return True
def get_value_handler(self, g):
    '''
    Read the current value of GUI object ``g``.

    For the markdown extensions list the value is the names of all checked
    items (taken from each item's ``Qt.UserRole`` data) joined with ``', '``.
    Every other object is delegated to ``Widget.get_value_handler``.
    '''
    if g is self.opt_markdown_extensions:
        enabled = []
        for item in self.md_map.itervalues():
            if item.checkState():
                enabled.append(unicode(item.data(Qt.UserRole).toString()))
        return ', '.join(enabled)
    return Widget.get_value_handler(self, g)
def connect_gui_obj_handler(self, g, f):
    '''
    Arrange for ``f`` to be called whenever an item of the markdown
    extensions list changes.

    :param g: The GUI object to connect; must be
              ``self.opt_markdown_extensions``.
    :param f: Zero-argument callable invoked on every ``itemChanged`` signal.
    :raises NotImplementedError: If ``g`` is any other object.
    '''
    if g is self.opt_markdown_extensions:
        def on_item_changed(item):
            # The signal passes the changed item, but f takes no arguments.
            return f()
        g.itemChanged.connect(on_item_changed)
    else:
        raise NotImplementedError()

View File

@ -6,8 +6,8 @@
<rect> <rect>
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>518</width> <width>588</width>
<height>353</height> <height>378</height>
</rect> </rect>
</property> </property>
<property name="windowTitle"> <property name="windowTitle">
@ -93,7 +93,7 @@
</widget> </widget>
</item> </item>
<item> <item>
<widget class="QGroupBox" name="groupBox"> <widget class="QGroupBox" name="markdown_box">
<property name="title"> <property name="title">
<string>Markdown</string> <string>Markdown</string>
</property> </property>
@ -112,28 +112,28 @@
</widget> </widget>
</item> </item>
<item> <item>
<widget class="QCheckBox" name="opt_markdown_disable_toc"> <widget class="QLabel" name="label_4">
<property name="text"> <property name="text">
<string>Do not insert Table of Contents into output text when using markdown</string> <string>You can optionally enable various extensions to the base markdown syntax, below.</string>
</property>
</widget>
</item>
<item>
<widget class="QListWidget" name="opt_markdown_extensions"/>
</item>
<item>
<widget class="QLabel" name="label_5">
<property name="text">
<string>More information on &lt;a href=&quot;http://pythonhosted.org/Markdown/extensions/index.html&quot;&gt;markdown extensions&lt;/a&gt;</string>
</property>
<property name="openExternalLinks">
<bool>true</bool>
</property> </property>
</widget> </widget>
</item> </item>
</layout> </layout>
</widget> </widget>
</item> </item>
<item>
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>213</height>
</size>
</property>
</spacer>
</item>
</layout> </layout>
</widget> </widget>
<resources/> <resources/>