mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
TXT Input: Add support for markdown extensions
TXT Input: Allow using various markdown extensions for more features when converting markdown-formatted txt files. See http://pythonhosted.org/Markdown/extensions/index.html for details. Merged from branch 'markdown_extensions' of https://github.com/user-none/calibre.
This commit is contained in:
commit
7d26a22a46
@ -9,6 +9,18 @@ import os
|
|||||||
from calibre import _ent_pat, walk, xml_entity_to_unicode
|
from calibre import _ent_pat, walk, xml_entity_to_unicode
|
||||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||||
|
|
||||||
|
MD_EXTENSIONS = {
|
||||||
|
'abbr': _('Abbreviations'),
|
||||||
|
'def_list': _('Definition lists'),
|
||||||
|
'fenced_code': _('Alternative code block syntax'),
|
||||||
|
'footnotes': _('Footnotes'),
|
||||||
|
'headerid': _('Allow ids as part of a header'),
|
||||||
|
'meta': _('Metadata in the document'),
|
||||||
|
'tables': _('Support tables'),
|
||||||
|
'toc': _('Generate a table of contents'),
|
||||||
|
'wikilinks': _('Wiki style links'),
|
||||||
|
}
|
||||||
|
|
||||||
class TXTInput(InputFormatPlugin):
|
class TXTInput(InputFormatPlugin):
|
||||||
|
|
||||||
name = 'TXT Input'
|
name = 'TXT Input'
|
||||||
@ -47,8 +59,12 @@ class TXTInput(InputFormatPlugin):
|
|||||||
OptionRecommendation(name='txt_in_remove_indents', recommended_value=False,
|
OptionRecommendation(name='txt_in_remove_indents', recommended_value=False,
|
||||||
help=_('Normally extra space at the beginning of lines is retained. '
|
help=_('Normally extra space at the beginning of lines is retained. '
|
||||||
'With this option they will be removed.')),
|
'With this option they will be removed.')),
|
||||||
OptionRecommendation(name="markdown_disable_toc", recommended_value=False,
|
OptionRecommendation(name="markdown_extensions", recommended_value='footnotes, tables, toc',
|
||||||
help=_('Do not insert a Table of Contents into the output text.')),
|
help=_('Enable extensions to markdown syntax. Extensions are formatting that is not part '
|
||||||
|
'of the standard markdown format. The extensions enabled by default: %default.\n'
|
||||||
|
'To learn more about markdown extensions, see http://pythonhosted.org/Markdown/extensions/index.html\n'
|
||||||
|
'This should be a comma separated list of extensions to enable:\n') +
|
||||||
|
'\n'.join('* %s: %s' % (k, MD_EXTENSIONS[k]) for k in sorted(MD_EXTENSIONS))),
|
||||||
])
|
])
|
||||||
|
|
||||||
def convert(self, stream, options, file_ext, log,
|
def convert(self, stream, options, file_ext, log,
|
||||||
@ -178,7 +194,7 @@ class TXTInput(InputFormatPlugin):
|
|||||||
if options.formatting_type == 'markdown':
|
if options.formatting_type == 'markdown':
|
||||||
log.debug('Running text through markdown conversion...')
|
log.debug('Running text through markdown conversion...')
|
||||||
try:
|
try:
|
||||||
html = convert_markdown(txt, disable_toc=options.markdown_disable_toc)
|
html = convert_markdown(txt, extensions=[x.strip() for x in options.markdown_extensions.split(',') if x.strip()])
|
||||||
except RuntimeError:
|
except RuntimeError:
|
||||||
raise ValueError('This txt file has malformed markup, it cannot be'
|
raise ValueError('This txt file has malformed markup, it cannot be'
|
||||||
' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
|
' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
|
||||||
|
@ -40,7 +40,7 @@ def clean_txt(txt):
|
|||||||
txt = re.sub('(?<=.)\s+$', '', txt)
|
txt = re.sub('(?<=.)\s+$', '', txt)
|
||||||
# Remove excessive line breaks.
|
# Remove excessive line breaks.
|
||||||
txt = re.sub('\n{5,}', '\n\n\n\n', txt)
|
txt = re.sub('\n{5,}', '\n\n\n\n', txt)
|
||||||
#remove ASCII invalid chars : 0 to 8 and 11-14 to 24
|
# remove ASCII invalid chars : 0 to 8 and 11-14 to 24
|
||||||
txt = clean_ascii_chars(txt)
|
txt = clean_ascii_chars(txt)
|
||||||
|
|
||||||
return txt
|
return txt
|
||||||
@ -55,14 +55,14 @@ def split_txt(txt, epub_split_size_kb=0):
|
|||||||
to accomidate the EPUB file size limitation
|
to accomidate the EPUB file size limitation
|
||||||
and will fail.
|
and will fail.
|
||||||
'''
|
'''
|
||||||
#Takes care if there is no point to split
|
# Takes care if there is no point to split
|
||||||
if epub_split_size_kb > 0:
|
if epub_split_size_kb > 0:
|
||||||
if isinstance(txt, unicode):
|
if isinstance(txt, unicode):
|
||||||
txt = txt.encode('utf-8')
|
txt = txt.encode('utf-8')
|
||||||
length_byte = len(txt)
|
length_byte = len(txt)
|
||||||
#Calculating the average chunk value for easy splitting as EPUB (+2 as a safe margin)
|
# Calculating the average chunk value for easy splitting as EPUB (+2 as a safe margin)
|
||||||
chunk_size = long(length_byte / (int(length_byte / (epub_split_size_kb * 1024) ) + 2 ))
|
chunk_size = long(length_byte / (int(length_byte / (epub_split_size_kb * 1024)) + 2))
|
||||||
#if there are chunks with a superior size then go and break
|
# if there are chunks with a superior size then go and break
|
||||||
if (len(filter(lambda x: len(x) > chunk_size, txt.split('\n\n')))) :
|
if (len(filter(lambda x: len(x) > chunk_size, txt.split('\n\n')))) :
|
||||||
txt = '\n\n'.join([split_string_separator(line, chunk_size)
|
txt = '\n\n'.join([split_string_separator(line, chunk_size)
|
||||||
for line in txt.split('\n\n')])
|
for line in txt.split('\n\n')])
|
||||||
@ -95,11 +95,10 @@ def convert_basic(txt, title='', epub_split_size_kb=0):
|
|||||||
|
|
||||||
return HTML_TEMPLATE % (title, u'\n'.join(lines))
|
return HTML_TEMPLATE % (title, u'\n'.join(lines))
|
||||||
|
|
||||||
def convert_markdown(txt, title='', disable_toc=False):
|
def convert_markdown(txt, title='', extensions=('footnotes', 'tables', 'toc')):
|
||||||
|
from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
|
||||||
from calibre.ebooks.markdown import markdown
|
from calibre.ebooks.markdown import markdown
|
||||||
extensions=['footnotes', 'tables']
|
extensions = [x.lower() for x in extensions if x.lower() in MD_EXTENSIONS]
|
||||||
if not disable_toc:
|
|
||||||
extensions.append('toc')
|
|
||||||
md = markdown.Markdown(
|
md = markdown.Markdown(
|
||||||
extensions,
|
extensions,
|
||||||
safe_mode=False)
|
safe_mode=False)
|
||||||
|
@ -4,8 +4,11 @@ __license__ = 'GPL 3'
|
|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from PyQt4.Qt import QListWidgetItem, Qt
|
||||||
|
|
||||||
from calibre.gui2.convert.txt_input_ui import Ui_Form
|
from calibre.gui2.convert.txt_input_ui import Ui_Form
|
||||||
from calibre.gui2.convert import Widget
|
from calibre.gui2.convert import Widget
|
||||||
|
from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
|
||||||
|
|
||||||
class PluginWidget(Widget, Ui_Form):
|
class PluginWidget(Widget, Ui_Form):
|
||||||
|
|
||||||
@ -16,11 +19,42 @@ class PluginWidget(Widget, Ui_Form):
|
|||||||
|
|
||||||
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
||||||
Widget.__init__(self, parent,
|
Widget.__init__(self, parent,
|
||||||
['paragraph_type', 'formatting_type', 'markdown_disable_toc',
|
['paragraph_type', 'formatting_type', 'markdown_extensions',
|
||||||
'preserve_spaces', 'txt_in_remove_indents'])
|
'preserve_spaces', 'txt_in_remove_indents'])
|
||||||
self.db, self.book_id = db, book_id
|
self.db, self.book_id = db, book_id
|
||||||
for x in get_option('paragraph_type').option.choices:
|
for x in get_option('paragraph_type').option.choices:
|
||||||
self.opt_paragraph_type.addItem(x)
|
self.opt_paragraph_type.addItem(x)
|
||||||
for x in get_option('formatting_type').option.choices:
|
for x in get_option('formatting_type').option.choices:
|
||||||
self.opt_formatting_type.addItem(x)
|
self.opt_formatting_type.addItem(x)
|
||||||
|
self.md_map = {}
|
||||||
|
for name, text in MD_EXTENSIONS.iteritems():
|
||||||
|
i = QListWidgetItem('%s - %s' % (name, text), self.opt_markdown_extensions)
|
||||||
|
i.setFlags(Qt.ItemIsUserCheckable | Qt.ItemIsEnabled)
|
||||||
|
i.setData(Qt.UserRole, name)
|
||||||
|
self.md_map[name] = i
|
||||||
|
|
||||||
self.initialize_options(get_option, get_help, db, book_id)
|
self.initialize_options(get_option, get_help, db, book_id)
|
||||||
|
|
||||||
|
def setup_widget_help(self, g):
|
||||||
|
g._help = _('Specify which markdown extensions to enable')
|
||||||
|
return Widget.setup_widget_help(self, g)
|
||||||
|
|
||||||
|
def set_value_handler(self, g, val):
|
||||||
|
if g is self.opt_markdown_extensions:
|
||||||
|
for i in self.md_map.itervalues():
|
||||||
|
i.setCheckState(Qt.Unchecked)
|
||||||
|
for x in val.split(','):
|
||||||
|
x = x.strip()
|
||||||
|
if x in self.md_map:
|
||||||
|
self.md_map[x].setCheckState(Qt.Checked)
|
||||||
|
return True
|
||||||
|
|
||||||
|
def get_value_handler(self, g):
|
||||||
|
if g is not self.opt_markdown_extensions:
|
||||||
|
return Widget.get_value_handler(self, g)
|
||||||
|
return ', '.join(unicode(i.data(Qt.UserRole).toString()) for i in self.md_map.itervalues() if i.checkState())
|
||||||
|
|
||||||
|
def connect_gui_obj_handler(self, g, f):
|
||||||
|
if g is not self.opt_markdown_extensions:
|
||||||
|
raise NotImplementedError()
|
||||||
|
g.itemChanged.connect(lambda item: f())
|
||||||
|
@ -6,8 +6,8 @@
|
|||||||
<rect>
|
<rect>
|
||||||
<x>0</x>
|
<x>0</x>
|
||||||
<y>0</y>
|
<y>0</y>
|
||||||
<width>518</width>
|
<width>588</width>
|
||||||
<height>353</height>
|
<height>378</height>
|
||||||
</rect>
|
</rect>
|
||||||
</property>
|
</property>
|
||||||
<property name="windowTitle">
|
<property name="windowTitle">
|
||||||
@ -93,7 +93,7 @@
|
|||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item>
|
<item>
|
||||||
<widget class="QGroupBox" name="groupBox">
|
<widget class="QGroupBox" name="markdown_box">
|
||||||
<property name="title">
|
<property name="title">
|
||||||
<string>Markdown</string>
|
<string>Markdown</string>
|
||||||
</property>
|
</property>
|
||||||
@ -112,28 +112,28 @@
|
|||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item>
|
<item>
|
||||||
<widget class="QCheckBox" name="opt_markdown_disable_toc">
|
<widget class="QLabel" name="label_4">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Do not insert Table of Contents into output text when using markdown</string>
|
<string>You can optionally enable various extensions to the base markdown syntax, below.</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item>
|
||||||
|
<widget class="QListWidget" name="opt_markdown_extensions"/>
|
||||||
|
</item>
|
||||||
|
<item>
|
||||||
|
<widget class="QLabel" name="label_5">
|
||||||
|
<property name="text">
|
||||||
|
<string>More information on &lt;a href=&quot;http://pythonhosted.org/Markdown/extensions/index.html&quot;&gt;markdown extensions&lt;/a&gt;</string>
|
||||||
|
</property>
|
||||||
|
<property name="openExternalLinks">
|
||||||
|
<bool>true</bool>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
</layout>
|
</layout>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item>
|
|
||||||
<spacer name="verticalSpacer">
|
|
||||||
<property name="orientation">
|
|
||||||
<enum>Qt::Vertical</enum>
|
|
||||||
</property>
|
|
||||||
<property name="sizeHint" stdset="0">
|
|
||||||
<size>
|
|
||||||
<width>20</width>
|
|
||||||
<height>213</height>
|
|
||||||
</size>
|
|
||||||
</property>
|
|
||||||
</spacer>
|
|
||||||
</item>
|
|
||||||
</layout>
|
</layout>
|
||||||
</widget>
|
</widget>
|
||||||
<resources/>
|
<resources/>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user