diff --git a/src/calibre/ebooks/pdb/input.py b/src/calibre/ebooks/pdb/input.py
index b8b4b93ca1..3688abff3f 100644
--- a/src/calibre/ebooks/pdb/input.py
+++ b/src/calibre/ebooks/pdb/input.py
@@ -19,15 +19,20 @@ class PDBInput(InputFormatPlugin):
file_types = set(['pdb'])
options = set([
- OptionRecommendation(name='paragraph_format', recommended_value='auto',
- choices=['auto', 'block', 'single', 'print', 'markdown'],
- help=_('How calibre splits text into paragraphs.\n'
+ OptionRecommendation(name='paragraph_type', recommended_value='auto',
+ choices=['auto', 'block', 'single', 'print'],
+ help=_('Paragraph structure.\n'
- 'choices are [\'auto\', \'block\', \'single\', \'print\', \'markdown\']\n'
- '* auto: Try to auto detect paragraph format.\n'
+ 'choices are [\'auto\', \'block\', \'single\', \'print\']\n'
+ '* auto: Try to auto detect paragraph type.\n'
'* block: Treat a blank line as a paragraph break.\n'
'* single: Assume every line is a paragraph.\n'
'* print: Assume every line starting with 2+ spaces or a tab '
- 'starts a paragraph.\n'
+ 'starts a paragraph.')),
+ OptionRecommendation(name='formatting_type', recommended_value='auto',
+ choices=['auto', 'none', 'markdown'],
+ help=_('Formatting used within the document.\n'
+ '* auto: Try to auto detect the document formatting.\n'
+ '* none: Do not modify the paragraph formatting. Everything is a paragraph.\n'
'* markdown: Run the input though the markdown pre-processor. '
'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
OptionRecommendation(name='preserve_spaces', recommended_value=False,
diff --git a/src/calibre/ebooks/tcr/input.py b/src/calibre/ebooks/tcr/input.py
index 5f9554665b..e4118c1c0a 100644
--- a/src/calibre/ebooks/tcr/input.py
+++ b/src/calibre/ebooks/tcr/input.py
@@ -17,15 +17,20 @@ class TCRInput(InputFormatPlugin):
file_types = set(['tcr'])
options = set([
- OptionRecommendation(name='paragraph_format', recommended_value='auto',
- choices=['auto', 'block', 'single', 'print', 'markdown'],
- help=_('How calibre splits text into paragraphs.\n'
+ OptionRecommendation(name='paragraph_type', recommended_value='auto',
+ choices=['auto', 'block', 'single', 'print'],
+ help=_('Paragraph structure.\n'
- 'choices are [\'auto\', \'block\', \'single\', \'print\', \'markdown\']\n'
- '* auto: Try to auto detect paragraph format.\n'
+ 'choices are [\'auto\', \'block\', \'single\', \'print\']\n'
+ '* auto: Try to auto detect paragraph type.\n'
'* block: Treat a blank line as a paragraph break.\n'
'* single: Assume every line is a paragraph.\n'
'* print: Assume every line starting with 2+ spaces or a tab '
- 'starts a paragraph.\n'
+ 'starts a paragraph.')),
+ OptionRecommendation(name='formatting_type', recommended_value='auto',
+ choices=['auto', 'none', 'markdown'],
+ help=_('Formatting used within the document.\n'
+ '* auto: Try to auto detect the document formatting.\n'
+ '* none: Do not modify the paragraph formatting. Everything is a paragraph.\n'
'* markdown: Run the input though the markdown pre-processor. '
'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
OptionRecommendation(name='preserve_spaces', recommended_value=False,
diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index e68c47e9b3..47e92a45a9 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -10,7 +10,7 @@ from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.ebooks.chardet import detect
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
- preserve_spaces, detect_paragraph_formatting
+ preserve_spaces, detect_paragraph_type, detect_formatting_type
from calibre import _ent_pat, xml_entity_to_unicode
class TXTInput(InputFormatPlugin):
@@ -21,15 +21,20 @@ class TXTInput(InputFormatPlugin):
file_types = set(['txt'])
options = set([
- OptionRecommendation(name='paragraph_format', recommended_value='auto',
- choices=['auto', 'block', 'single', 'print', 'markdown'],
- help=_('How calibre splits text into paragraphs.\n'
+ OptionRecommendation(name='paragraph_type', recommended_value='auto',
+ choices=['auto', 'block', 'single', 'print'],
+ help=_('Paragraph structure.\n'
- 'choices are [\'auto\', \'block\', \'single\', \'print\', \'markdown\']\n'
- '* auto: Try to auto detect paragraph format.\n'
+ 'choices are [\'auto\', \'block\', \'single\', \'print\']\n'
+ '* auto: Try to auto detect paragraph type.\n'
'* block: Treat a blank line as a paragraph break.\n'
'* single: Assume every line is a paragraph.\n'
'* print: Assume every line starting with 2+ spaces or a tab '
- 'starts a paragraph.\n'
+ 'starts a paragraph.')),
+ OptionRecommendation(name='formatting_type', recommended_value='auto',
+ choices=['auto', 'none', 'markdown'],
+ help=_('Formatting used within the document.\n'
+ '* auto: Try to auto detect the document formatting.\n'
+ '* none: Do not modify the paragraph formatting. Everything is a paragraph.\n'
'* markdown: Run the input though the markdown pre-processor. '
'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
OptionRecommendation(name='preserve_spaces', recommended_value=False,
@@ -57,29 +62,16 @@ class TXTInput(InputFormatPlugin):
log.debug('No input encoding specified and could not auto detect using %s' % ienc)
txt = txt.decode(ienc, 'replace')
- # Determine the formatting of the document.
- if options.paragraph_format == 'auto':
- options.paragraph_format = detect_paragraph_formatting(txt)
- if options.paragraph_format == 'unknown':
- log.debug('Could not reliably determine paragraph format using block format')
- options.paragraph_format = 'block'
- else:
- log.debug('Auto detected paragraph format as %s' % options.paragraph_format)
-
- # We don't check for block because the processor assumes block.
- # single and print at transformed to block for processing.
- if options.paragraph_format == 'single':
- txt = separate_paragraphs_single_line(txt)
- elif options.paragraph_format == 'print':
- txt = separate_paragraphs_print_formatted(txt)
-
txt = _ent_pat.sub(xml_entity_to_unicode, txt)
# Preserve spaces will replace multiple spaces to a space
# followed by the entity.
if options.preserve_spaces:
txt = preserve_spaces(txt)
+
+ if options.formatting_type == 'auto':
+ options.formatting_type = detect_formatting_type(txt)
- if options.paragraph_format == 'markdown':
+ if options.formatting_type == 'markdown':
log.debug('Running text though markdown conversion...')
try:
html = convert_markdown(txt, disable_toc=options.markdown_disable_toc)
@@ -87,6 +79,22 @@ class TXTInput(InputFormatPlugin):
raise ValueError('This txt file has malformed markup, it cannot be'
' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
else:
+ # Determine the paragraph type of the document.
+ if options.paragraph_type == 'auto':
+ options.paragraph_type = detect_paragraph_type(txt)
+ if options.paragraph_type == 'unknown':
+ log.debug('Could not reliably determine paragraph type using block')
+ options.paragraph_type = 'block'
+ else:
+ log.debug('Auto detected paragraph type as %s' % options.paragraph_type)
+
+ # We don't check for block because the processor assumes block.
+ # single and print at transformed to block for processing.
+ if options.paragraph_type == 'single':
+ txt = separate_paragraphs_single_line(txt)
+ elif options.paragraph_type == 'print':
+ txt = separate_paragraphs_print_formatted(txt)
+
flow_size = getattr(options, 'flow_size', 0)
html = convert_basic(txt, epub_split_size_kb=flow_size)
diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py
index e1014b0c7b..f6d628e7c5 100644
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@@ -93,7 +93,7 @@ def split_string_separator(txt, size) :
xrange(0, len(txt), size)])
return txt
-def detect_paragraph_formatting(txt):
+def detect_paragraph_type(txt):
'''
Tries to determine the formatting of the document.
@@ -109,6 +109,20 @@ def detect_paragraph_formatting(txt):
txt = txt.replace('\r', '\n')
txt_line_count = len(re.findall('(?mu)^\s*.+$', txt))
+ # Check for print: at least a quarter of the text lines start indented.
+ tab_line_count = len(re.findall('(?mu)^(\t|\s{2,}).+$', txt))
+ if txt_line_count and tab_line_count / float(txt_line_count) >= .25:
+ return 'print'
+
+ # Check for block: blank lines number at least a quarter of the text lines.
+ empty_line_count = len(re.findall('(?mu)^\s*$', txt))
+ if txt_line_count and empty_line_count / float(txt_line_count) >= .25:
+ return 'block'
+
+ # Nothing else matched (or there are no text lines at all), so assume single.
+ return 'single'
+
+def detect_formatting_type(txt):
# Check for markdown
# Headings
if len(re.findall('(?mu)^#+', txt)) >= 5:
@@ -129,16 +143,4 @@ def detect_paragraph_formatting(txt):
if txt.count('\\'+c) > 10:
return 'markdown'
- # Check for print
- tab_line_count = len(re.findall('(?mu)^(\t|\s{2,}).+$', txt))
- if tab_line_count / float(txt_line_count) >= .25:
- return 'print'
-
- # Check for block
- empty_line_count = len(re.findall('(?mu)^\s*$', txt))
- if empty_line_count / float(txt_line_count) >= .25:
- return 'block'
-
- # Nothing else matched to assume single.
- return 'single'
-
+ return 'none'
diff --git a/src/calibre/gui2/convert/pdb_input.py b/src/calibre/gui2/convert/pdb_input.py
index 655f4025a7..16ff1ff236 100644
--- a/src/calibre/gui2/convert/pdb_input.py
+++ b/src/calibre/gui2/convert/pdb_input.py
@@ -16,8 +16,10 @@ class PluginWidget(Widget, Ui_Form):
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent,
- ['paragraph_format', 'markdown_disable_toc', 'preserve_spaces'])
+ ['paragraph_type', 'formatting_type', 'markdown_disable_toc', 'preserve_spaces'])
self.db, self.book_id = db, book_id
- for x in get_option('paragraph_format').option.choices:
- self.opt_paragraph_format.addItem(x)
+ for x in get_option('paragraph_type').option.choices:
+ self.opt_paragraph_type.addItem(x)
+ for x in get_option('formatting_type').option.choices:
+ self.opt_formatting_type.addItem(x)
self.initialize_options(get_option, get_help, db, book_id)
diff --git a/src/calibre/gui2/convert/tcr_input.py b/src/calibre/gui2/convert/tcr_input.py
index 2aa877ce4d..366643ad5b 100644
--- a/src/calibre/gui2/convert/tcr_input.py
+++ b/src/calibre/gui2/convert/tcr_input.py
@@ -16,8 +16,10 @@ class PluginWidget(Widget, Ui_Form):
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent,
- ['paragraph_format', 'markdown_disable_toc', 'preserve_spaces'])
+ ['paragraph_type', 'formatting_type', 'markdown_disable_toc', 'preserve_spaces'])
self.db, self.book_id = db, book_id
- for x in get_option('paragraph_format').option.choices:
- self.opt_paragraph_format.addItem(x)
+ for x in get_option('paragraph_type').option.choices:
+ self.opt_paragraph_type.addItem(x)
+ for x in get_option('formatting_type').option.choices:
+ self.opt_formatting_type.addItem(x)
self.initialize_options(get_option, get_help, db, book_id)
diff --git a/src/calibre/gui2/convert/txt_input.py b/src/calibre/gui2/convert/txt_input.py
index 99d04fe2f4..62672cc0f9 100644
--- a/src/calibre/gui2/convert/txt_input.py
+++ b/src/calibre/gui2/convert/txt_input.py
@@ -16,8 +16,10 @@ class PluginWidget(Widget, Ui_Form):
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent,
- ['paragraph_format', 'markdown_disable_toc', 'preserve_spaces'])
+ ['paragraph_type', 'formatting_type', 'markdown_disable_toc', 'preserve_spaces'])
self.db, self.book_id = db, book_id
- for x in get_option('paragraph_format').option.choices:
- self.opt_paragraph_format.addItem(x)
+ for x in get_option('paragraph_type').option.choices:
+ self.opt_paragraph_type.addItem(x)
+ for x in get_option('formatting_type').option.choices:
+ self.opt_formatting_type.addItem(x)
self.initialize_options(get_option, get_help, db, book_id)
diff --git a/src/calibre/gui2/convert/txt_input.ui b/src/calibre/gui2/convert/txt_input.ui
index b45297fdf2..6cbd68135f 100644
--- a/src/calibre/gui2/convert/txt_input.ui
+++ b/src/calibre/gui2/convert/txt_input.ui
@@ -6,7 +6,7 @@
0
0
- 488
+ 518
300
@@ -17,41 +17,21 @@
-
- Document structure detection
+ Paragraph style:
-
-
+
- -
-
-
- <p>Markdown is a simple markup language for text files, that allows for advanced formatting. To learn more visit <a href="http://daringfireball.net/projects/markdown">markdown</a>.
-
-
- true
-
-
- true
-
-
-
- -
-
-
- Do not insert Table of Contents into output text when using markdown
-
-
-
- -
+
-
Preserve &spaces
- -
+
-
Qt::Vertical
@@ -64,6 +44,45 @@
+ -
+
+
+ -
+
+
+ Formatting style:
+
+
+
+ -
+
+
+ Markdown Options
+
+
+
-
+
+
+ <p>Markdown is a simple markup language for text files, that allows for advanced formatting. To learn more visit <a href="http://daringfireball.net/projects/markdown">markdown</a>.
+
+
+ true
+
+
+ true
+
+
+
+ -
+
+
+ Do not insert Table of Contents into output text when using markdown
+
+
+
+
+
+