Implement #3418: Handle print style formatting for TXT input.

This commit is contained in:
John Schember 2009-09-06 12:59:21 -04:00
parent ec48f4029b
commit 2de625b3e1
9 changed files with 54 additions and 15 deletions

View File

@ -22,6 +22,12 @@ class PDBInput(InputFormatPlugin):
help=_('Normally calibre treats blank lines as paragraph markers. '
'With this option it will assume that every line represents '
'a paragraph instead.')),
# Option enabling "print style" paragraph detection for PDB input.
# Each help fragment ends with a space so the implicitly concatenated
# literals read as separate sentences in the rendered help text.
OptionRecommendation(name='print_formatted_paras', recommended_value=False,
    help=_('Normally calibre treats blank lines as paragraph markers. '
        'With this option it will assume that every line starting with '
        'an indent (either a tab or 2+ spaces) represents a paragraph. '
        'Paragraphs end when the next line that starts with an indent '
        'is reached.')),
])
def convert(self, stream, options, file_ext, log,

View File

@ -13,8 +13,8 @@ import struct
from calibre.ebooks.compression.palmdoc import decompress_doc
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs, \
opf_writer
# Reader.extract_content() calls both paragraph helpers, so both must be
# imported here; omitting separate_paragraphs_print_formatted raises
# NameError as soon as the print_formatted_paras option is enabled.
from calibre.ebooks.txt.processor import convert_basic, opf_writer, \
    separate_paragraphs_single_line, separate_paragraphs_print_formatted
class HeaderRecord(object):
'''
@ -36,6 +36,7 @@ class Reader(FormatReader):
self.log = log
self.encoding = options.input_encoding
self.single_line_paras = options.single_line_paras
self.print_formatted_paras = options.print_formatted_paras
self.sections = []
for i in range(header.num_sections):
@ -63,7 +64,9 @@ class Reader(FormatReader):
self.log.info('Converting text to OEB...')
if self.single_line_paras:
txt = separate_paragraphs(txt)
txt = separate_paragraphs_single_line(txt)
if self.print_formatted_paras:
txt = separate_paragraphs_print_formatted(txt)
html = convert_basic(txt)
with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
index.write(html.encode('utf-8'))

View File

@ -12,8 +12,8 @@ import os, struct, zlib
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.pdb.ztxt import zTXTError
from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs, \
opf_writer
# The zTXT reader calls both paragraph helpers, so both must be imported
# here; omitting separate_paragraphs_print_formatted raises NameError as
# soon as the print_formatted_paras option is enabled.
from calibre.ebooks.txt.processor import convert_basic, opf_writer, \
    separate_paragraphs_single_line, separate_paragraphs_print_formatted
SUPPORTED_VERSION = (1, 40)
@ -40,6 +40,7 @@ class Reader(FormatReader):
self.log = log
self.encoding = options.input_encoding
self.single_line_paras = options.single_line_paras
self.print_formatted_paras = options.print_formatted_paras
self.sections = []
for i in range(header.num_sections):
@ -79,7 +80,9 @@ class Reader(FormatReader):
self.log.info('Converting text to OEB...')
if self.single_line_paras:
txt = separate_paragraphs(txt)
txt = separate_paragraphs_single_line(txt)
if self.print_formatted_paras:
txt = separate_paragraphs_print_formatted(txt)
html = convert_basic(txt)
with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
index.write(html.encode('utf-8'))

View File

@ -8,7 +8,7 @@ import os
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
separate_paragraphs
separate_paragraphs_single_line, separate_paragraphs_print_formatted
class TXTInput(InputFormatPlugin):
@ -22,6 +22,12 @@ class TXTInput(InputFormatPlugin):
help=_('Normally calibre treats blank lines as paragraph markers. '
'With this option it will assume that every line represents '
'a paragraph instead.')),
# Option enabling "print style" paragraph detection for TXT input.
# Each help fragment ends with a space so the implicitly concatenated
# literals read as separate sentences in the rendered help text.
OptionRecommendation(name='print_formatted_paras', recommended_value=False,
    help=_('Normally calibre treats blank lines as paragraph markers. '
        'With this option it will assume that every line starting with '
        'an indent (either a tab or 2+ spaces) represents a paragraph. '
        'Paragraphs end when the next line that starts with an indent '
        'is reached.')),
OptionRecommendation(name='markdown', recommended_value=False,
help=_('Run the text input through the markdown pre-processor. To '
'learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
@ -35,8 +41,11 @@ class TXTInput(InputFormatPlugin):
log.debug('Reading text from file...')
txt = stream.read().decode(ienc, 'replace')
# Adjust paragraph formatting as requested
if options.single_line_paras:
txt = separate_paragraphs(txt)
txt = separate_paragraphs_single_line(txt)
if options.print_formatted_paras:
txt = separate_paragraphs_print_formatted(txt)
if options.markdown:
log.debug('Running text though markdown conversion...')

View File

@ -45,12 +45,16 @@ def convert_markdown(txt, title=''):
safe_mode=False,)
return HTML_TEMPLATE % (title, md.convert(txt))
def separate_paragraphs(txt):
def separate_paragraphs_single_line(txt):
    '''
    Turn every line of ``txt`` into its own paragraph.

    Line endings are first unified to LF. After that, each newline that
    sits directly between two non-empty lines is doubled, which yields the
    blank-line paragraph separator. Existing blank lines and leading or
    trailing newlines are left as they are.
    '''
    # CRLF has to be folded before bare CR, otherwise a CRLF pair would
    # become two newlines.
    unified = txt.replace('\r\n', '\n').replace('\r', '\n')
    lone_newline = re.compile(u'(?<=.)\n(?=.)')
    return lone_newline.sub(u'\n\n', unified)
def separate_paragraphs_print_formatted(txt):
    '''
    Mark paragraphs in print-formatted text, where a new paragraph is
    signalled by an indented first line rather than by a blank line.

    A line starts a paragraph when it begins with one or more tabs, or with
    two or more spaces, and has visible text somewhere after that indent.
    The indent of such a line is replaced by a newline plus a single tab,
    which puts a blank line in front of every paragraph after the first.

    Line endings are unified to LF before matching, the same way
    separate_paragraphs_single_line does it.

    :param txt: The text to convert.
    :return: The converted text, using LF line endings.
    '''
    # Without this, CR-only text has no line starts for ^ to match, and in
    # CRLF text the CR of a whitespace-only line passes for "content".
    txt = txt.replace('\r\n', '\n').replace('\r', '\n')
    # (?=.*\S) needs a visible character later on the same line ('.' never
    # crosses a newline), so whitespace-only lines are not mistaken for
    # paragraph starts.
    return re.sub(r'(?mu)^(?:\t+|[ ]{2,})(?=.*\S)', '\n\t', txt)
def opf_writer(path, opf_name, manifest, spine, mi):
opf = OPFCreator(path, mi)
opf.create_manifest(manifest)

View File

@ -14,6 +14,6 @@ class PluginWidget(Widget, Ui_Form):
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent, 'pdb_input',
['single_line_paras'])
['single_line_paras', 'print_formatted_paras'])
self.db, self.book_id = db, book_id
self.initialize_options(get_option, get_help, db, book_id)

View File

@ -14,7 +14,7 @@
<string>Form</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="1" column="0">
<item row="2" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -34,6 +34,13 @@
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QCheckBox" name="opt_print_formatted_paras">
<property name="text">
<string>Assume print formatting</string>
</property>
</widget>
</item>
</layout>
</widget>
<resources/>

View File

@ -14,6 +14,6 @@ class PluginWidget(Widget, Ui_Form):
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent, 'txt_input',
['single_line_paras', 'markdown'])
['single_line_paras', 'print_formatted_paras', 'markdown'])
self.db, self.book_id = db, book_id
self.initialize_options(get_option, get_help, db, book_id)

View File

@ -14,7 +14,7 @@
<string>Form</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="3" column="0">
<item row="4" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -34,14 +34,14 @@
</property>
</widget>
</item>
<item row="1" column="0">
<item row="2" column="0">
<widget class="QCheckBox" name="opt_markdown">
<property name="text">
<string>Process using markdown</string>
</property>
</widget>
</item>
<item row="2" column="0">
<item row="3" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>&lt;p&gt;Markdown is a simple markup language for text files, that allows for advanced formatting. To learn more visit &lt;a href=&quot;http://daringfireball.net/projects/markdown&quot;&gt;markdown&lt;/a&gt;.</string>
@ -51,6 +51,13 @@
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QCheckBox" name="opt_print_formatted_paras">
<property name="text">
<string>Assume print formatting</string>
</property>
</widget>
</item>
</layout>
</widget>
<resources/>