mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Implement #3418: Handle print style formatting for TXT input.
This commit is contained in:
parent
ec48f4029b
commit
2de625b3e1
@ -22,6 +22,12 @@ class PDBInput(InputFormatPlugin):
|
||||
help=_('Normally calibre treats blank lines as paragraph markers. '
|
||||
'With this option it will assume that every line represents '
|
||||
'a paragraph instead.')),
|
||||
OptionRecommendation(name='print_formatted_paras', recommended_value=False,
|
||||
help=_('Normally calibre treats blank lines as paragraph markers. '
|
||||
'With this option it will assume that every line starting with '
|
||||
'an indent (either a tab or 2+ spaces) represents a paragraph. '
|
||||
'Paragraphs end when the next line that starts with an indent '
|
||||
'is reached.')),
|
||||
])
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
|
@ -13,8 +13,8 @@ import struct
|
||||
|
||||
from calibre.ebooks.compression.palmdoc import decompress_doc
|
||||
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||
from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs, \
|
||||
opf_writer
|
||||
from calibre.ebooks.txt.processor import convert_basic, opf_writer, \
|
||||
separate_paragraphs_single_line
|
||||
|
||||
class HeaderRecord(object):
|
||||
'''
|
||||
@ -36,6 +36,7 @@ class Reader(FormatReader):
|
||||
self.log = log
|
||||
self.encoding = options.input_encoding
|
||||
self.single_line_paras = options.single_line_paras
|
||||
self.print_formatted_paras = options.print_formatted_paras
|
||||
|
||||
self.sections = []
|
||||
for i in range(header.num_sections):
|
||||
@ -63,7 +64,9 @@ class Reader(FormatReader):
|
||||
|
||||
self.log.info('Converting text to OEB...')
|
||||
if self.single_line_paras:
|
||||
txt = separate_paragraphs(txt)
|
||||
txt = separate_paragraphs_single_line(txt)
|
||||
if self.print_formatted_paras:
|
||||
txt = separate_paragraphs_print_formatted(txt)
|
||||
html = convert_basic(txt)
|
||||
with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
|
||||
index.write(html.encode('utf-8'))
|
||||
|
@ -12,8 +12,8 @@ import os, struct, zlib
|
||||
|
||||
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||
from calibre.ebooks.pdb.ztxt import zTXTError
|
||||
from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs, \
|
||||
opf_writer
|
||||
from calibre.ebooks.txt.processor import convert_basic, opf_writer, \
|
||||
separate_paragraphs_single_line
|
||||
|
||||
SUPPORTED_VERSION = (1, 40)
|
||||
|
||||
@ -40,6 +40,7 @@ class Reader(FormatReader):
|
||||
self.log = log
|
||||
self.encoding = options.input_encoding
|
||||
self.single_line_paras = options.single_line_paras
|
||||
self.print_formatted_paras = options.print_formatted_paras
|
||||
|
||||
self.sections = []
|
||||
for i in range(header.num_sections):
|
||||
@ -79,7 +80,9 @@ class Reader(FormatReader):
|
||||
|
||||
self.log.info('Converting text to OEB...')
|
||||
if self.single_line_paras:
|
||||
txt = separate_paragraphs(txt)
|
||||
txt = separate_paragraphs_single_line(txt)
|
||||
if self.print_formatted_paras:
|
||||
txt = separate_paragraphs_print_formatted(txt)
|
||||
html = convert_basic(txt)
|
||||
with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
|
||||
index.write(html.encode('utf-8'))
|
||||
|
@ -8,7 +8,7 @@ import os
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
|
||||
separate_paragraphs
|
||||
separate_paragraphs_single_line, separate_paragraphs_print_formatted
|
||||
|
||||
class TXTInput(InputFormatPlugin):
|
||||
|
||||
@ -22,6 +22,12 @@ class TXTInput(InputFormatPlugin):
|
||||
help=_('Normally calibre treats blank lines as paragraph markers. '
|
||||
'With this option it will assume that every line represents '
|
||||
'a paragraph instead.')),
|
||||
OptionRecommendation(name='print_formatted_paras', recommended_value=False,
|
||||
help=_('Normally calibre treats blank lines as paragraph markers. '
|
||||
'With this option it will assume that every line starting with '
|
||||
'an indent (either a tab or 2+ spaces) represents a paragraph. '
|
||||
'Paragraphs end when the next line that starts with an indent '
|
||||
'is reached.')),
|
||||
OptionRecommendation(name='markdown', recommended_value=False,
|
||||
help=_('Run the text input through the markdown pre-processor. To '
|
||||
'learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
|
||||
@ -35,8 +41,11 @@ class TXTInput(InputFormatPlugin):
|
||||
log.debug('Reading text from file...')
|
||||
txt = stream.read().decode(ienc, 'replace')
|
||||
|
||||
# Adjust paragraph formatting as requested
|
||||
if options.single_line_paras:
|
||||
txt = separate_paragraphs(txt)
|
||||
txt = separate_paragraphs_single_line(txt)
|
||||
if options.print_formatted_paras:
|
||||
txt = separate_paragraphs_print_formatted(txt)
|
||||
|
||||
if options.markdown:
|
||||
log.debug('Running text through markdown conversion...')
|
||||
|
@ -45,12 +45,16 @@ def convert_markdown(txt, title=''):
|
||||
safe_mode=False,)
|
||||
return HTML_TEMPLATE % (title, md.convert(txt))
|
||||
|
||||
def separate_paragraphs(txt):
|
||||
def separate_paragraphs_single_line(txt):
|
||||
txt = txt.replace('\r\n', '\n')
|
||||
txt = txt.replace('\r', '\n')
|
||||
txt = re.sub(u'(?<=.)\n(?=.)', u'\n\n', txt)
|
||||
return txt
|
||||
|
||||
def separate_paragraphs_print_formatted(txt):
|
||||
txt = re.sub('(?miu)^(\t+|[ ]{2,})(?=.)', '\n\t', txt)
|
||||
return txt
|
||||
|
||||
def opf_writer(path, opf_name, manifest, spine, mi):
|
||||
opf = OPFCreator(path, mi)
|
||||
opf.create_manifest(manifest)
|
||||
|
@ -14,6 +14,6 @@ class PluginWidget(Widget, Ui_Form):
|
||||
|
||||
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
||||
Widget.__init__(self, parent, 'pdb_input',
|
||||
['single_line_paras'])
|
||||
['single_line_paras', 'print_formatted_paras'])
|
||||
self.db, self.book_id = db, book_id
|
||||
self.initialize_options(get_option, get_help, db, book_id)
|
||||
|
@ -14,7 +14,7 @@
|
||||
<string>Form</string>
|
||||
</property>
|
||||
<layout class="QGridLayout" name="gridLayout">
|
||||
<item row="1" column="0">
|
||||
<item row="2" column="0">
|
||||
<spacer name="verticalSpacer">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Vertical</enum>
|
||||
@ -34,6 +34,13 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="0">
|
||||
<widget class="QCheckBox" name="opt_print_formatted_paras">
|
||||
<property name="text">
|
||||
<string>Assume print formatting</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
<resources/>
|
||||
|
@ -14,6 +14,6 @@ class PluginWidget(Widget, Ui_Form):
|
||||
|
||||
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
||||
Widget.__init__(self, parent, 'txt_input',
|
||||
['single_line_paras', 'markdown'])
|
||||
['single_line_paras', 'print_formatted_paras', 'markdown'])
|
||||
self.db, self.book_id = db, book_id
|
||||
self.initialize_options(get_option, get_help, db, book_id)
|
||||
|
@ -14,7 +14,7 @@
|
||||
<string>Form</string>
|
||||
</property>
|
||||
<layout class="QGridLayout" name="gridLayout">
|
||||
<item row="3" column="0">
|
||||
<item row="4" column="0">
|
||||
<spacer name="verticalSpacer">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Vertical</enum>
|
||||
@ -34,14 +34,14 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="0">
|
||||
<item row="2" column="0">
|
||||
<widget class="QCheckBox" name="opt_markdown">
|
||||
<property name="text">
|
||||
<string>Process using markdown</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="0">
|
||||
<item row="3" column="0">
|
||||
<widget class="QLabel" name="label">
|
||||
<property name="text">
|
||||
<string><p>Markdown is a simple markup language for text files, that allows for advanced formatting. To learn more visit <a href="http://daringfireball.net/projects/markdown">markdown</a>.</string>
|
||||
@ -51,6 +51,13 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="0">
|
||||
<widget class="QCheckBox" name="opt_print_formatted_paras">
|
||||
<property name="text">
|
||||
<string>Assume print formatting</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
<resources/>
|
||||
|
Loading…
x
Reference in New Issue
Block a user