mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Implement #3418: Handle print style formatting for TXT input.
This commit is contained in:
parent
ec48f4029b
commit
2de625b3e1
@ -22,6 +22,12 @@ class PDBInput(InputFormatPlugin):
|
|||||||
help=_('Normally calibre treats blank lines as paragraph markers. '
|
help=_('Normally calibre treats blank lines as paragraph markers. '
|
||||||
'With this option it will assume that every line represents '
|
'With this option it will assume that every line represents '
|
||||||
'a paragraph instead.')),
|
'a paragraph instead.')),
|
||||||
|
OptionRecommendation(name='print_formatted_paras', recommended_value=False,
|
||||||
|
help=_('Normally calibre treats blank lines as paragraph markers. '
|
||||||
|
'With this option it will assume that every line starting with '
|
||||||
|
'an indent (either a tab or 2+ spaces) represents a paragraph. '  # trailing space: joined to the next literal
|
||||||
|
'Paragraphs end when the next line that starts with an indent '
|
||||||
|
'is reached.')),
|
||||||
])
|
])
|
||||||
|
|
||||||
def convert(self, stream, options, file_ext, log,
|
def convert(self, stream, options, file_ext, log,
|
||||||
|
@ -13,8 +13,8 @@ import struct
|
|||||||
|
|
||||||
from calibre.ebooks.compression.palmdoc import decompress_doc
|
from calibre.ebooks.compression.palmdoc import decompress_doc
|
||||||
from calibre.ebooks.pdb.formatreader import FormatReader
|
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||||
from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs, \
|
from calibre.ebooks.txt.processor import convert_basic, opf_writer, \
|
||||||
opf_writer
|
separate_paragraphs_single_line, separate_paragraphs_print_formatted
|
||||||
|
|
||||||
class HeaderRecord(object):
|
class HeaderRecord(object):
|
||||||
'''
|
'''
|
||||||
@ -36,6 +36,7 @@ class Reader(FormatReader):
|
|||||||
self.log = log
|
self.log = log
|
||||||
self.encoding = options.input_encoding
|
self.encoding = options.input_encoding
|
||||||
self.single_line_paras = options.single_line_paras
|
self.single_line_paras = options.single_line_paras
|
||||||
|
self.print_formatted_paras = options.print_formatted_paras
|
||||||
|
|
||||||
self.sections = []
|
self.sections = []
|
||||||
for i in range(header.num_sections):
|
for i in range(header.num_sections):
|
||||||
@ -63,7 +64,9 @@ class Reader(FormatReader):
|
|||||||
|
|
||||||
self.log.info('Converting text to OEB...')
|
self.log.info('Converting text to OEB...')
|
||||||
if self.single_line_paras:
|
if self.single_line_paras:
|
||||||
txt = separate_paragraphs(txt)
|
txt = separate_paragraphs_single_line(txt)
|
||||||
|
if self.print_formatted_paras:
|
||||||
|
txt = separate_paragraphs_print_formatted(txt)
|
||||||
html = convert_basic(txt)
|
html = convert_basic(txt)
|
||||||
with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
|
with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
|
||||||
index.write(html.encode('utf-8'))
|
index.write(html.encode('utf-8'))
|
||||||
|
@ -12,8 +12,8 @@ import os, struct, zlib
|
|||||||
|
|
||||||
from calibre.ebooks.pdb.formatreader import FormatReader
|
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||||
from calibre.ebooks.pdb.ztxt import zTXTError
|
from calibre.ebooks.pdb.ztxt import zTXTError
|
||||||
from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs, \
|
from calibre.ebooks.txt.processor import convert_basic, opf_writer, \
|
||||||
opf_writer
|
separate_paragraphs_single_line, separate_paragraphs_print_formatted
|
||||||
|
|
||||||
SUPPORTED_VERSION = (1, 40)
|
SUPPORTED_VERSION = (1, 40)
|
||||||
|
|
||||||
@ -40,6 +40,7 @@ class Reader(FormatReader):
|
|||||||
self.log = log
|
self.log = log
|
||||||
self.encoding = options.input_encoding
|
self.encoding = options.input_encoding
|
||||||
self.single_line_paras = options.single_line_paras
|
self.single_line_paras = options.single_line_paras
|
||||||
|
self.print_formatted_paras = options.print_formatted_paras
|
||||||
|
|
||||||
self.sections = []
|
self.sections = []
|
||||||
for i in range(header.num_sections):
|
for i in range(header.num_sections):
|
||||||
@ -79,7 +80,9 @@ class Reader(FormatReader):
|
|||||||
|
|
||||||
self.log.info('Converting text to OEB...')
|
self.log.info('Converting text to OEB...')
|
||||||
if self.single_line_paras:
|
if self.single_line_paras:
|
||||||
txt = separate_paragraphs(txt)
|
txt = separate_paragraphs_single_line(txt)
|
||||||
|
if self.print_formatted_paras:
|
||||||
|
txt = separate_paragraphs_print_formatted(txt)
|
||||||
html = convert_basic(txt)
|
html = convert_basic(txt)
|
||||||
with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
|
with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
|
||||||
index.write(html.encode('utf-8'))
|
index.write(html.encode('utf-8'))
|
||||||
|
@ -8,7 +8,7 @@ import os
|
|||||||
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||||
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
|
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
|
||||||
separate_paragraphs
|
separate_paragraphs_single_line, separate_paragraphs_print_formatted
|
||||||
|
|
||||||
class TXTInput(InputFormatPlugin):
|
class TXTInput(InputFormatPlugin):
|
||||||
|
|
||||||
@ -22,6 +22,12 @@ class TXTInput(InputFormatPlugin):
|
|||||||
help=_('Normally calibre treats blank lines as paragraph markers. '
|
help=_('Normally calibre treats blank lines as paragraph markers. '
|
||||||
'With this option it will assume that every line represents '
|
'With this option it will assume that every line represents '
|
||||||
'a paragraph instead.')),
|
'a paragraph instead.')),
|
||||||
|
OptionRecommendation(name='print_formatted_paras', recommended_value=False,
|
||||||
|
help=_('Normally calibre treats blank lines as paragraph markers. '
|
||||||
|
'With this option it will assume that every line starting with '
|
||||||
|
'an indent (either a tab or 2+ spaces) represents a paragraph. '  # trailing space: joined to the next literal
|
||||||
|
'Paragraphs end when the next line that starts with an indent '
|
||||||
|
'is reached.')),
|
||||||
OptionRecommendation(name='markdown', recommended_value=False,
|
OptionRecommendation(name='markdown', recommended_value=False,
|
||||||
help=_('Run the text input through the markdown pre-processor. To '
|
help=_('Run the text input through the markdown pre-processor. To '
|
||||||
'learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
|
'learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
|
||||||
@ -35,8 +41,11 @@ class TXTInput(InputFormatPlugin):
|
|||||||
log.debug('Reading text from file...')
|
log.debug('Reading text from file...')
|
||||||
txt = stream.read().decode(ienc, 'replace')
|
txt = stream.read().decode(ienc, 'replace')
|
||||||
|
|
||||||
|
# Adjust paragraph formatting as requested
|
||||||
if options.single_line_paras:
|
if options.single_line_paras:
|
||||||
txt = separate_paragraphs(txt)
|
txt = separate_paragraphs_single_line(txt)
|
||||||
|
if options.print_formatted_paras:
|
||||||
|
txt = separate_paragraphs_print_formatted(txt)
|
||||||
|
|
||||||
if options.markdown:
|
if options.markdown:
|
||||||
log.debug('Running text though markdown conversion...')
|
log.debug('Running text though markdown conversion...')
|
||||||
|
@ -45,12 +45,16 @@ def convert_markdown(txt, title=''):
|
|||||||
safe_mode=False,)
|
safe_mode=False,)
|
||||||
return HTML_TEMPLATE % (title, md.convert(txt))
|
return HTML_TEMPLATE % (title, md.convert(txt))
|
||||||
|
|
||||||
def separate_paragraphs_single_line(txt):
    '''
    Treat every line of the text as its own paragraph.

    Line endings are first brought to a bare newline. Every newline that
    sits directly between two non-empty lines is then doubled, so each
    line ends up separated from the next by a blank line. Existing blank
    lines are left untouched.

    :param txt: The text to process.
    :return: The newline-normalised text with single line breaks doubled.
    '''
    txt = txt.replace('\r\n', '\n')
    txt = txt.replace('\r', '\n')
    # The lookarounds require a non-newline character on both sides, so
    # only a lone line break is expanded; runs of newlines are kept as is.
    txt = re.sub(r'(?<=.)\n(?=.)', '\n\n', txt)
    return txt


# This function used to be called separate_paragraphs; keep that name
# importable so existing callers are not broken by the rename.
separate_paragraphs = separate_paragraphs_single_line
|
||||||
|
|
||||||
|
def separate_paragraphs_print_formatted(txt):
    '''
    Mark paragraph starts in print-formatted text.

    A line that begins with a tab or with two or more spaces, followed by
    visible text, is taken as the first line of a paragraph. Its indent is
    replaced by a newline plus a single tab, which puts a line break in
    front of every such paragraph. Lines without that indent, and lines
    containing only whitespace, are left unchanged.

    :param txt: The text to process.
    :return: The newline-normalised text with a line break inserted before
             each indented line.
    '''
    # In multiline mode ^ only anchors after a newline character, so
    # Windows and bare carriage-return line endings are normalised first
    # (the same normalisation separate_paragraphs_single_line performs).
    txt = txt.replace('\r\n', '\n')
    txt = txt.replace('\r', '\n')
    # Requiring a non-whitespace character after the indent keeps
    # whitespace-only lines from being turned into paragraph starts.
    txt = re.sub(r'(?mu)^(?:\t|[ ]{2})[ \t]*(?=\S)', '\n\t', txt)
    return txt
|
||||||
|
|
||||||
def opf_writer(path, opf_name, manifest, spine, mi):
|
def opf_writer(path, opf_name, manifest, spine, mi):
|
||||||
opf = OPFCreator(path, mi)
|
opf = OPFCreator(path, mi)
|
||||||
opf.create_manifest(manifest)
|
opf.create_manifest(manifest)
|
||||||
|
@ -14,6 +14,6 @@ class PluginWidget(Widget, Ui_Form):
|
|||||||
|
|
||||||
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
||||||
Widget.__init__(self, parent, 'pdb_input',
|
Widget.__init__(self, parent, 'pdb_input',
|
||||||
['single_line_paras'])
|
['single_line_paras', 'print_formatted_paras'])
|
||||||
self.db, self.book_id = db, book_id
|
self.db, self.book_id = db, book_id
|
||||||
self.initialize_options(get_option, get_help, db, book_id)
|
self.initialize_options(get_option, get_help, db, book_id)
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
<string>Form</string>
|
<string>Form</string>
|
||||||
</property>
|
</property>
|
||||||
<layout class="QGridLayout" name="gridLayout">
|
<layout class="QGridLayout" name="gridLayout">
|
||||||
<item row="1" column="0">
|
<item row="2" column="0">
|
||||||
<spacer name="verticalSpacer">
|
<spacer name="verticalSpacer">
|
||||||
<property name="orientation">
|
<property name="orientation">
|
||||||
<enum>Qt::Vertical</enum>
|
<enum>Qt::Vertical</enum>
|
||||||
@ -34,6 +34,13 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
<item row="1" column="0">
|
||||||
|
<widget class="QCheckBox" name="opt_print_formatted_paras">
|
||||||
|
<property name="text">
|
||||||
|
<string>Assume print formatting</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
</layout>
|
</layout>
|
||||||
</widget>
|
</widget>
|
||||||
<resources/>
|
<resources/>
|
||||||
|
@ -14,6 +14,6 @@ class PluginWidget(Widget, Ui_Form):
|
|||||||
|
|
||||||
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
||||||
Widget.__init__(self, parent, 'txt_input',
|
Widget.__init__(self, parent, 'txt_input',
|
||||||
['single_line_paras', 'markdown'])
|
['single_line_paras', 'print_formatted_paras', 'markdown'])
|
||||||
self.db, self.book_id = db, book_id
|
self.db, self.book_id = db, book_id
|
||||||
self.initialize_options(get_option, get_help, db, book_id)
|
self.initialize_options(get_option, get_help, db, book_id)
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
<string>Form</string>
|
<string>Form</string>
|
||||||
</property>
|
</property>
|
||||||
<layout class="QGridLayout" name="gridLayout">
|
<layout class="QGridLayout" name="gridLayout">
|
||||||
<item row="3" column="0">
|
<item row="4" column="0">
|
||||||
<spacer name="verticalSpacer">
|
<spacer name="verticalSpacer">
|
||||||
<property name="orientation">
|
<property name="orientation">
|
||||||
<enum>Qt::Vertical</enum>
|
<enum>Qt::Vertical</enum>
|
||||||
@ -34,14 +34,14 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="1" column="0">
|
<item row="2" column="0">
|
||||||
<widget class="QCheckBox" name="opt_markdown">
|
<widget class="QCheckBox" name="opt_markdown">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Process using markdown</string>
|
<string>Process using markdown</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="2" column="0">
|
<item row="3" column="0">
|
||||||
<widget class="QLabel" name="label">
|
<widget class="QLabel" name="label">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string><p>Markdown is a simple markup language for text files, that allows for advanced formatting. To learn more visit <a href="http://daringfireball.net/projects/markdown">markdown</a>.</string>
|
<string><p>Markdown is a simple markup language for text files, that allows for advanced formatting. To learn more visit <a href="http://daringfireball.net/projects/markdown">markdown</a>.</string>
|
||||||
@ -51,6 +51,13 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
<item row="1" column="0">
|
||||||
|
<widget class="QCheckBox" name="opt_print_formatted_paras">
|
||||||
|
<property name="text">
|
||||||
|
<string>Assume print formatting</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
</layout>
|
</layout>
|
||||||
</widget>
|
</widget>
|
||||||
<resources/>
|
<resources/>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user