diff --git a/src/calibre/ebooks/pdb/input.py b/src/calibre/ebooks/pdb/input.py index 3ad1a6121c..8f4751b42b 100644 --- a/src/calibre/ebooks/pdb/input.py +++ b/src/calibre/ebooks/pdb/input.py @@ -22,6 +22,12 @@ class PDBInput(InputFormatPlugin): help=_('Normally calibre treats blank lines as paragraph markers. ' 'With this option it will assume that every line represents ' 'a paragraph instead.')), + OptionRecommendation(name='print_formatted_paras', recommended_value=False, + help=_('Normally calibre treats blank lines as paragraph markers. ' + 'With this option it will assume that every line starting with ' + 'an indent (either a tab or 2+ spaces) represents a paragraph.' + 'Paragraphs end when the next line that starts with an indent ' + 'is reached.')), ]) def convert(self, stream, options, file_ext, log, diff --git a/src/calibre/ebooks/pdb/palmdoc/reader.py b/src/calibre/ebooks/pdb/palmdoc/reader.py index 8992382597..0a57e3f51a 100644 --- a/src/calibre/ebooks/pdb/palmdoc/reader.py +++ b/src/calibre/ebooks/pdb/palmdoc/reader.py @@ -13,8 +13,8 @@ import struct from calibre.ebooks.compression.palmdoc import decompress_doc from calibre.ebooks.pdb.formatreader import FormatReader -from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs, \ - opf_writer +from calibre.ebooks.txt.processor import convert_basic, opf_writer, \ + separate_paragraphs_single_line, separate_paragraphs_print_formatted class HeaderRecord(object): ''' @@ -36,6 +36,7 @@ class Reader(FormatReader): self.log = log self.encoding = options.input_encoding self.single_line_paras = options.single_line_paras + self.print_formatted_paras = options.print_formatted_paras self.sections = [] for i in range(header.num_sections): @@ -63,7 +64,9 @@ class Reader(FormatReader): self.log.info('Converting text to OEB...') if self.single_line_paras: - txt = separate_paragraphs(txt) + txt = separate_paragraphs_single_line(txt) + if self.print_formatted_paras: + txt = separate_paragraphs_print_formatted(txt) html = convert_basic(txt) with open(os.path.join(output_dir, 'index.html'), 'wb') as index: index.write(html.encode('utf-8')) diff --git a/src/calibre/ebooks/pdb/ztxt/reader.py b/src/calibre/ebooks/pdb/ztxt/reader.py index 664f498bee..5cac283264 100644 --- a/src/calibre/ebooks/pdb/ztxt/reader.py +++ b/src/calibre/ebooks/pdb/ztxt/reader.py @@ -12,8 +12,8 @@ import os, struct, zlib from calibre.ebooks.pdb.formatreader import FormatReader from calibre.ebooks.pdb.ztxt import zTXTError -from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs, \ - opf_writer +from calibre.ebooks.txt.processor import convert_basic, opf_writer, \ + separate_paragraphs_single_line, separate_paragraphs_print_formatted SUPPORTED_VERSION = (1, 40) @@ -31,22 +31,23 @@ class HeaderRecord(object): self.size, = struct.unpack('>L', raw[4:8]) self.record_size, = struct.unpack('>H', raw[8:10]) self.flags, = struct.unpack('>B', raw[18:19]) - - + + class Reader(FormatReader): - + def __init__(self, header, stream, log, options): self.stream = stream self.log = log self.encoding = options.input_encoding self.single_line_paras = options.single_line_paras - + self.print_formatted_paras = options.print_formatted_paras + self.sections = [] for i in range(header.num_sections): self.sections.append(header.section_data(i)) self.header_record = HeaderRecord(self.section_data(0)) - + vmajor = (self.header_record.version & 0x0000FF00) >> 8 vminor = self.header_record.version & 0x000000FF if vmajor < 1 or (vmajor == 1 and vminor < 40): @@ -71,7 +72,7 @@ class Reader(FormatReader): def extract_content(self, output_dir): txt = '' - + self.log.info('Decompressing text...') for i in range(1, self.header_record.num_records + 1): self.log.debug('\tDecompressing text section %i' % i) @@ -79,16 +80,18 @@ class Reader(FormatReader): self.log.info('Converting text to OEB...') if self.single_line_paras: - txt = separate_paragraphs(txt) + txt = separate_paragraphs_single_line(txt) + if self.print_formatted_paras: + txt = separate_paragraphs_print_formatted(txt) html = convert_basic(txt) with open(os.path.join(output_dir, 'index.html'), 'wb') as index: index.write(html.encode('utf-8')) - + from calibre.ebooks.metadata.meta import get_metadata mi = get_metadata(self.stream, 'pdb') manifest = [('index.html', None)] spine = ['index.html'] opf_writer(output_dir, 'metadata.opf', manifest, spine, mi) - + return os.path.join(output_dir, 'metadata.opf') diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index f52bfa6fb5..eb86113f7a 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -8,7 +8,7 @@ import os from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \ - separate_paragraphs + separate_paragraphs_single_line, separate_paragraphs_print_formatted class TXTInput(InputFormatPlugin): @@ -22,6 +22,12 @@ class TXTInput(InputFormatPlugin): help=_('Normally calibre treats blank lines as paragraph markers. ' 'With this option it will assume that every line represents ' 'a paragraph instead.')), + OptionRecommendation(name='print_formatted_paras', recommended_value=False, + help=_('Normally calibre treats blank lines as paragraph markers. ' + 'With this option it will assume that every line starting with ' + 'an indent (either a tab or 2+ spaces) represents a paragraph.' + 'Paragraphs end when the next line that starts with an indent ' + 'is reached.')), OptionRecommendation(name='markdown', recommended_value=False, help=_('Run the text input through the markdown pre-processor. To ' 'learn more about markdown see')+' http://daringfireball.net/projects/markdown/'), @@ -35,8 +41,11 @@ class TXTInput(InputFormatPlugin): log.debug('Reading text from file...') txt = stream.read().decode(ienc, 'replace') + # Adjust paragraph formatting as requested if options.single_line_paras: - txt = separate_paragraphs(txt) + txt = separate_paragraphs_single_line(txt) + if options.print_formatted_paras: + txt = separate_paragraphs_print_formatted(txt) if options.markdown: log.debug('Running text though markdown conversion...') diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index f6503c0bc5..9d0e1283c1 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -45,12 +45,16 @@ def convert_markdown(txt, title=''): safe_mode=False,) return HTML_TEMPLATE % (title, md.convert(txt)) -def separate_paragraphs(txt): +def separate_paragraphs_single_line(txt): txt = txt.replace('\r\n', '\n') txt = txt.replace('\r', '\n') txt = re.sub(u'(?<=.)\n(?=.)', u'\n\n', txt) return txt +def separate_paragraphs_print_formatted(txt): + txt = re.sub('(?miu)^(\t+|[ ]{2,})(?=.)', '\n\t', txt) + return txt + def opf_writer(path, opf_name, manifest, spine, mi): opf = OPFCreator(path, mi) opf.create_manifest(manifest) diff --git a/src/calibre/gui2/convert/pdb_input.py b/src/calibre/gui2/convert/pdb_input.py index 4b0ba73fda..058f589856 100644 --- a/src/calibre/gui2/convert/pdb_input.py +++ b/src/calibre/gui2/convert/pdb_input.py @@ -14,6 +14,6 @@ class PluginWidget(Widget, Ui_Form): def __init__(self, parent, get_option, get_help, db=None, book_id=None): Widget.__init__(self, parent, 'pdb_input', - ['single_line_paras']) + ['single_line_paras', 'print_formatted_paras']) self.db, self.book_id = db, book_id self.initialize_options(get_option, get_help, db, book_id) diff --git a/src/calibre/gui2/convert/pdb_input.ui b/src/calibre/gui2/convert/pdb_input.ui index 191e749833..2b632b1a33 100644 --- a/src/calibre/gui2/convert/pdb_input.ui +++ b/src/calibre/gui2/convert/pdb_input.ui @@ -14,7 +14,7 @@ Form - + Qt::Vertical @@ -34,6 +34,13 @@ + + + + Assume print formatting + + + diff --git a/src/calibre/gui2/convert/txt_input.py b/src/calibre/gui2/convert/txt_input.py index 3d17eefe0d..505a916f81 100644 --- a/src/calibre/gui2/convert/txt_input.py +++ b/src/calibre/gui2/convert/txt_input.py @@ -14,6 +14,6 @@ class PluginWidget(Widget, Ui_Form): def __init__(self, parent, get_option, get_help, db=None, book_id=None): Widget.__init__(self, parent, 'txt_input', - ['single_line_paras', 'markdown']) + ['single_line_paras', 'print_formatted_paras', 'markdown']) self.db, self.book_id = db, book_id self.initialize_options(get_option, get_help, db, book_id) diff --git a/src/calibre/gui2/convert/txt_input.ui b/src/calibre/gui2/convert/txt_input.ui index 8c22ff721e..94cc1ad25f 100644 --- a/src/calibre/gui2/convert/txt_input.ui +++ b/src/calibre/gui2/convert/txt_input.ui @@ -14,7 +14,7 @@ Form - + Qt::Vertical @@ -34,14 +34,14 @@ - + Process using markdown - + <p>Markdown is a simple markup language for text files, that allows for advanced formatting. To learn more visit <a href="http://daringfireball.net/projects/markdown">markdown</a>. @@ -51,6 +51,13 @@ + + + + Assume print formatting + + +