diff --git a/src/calibre/ebooks/pdb/ereader/reader.py b/src/calibre/ebooks/pdb/ereader/reader.py index 7a3298122f..77ca8d6933 100644 --- a/src/calibre/ebooks/pdb/ereader/reader.py +++ b/src/calibre/ebooks/pdb/ereader/reader.py @@ -15,13 +15,13 @@ from calibre.ebooks.pdb.ereader.reader202 import Reader202 class Reader(FormatReader): - def __init__(self, header, stream, log, encoding=None): + def __init__(self, header, stream, log, options): record0_size = len(header.section_data(0)) if record0_size == 132: - self.reader = Reader132(header, stream, log, encoding) + self.reader = Reader132(header, stream, log, options) elif record0_size == 202: - self.reader = Reader202(header, stream, log, encoding) + self.reader = Reader202(header, stream, log, options) else: raise EreaderError('Size mismatch. eReader header record size %s KB is not supported.' % record0_size) diff --git a/src/calibre/ebooks/pdb/ereader/reader132.py b/src/calibre/ebooks/pdb/ereader/reader132.py index a1d1f4294d..d44eb2c561 100644 --- a/src/calibre/ebooks/pdb/ereader/reader132.py +++ b/src/calibre/ebooks/pdb/ereader/reader132.py @@ -47,9 +47,9 @@ class HeaderRecord(object): class Reader132(FormatReader): - def __init__(self, header, stream, log, encoding=None): + def __init__(self, header, stream, log, options): self.log = log - self.encoding = encoding + self.encoding = options.input_encoding self.log.debug('132 byte header version found.') diff --git a/src/calibre/ebooks/pdb/ereader/reader202.py b/src/calibre/ebooks/pdb/ereader/reader202.py index 5057df363e..18281a208e 100644 --- a/src/calibre/ebooks/pdb/ereader/reader202.py +++ b/src/calibre/ebooks/pdb/ereader/reader202.py @@ -33,9 +33,9 @@ class HeaderRecord(object): class Reader202(FormatReader): - def __init__(self, header, stream, log, encoding=None): + def __init__(self, header, stream, log, options): self.log = log - self.encoding = encoding + self.encoding = options.input_encoding self.log.debug('202 byte header version found.') diff --git a/src/calibre/ebooks/pdb/formatreader.py b/src/calibre/ebooks/pdb/formatreader.py index bde6c9ae35..2251eaae04 100644 --- a/src/calibre/ebooks/pdb/formatreader.py +++ b/src/calibre/ebooks/pdb/formatreader.py @@ -11,7 +11,7 @@ __docformat__ = 'restructuredtext en' class FormatReader(object): - def __init__(self, header, stream, log, encoding=None): + def __init__(self, header, stream, log, options): raise NotImplementedError() def extract_content(self, output_dir): diff --git a/src/calibre/ebooks/pdb/input.py b/src/calibre/ebooks/pdb/input.py index 62ae24c7f0..3ad1a6121c 100644 --- a/src/calibre/ebooks/pdb/input.py +++ b/src/calibre/ebooks/pdb/input.py @@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en' import os -from calibre.customize.conversion import InputFormatPlugin +from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.ebooks.pdb.header import PdbHeaderReader from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader @@ -17,6 +17,13 @@ class PDBInput(InputFormatPlugin): description = 'Convert PDB to HTML' file_types = set(['pdb']) + options = set([ + OptionRecommendation(name='single_line_paras', recommended_value=False, + help=_('Normally calibre treats blank lines as paragraph markers. ' + 'With this option it will assume that every line represents ' + 'a paragraph instead.')), + ]) + def convert(self, stream, options, file_ext, log, accelerators): header = PdbHeaderReader(stream) @@ -27,7 +34,7 @@ class PDBInput(InputFormatPlugin): log.debug('Detected ebook format as: %s with identity: %s' % (IDENTITY_TO_NAME[header.ident], header.ident)) - reader = Reader(header, stream, log, options.input_encoding) + reader = Reader(header, stream, log, options) opf = reader.extract_content(os.getcwd()) return opf diff --git a/src/calibre/ebooks/pdb/palmdoc/reader.py b/src/calibre/ebooks/pdb/palmdoc/reader.py index 7e8f3b241c..e1935db566 100644 --- a/src/calibre/ebooks/pdb/palmdoc/reader.py +++ b/src/calibre/ebooks/pdb/palmdoc/reader.py @@ -31,10 +31,11 @@ class HeaderRecord(object): class Reader(FormatReader): - def __init__(self, header, stream, log, encoding=None): + def __init__(self, header, stream, log, options): self.stream = stream self.log = log - self.encoding = encoding + self.encoding = options.input_encoding + self.single_line_paras = options.single_line_paras self.sections = [] for i in range(header.num_sections): @@ -61,7 +62,7 @@ class Reader(FormatReader): txt += self.decompress_text(i) self.log.info('Converting text to OEB...') - html = txt_to_markdown(txt) + html = txt_to_markdown(txt, single_line_paras=self.single_line_paras) with open(os.path.join(output_dir, 'index.html'), 'wb') as index: index.write(html.encode('utf-8')) diff --git a/src/calibre/ebooks/pdb/ztxt/reader.py b/src/calibre/ebooks/pdb/ztxt/reader.py index 0c334556e8..86c5abfe82 100644 --- a/src/calibre/ebooks/pdb/ztxt/reader.py +++ b/src/calibre/ebooks/pdb/ztxt/reader.py @@ -34,10 +34,11 @@ class HeaderRecord(object): class Reader(FormatReader): - def __init__(self, header, stream, log, encoding=None): + def __init__(self, header, stream, log, options): self.stream = stream self.log = log - self.encoding = encoding + self.encoding = options.input_encoding + self.single_line_paras = options.single_line_paras self.sections = [] for i in range(header.num_sections): @@ -76,7 +77,7 @@ class Reader(FormatReader): txt += self.decompress_text(i) self.log.info('Converting text to OEB...') - html = txt_to_markdown(txt) + html = txt_to_markdown(txt, single_line_paras=self.single_line_paras) with open(os.path.join(output_dir, 'index.html'), 'wb') as index: index.write(html.encode('utf-8')) diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index 493fdf3967..75dd516360 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -31,14 +31,9 @@ class TXTInput(InputFormatPlugin): log.debug('Reading text from file...') txt = stream.read().decode(ienc, 'replace') - if options.single_line_paras: - txt = txt.replace('\r\n', '\n') - txt = txt.replace('\r', '\n') - txt = txt.replace('\n', '\n\n') - log.debug('Running text though markdown conversion...') try: - html = txt_to_markdown(txt) + html = txt_to_markdown(txt, single_line_paras=options.single_line_paras) except RuntimeError: raise ValueError('This txt file has malformed markup, it cannot be' 'converted by calibre. See http://daringfireball.net/projects/markdown/syntax') diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index ddb9b6a121..3005d633b8 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -13,7 +13,11 @@ __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' __docformat__ = 'restructuredtext en' -def txt_to_markdown(txt, title=''): +def txt_to_markdown(txt, title='', single_line_paras=False): + if single_line_paras: + txt = txt.replace('\r\n', '\n') + txt = txt.replace('\r', '\n') + txt = txt.replace('\n', '\n\n') md = markdown.Markdown( extensions=['footnotes', 'tables', 'toc'], safe_mode=False,)