mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix bug #3251: Handle single line paragraphs in PDB files.
This commit is contained in:
parent
fd1e0711e0
commit
857f55d2b1
@ -15,13 +15,13 @@ from calibre.ebooks.pdb.ereader.reader202 import Reader202
|
|||||||
|
|
||||||
class Reader(FormatReader):
|
class Reader(FormatReader):
|
||||||
|
|
||||||
def __init__(self, header, stream, log, encoding=None):
|
def __init__(self, header, stream, log, options):
|
||||||
record0_size = len(header.section_data(0))
|
record0_size = len(header.section_data(0))
|
||||||
|
|
||||||
if record0_size == 132:
|
if record0_size == 132:
|
||||||
self.reader = Reader132(header, stream, log, encoding)
|
self.reader = Reader132(header, stream, log, options)
|
||||||
elif record0_size == 202:
|
elif record0_size == 202:
|
||||||
self.reader = Reader202(header, stream, log, encoding)
|
self.reader = Reader202(header, stream, log, options)
|
||||||
else:
|
else:
|
||||||
raise EreaderError('Size mismatch. eReader header record size %s KB is not supported.' % record0_size)
|
raise EreaderError('Size mismatch. eReader header record size %s KB is not supported.' % record0_size)
|
||||||
|
|
||||||
|
@ -47,9 +47,9 @@ class HeaderRecord(object):
|
|||||||
|
|
||||||
class Reader132(FormatReader):
|
class Reader132(FormatReader):
|
||||||
|
|
||||||
def __init__(self, header, stream, log, encoding=None):
|
def __init__(self, header, stream, log, options):
|
||||||
self.log = log
|
self.log = log
|
||||||
self.encoding = encoding
|
self.encoding = options.input_encoding
|
||||||
|
|
||||||
self.log.debug('132 byte header version found.')
|
self.log.debug('132 byte header version found.')
|
||||||
|
|
||||||
|
@ -33,9 +33,9 @@ class HeaderRecord(object):
|
|||||||
|
|
||||||
class Reader202(FormatReader):
|
class Reader202(FormatReader):
|
||||||
|
|
||||||
def __init__(self, header, stream, log, encoding=None):
|
def __init__(self, header, stream, log, options):
|
||||||
self.log = log
|
self.log = log
|
||||||
self.encoding = encoding
|
self.encoding = options.input_encoding
|
||||||
|
|
||||||
self.log.debug('202 byte header version found.')
|
self.log.debug('202 byte header version found.')
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
class FormatReader(object):
|
class FormatReader(object):
|
||||||
|
|
||||||
def __init__(self, header, stream, log, encoding=None):
|
def __init__(self, header, stream, log, options):
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
def extract_content(self, output_dir):
|
def extract_content(self, output_dir):
|
||||||
|
@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin
|
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||||
from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader
|
from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader
|
||||||
|
|
||||||
@ -17,6 +17,13 @@ class PDBInput(InputFormatPlugin):
|
|||||||
description = 'Convert PDB to HTML'
|
description = 'Convert PDB to HTML'
|
||||||
file_types = set(['pdb'])
|
file_types = set(['pdb'])
|
||||||
|
|
||||||
|
options = set([
|
||||||
|
OptionRecommendation(name='single_line_paras', recommended_value=False,
|
||||||
|
help=_('Normally calibre treats blank lines as paragraph markers. '
|
||||||
|
'With this option it will assume that every line represents '
|
||||||
|
'a paragraph instead.')),
|
||||||
|
])
|
||||||
|
|
||||||
def convert(self, stream, options, file_ext, log,
|
def convert(self, stream, options, file_ext, log,
|
||||||
accelerators):
|
accelerators):
|
||||||
header = PdbHeaderReader(stream)
|
header = PdbHeaderReader(stream)
|
||||||
@ -27,7 +34,7 @@ class PDBInput(InputFormatPlugin):
|
|||||||
|
|
||||||
log.debug('Detected ebook format as: %s with identity: %s' % (IDENTITY_TO_NAME[header.ident], header.ident))
|
log.debug('Detected ebook format as: %s with identity: %s' % (IDENTITY_TO_NAME[header.ident], header.ident))
|
||||||
|
|
||||||
reader = Reader(header, stream, log, options.input_encoding)
|
reader = Reader(header, stream, log, options)
|
||||||
opf = reader.extract_content(os.getcwd())
|
opf = reader.extract_content(os.getcwd())
|
||||||
|
|
||||||
return opf
|
return opf
|
||||||
|
@ -31,10 +31,11 @@ class HeaderRecord(object):
|
|||||||
|
|
||||||
class Reader(FormatReader):
|
class Reader(FormatReader):
|
||||||
|
|
||||||
def __init__(self, header, stream, log, encoding=None):
|
def __init__(self, header, stream, log, options):
|
||||||
self.stream = stream
|
self.stream = stream
|
||||||
self.log = log
|
self.log = log
|
||||||
self.encoding = encoding
|
self.encoding = options.input_encoding
|
||||||
|
self.single_line_paras = options.single_line_paras
|
||||||
|
|
||||||
self.sections = []
|
self.sections = []
|
||||||
for i in range(header.num_sections):
|
for i in range(header.num_sections):
|
||||||
@ -61,7 +62,7 @@ class Reader(FormatReader):
|
|||||||
txt += self.decompress_text(i)
|
txt += self.decompress_text(i)
|
||||||
|
|
||||||
self.log.info('Converting text to OEB...')
|
self.log.info('Converting text to OEB...')
|
||||||
html = txt_to_markdown(txt)
|
html = txt_to_markdown(txt, single_line_paras=self.single_line_paras)
|
||||||
with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
|
with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
|
||||||
index.write(html.encode('utf-8'))
|
index.write(html.encode('utf-8'))
|
||||||
|
|
||||||
|
@ -34,10 +34,11 @@ class HeaderRecord(object):
|
|||||||
|
|
||||||
class Reader(FormatReader):
|
class Reader(FormatReader):
|
||||||
|
|
||||||
def __init__(self, header, stream, log, encoding=None):
|
def __init__(self, header, stream, log, options):
|
||||||
self.stream = stream
|
self.stream = stream
|
||||||
self.log = log
|
self.log = log
|
||||||
self.encoding = encoding
|
self.encoding = options.input_encoding
|
||||||
|
self.single_line_paras = options.single_line_paras
|
||||||
|
|
||||||
self.sections = []
|
self.sections = []
|
||||||
for i in range(header.num_sections):
|
for i in range(header.num_sections):
|
||||||
@ -76,7 +77,7 @@ class Reader(FormatReader):
|
|||||||
txt += self.decompress_text(i)
|
txt += self.decompress_text(i)
|
||||||
|
|
||||||
self.log.info('Converting text to OEB...')
|
self.log.info('Converting text to OEB...')
|
||||||
html = txt_to_markdown(txt)
|
html = txt_to_markdown(txt, single_line_paras=self.single_line_paras)
|
||||||
with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
|
with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
|
||||||
index.write(html.encode('utf-8'))
|
index.write(html.encode('utf-8'))
|
||||||
|
|
||||||
|
@ -31,14 +31,9 @@ class TXTInput(InputFormatPlugin):
|
|||||||
log.debug('Reading text from file...')
|
log.debug('Reading text from file...')
|
||||||
txt = stream.read().decode(ienc, 'replace')
|
txt = stream.read().decode(ienc, 'replace')
|
||||||
|
|
||||||
if options.single_line_paras:
|
|
||||||
txt = txt.replace('\r\n', '\n')
|
|
||||||
txt = txt.replace('\r', '\n')
|
|
||||||
txt = txt.replace('\n', '\n\n')
|
|
||||||
|
|
||||||
log.debug('Running text though markdown conversion...')
|
log.debug('Running text though markdown conversion...')
|
||||||
try:
|
try:
|
||||||
html = txt_to_markdown(txt)
|
html = txt_to_markdown(txt, single_line_paras=options.single_line_paras)
|
||||||
except RuntimeError:
|
except RuntimeError:
|
||||||
raise ValueError('This txt file has malformed markup, it cannot be'
|
raise ValueError('This txt file has malformed markup, it cannot be'
|
||||||
'converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
|
'converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
|
||||||
|
@ -13,7 +13,11 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
def txt_to_markdown(txt, title=''):
|
def txt_to_markdown(txt, title='', single_line_paras=False):
|
||||||
|
if single_line_paras:
|
||||||
|
txt = txt.replace('\r\n', '\n')
|
||||||
|
txt = txt.replace('\r', '\n')
|
||||||
|
txt = txt.replace('\n', '\n\n')
|
||||||
md = markdown.Markdown(
|
md = markdown.Markdown(
|
||||||
extensions=['footnotes', 'tables', 'toc'],
|
extensions=['footnotes', 'tables', 'toc'],
|
||||||
safe_mode=False,)
|
safe_mode=False,)
|
||||||
|
19
src/calibre/gui2/convert/pdb_input.py
Normal file
19
src/calibre/gui2/convert/pdb_input.py
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from calibre.gui2.convert.pdb_input_ui import Ui_Form
|
||||||
|
from calibre.gui2.convert import Widget
|
||||||
|
|
||||||
|
class PluginWidget(Widget, Ui_Form):
    """Conversion-dialog page for the options of the PDB input plugin.

    Combines the generic conversion ``Widget`` base with the Qt Designer
    generated ``Ui_Form`` (from ``pdb_input.ui``), which provides the
    ``opt_single_line_paras`` checkbox.
    """

    TITLE = _('PDB Input')
    HELP = _('Options specific to')+' PDB '+_('input')

    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        """Build the page and populate it with the current option values.

        :param parent: parent Qt widget.
        :param get_option: callable forwarded to ``initialize_options``.
        :param get_help: callable forwarded to ``initialize_options``.
        :param db: optional database handle, stored on the instance.
        :param book_id: optional book id, stored on the instance.
        """
        # Register under this plugin's own name. The original passed
        # 'txt_input' (copied from the TXT widget).
        # NOTE(review): the name presumably keys the saved defaults for this
        # page, so reusing 'txt_input' would share them with TXT input --
        # confirm against Widget.
        Widget.__init__(self, parent, 'pdb_input',
            ['single_line_paras'])
        self.db, self.book_id = db, book_id
        self.initialize_options(get_option, get_help, db, book_id)
|
41
src/calibre/gui2/convert/pdb_input.ui
Normal file
41
src/calibre/gui2/convert/pdb_input.ui
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<ui version="4.0">
|
||||||
|
<class>Form</class>
|
||||||
|
<widget class="QWidget" name="Form">
|
||||||
|
<property name="geometry">
|
||||||
|
<rect>
|
||||||
|
<x>0</x>
|
||||||
|
<y>0</y>
|
||||||
|
<width>400</width>
|
||||||
|
<height>300</height>
|
||||||
|
</rect>
|
||||||
|
</property>
|
||||||
|
<property name="windowTitle">
|
||||||
|
<string>Form</string>
|
||||||
|
</property>
|
||||||
|
<layout class="QGridLayout" name="gridLayout">
|
||||||
|
<item row="1" column="0">
|
||||||
|
<spacer name="verticalSpacer">
|
||||||
|
<property name="orientation">
|
||||||
|
<enum>Qt::Vertical</enum>
|
||||||
|
</property>
|
||||||
|
<property name="sizeHint" stdset="0">
|
||||||
|
<size>
|
||||||
|
<width>20</width>
|
||||||
|
<height>213</height>
|
||||||
|
</size>
|
||||||
|
</property>
|
||||||
|
</spacer>
|
||||||
|
</item>
|
||||||
|
<item row="0" column="0">
|
||||||
|
<widget class="QCheckBox" name="opt_single_line_paras">
|
||||||
|
<property name="text">
|
||||||
|
<string>Treat each &amp;line as a paragraph</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
</layout>
|
||||||
|
</widget>
|
||||||
|
<resources/>
|
||||||
|
<connections/>
|
||||||
|
</ui>
|
Loading…
x
Reference in New Issue
Block a user