mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
eReader format support from driver-dev
This commit is contained in:
commit
b93029a4fe
@ -278,6 +278,7 @@ class PDFMetadataWriter(MetadataWriterPlugin):
|
|||||||
|
|
||||||
from calibre.ebooks.epub.input import EPUBInput
|
from calibre.ebooks.epub.input import EPUBInput
|
||||||
from calibre.ebooks.mobi.input import MOBIInput
|
from calibre.ebooks.mobi.input import MOBIInput
|
||||||
|
from calibre.ebooks.pdb.input import PDBInput
|
||||||
from calibre.ebooks.pdf.input import PDFInput
|
from calibre.ebooks.pdf.input import PDFInput
|
||||||
from calibre.ebooks.txt.input import TXTInput
|
from calibre.ebooks.txt.input import TXTInput
|
||||||
from calibre.ebooks.lit.input import LITInput
|
from calibre.ebooks.lit.input import LITInput
|
||||||
@ -290,7 +291,7 @@ from calibre.ebooks.txt.output import TXTOutput
|
|||||||
from calibre.ebooks.pdf.output import PDFOutput
|
from calibre.ebooks.pdf.output import PDFOutput
|
||||||
from calibre.customize.profiles import input_profiles, output_profiles
|
from calibre.customize.profiles import input_profiles, output_profiles
|
||||||
|
|
||||||
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
|
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
|
||||||
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
|
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
|
||||||
FB2Input, ODTInput, RTFInput]
|
FB2Input, ODTInput, RTFInput]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
|
31
src/calibre/ebooks/pdb/__init__.py
Normal file
31
src/calibre/ebooks/pdb/__init__.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from calibre.ebooks.pdb.ereader.reader import Reader as eReader
|
||||||
|
|
||||||
|
# Maps the 8-character identity read from a pdb header to the reader class
# that handles books with that identity.
FORMATS = dict.fromkeys(('PNPdPPrs', 'PNRdPPrs'), eReader)

# Maps the same identities to a human readable format name (used when
# logging which format was detected).
IDENTITY_TO_NAME = dict.fromkeys(('PNPdPPrs', 'PNRdPPrs'), 'eReader')
|
||||||
|
|
||||||
|
class PDBError(Exception):
    '''Error raised while handling a pdb file (e.g. an unknown identity).'''
|
||||||
|
|
||||||
|
|
||||||
|
def get_reader(identity):
    '''
    Look up the reader class registered for a pdb identity.

    :param identity: identity string read from the pdb header.
    :return: the matching entry of ``FORMATS``, or None if no reader is
        found for the identity.
    '''
    # A single dictionary lookup replaces the membership test against
    # FORMATS.keys() followed by a second indexing operation.
    return FORMATS.get(identity)
|
9
src/calibre/ebooks/pdb/ereader/__init__.py
Normal file
9
src/calibre/ebooks/pdb/ereader/__init__.py
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
class EreaderError(Exception):
    '''Error raised while processing an eReader book.'''
|
97
src/calibre/ebooks/pdb/ereader/pmlconverter.py
Normal file
97
src/calibre/ebooks/pdb/ereader/pmlconverter.py
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import with_statement
|
||||||
|
'''
|
||||||
|
Convert pml markup to and from html
|
||||||
|
'''
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from calibre.ebooks.htmlsymbols import HTML_SYMBOLS
|
||||||
|
|
||||||
|
# Ordered list of (compiled pattern, replacement callable) pairs that turn
# pml markup into html. The rules are applied one after another, so their
# order matters: paired codes are converted first, then every line is wrapped
# in a paragraph, then left-over codes are stripped and finally escaped
# backslashes are collapsed.
#
# Patterns are raw strings: r'\\x' is the regular expression that matches the
# two characters backslash + x in the pml text.
PML_HTML_RULES = [
    (re.compile(r'\\p'), lambda match: '<br /><br style="page-break-after: always;" />'),
    (re.compile(r'\\x(?P<text>.+?)\\x', re.DOTALL), lambda match: '<h1 style="page-break-before: always;">%s</h1>' % match.group('text')),
    # \X0 .. \X4 become h1 .. h5. The heading text is a string, so it has to
    # be formatted with %s (formatting it with %i raises TypeError).
    (re.compile(r'\\X(?P<val>[0-4])(?P<text>.+?)\\X[0-4]', re.DOTALL), lambda match: '<h%i style="page-break-before: always;">%s</h%i>' % (int(match.group('val')) + 1, match.group('text'), int(match.group('val')) + 1)),
    (re.compile(r'\\C\d=".+"'), lambda match: ''), # This should be made to create a TOC entry
    (re.compile(r'\\c(?P<text>.+?)\\c', re.DOTALL), lambda match: '<div style="text-align: center; display: block; margin: auto;">%s</div>' % match.group('text')),
    (re.compile(r'\\r(?P<text>.+?)\\r', re.DOTALL), lambda match: '<div style="text-align: right; display: block;">%s</div>' % match.group('text')),
    (re.compile(r'\\i(?P<text>.+?)\\i', re.DOTALL), lambda match: '<i>%s</i>' % match.group('text')),
    (re.compile(r'\\u(?P<text>.+?)\\u', re.DOTALL), lambda match: '<div style="text-decoration: underline;">%s</div>' % match.group('text')),
    (re.compile(r'\\o(?P<text>.+?)\\o', re.DOTALL), lambda match: '<del>%s</del>' % match.group('text')),
    (re.compile(r'\\v(?P<text>.+?)\\v', re.DOTALL), lambda match: '<!-- %s -->' % match.group('text')),
    (re.compile(r'\\t(?P<text>.+?)\\t', re.DOTALL), lambda match: '<div style="margin-left: 5%%">%s</div>' % match.group('text')),
    # The optional percent sign(s) are matched outside the value group so the
    # output always carries exactly one '%' after the number.
    (re.compile(r'\\T="(?P<val>\d+)%*"(?P<text>.+?)$', re.MULTILINE), lambda match: '<div style="margin-left: %s%%">%s</div>' % (match.group('val'), match.group('text'))),
    (re.compile(r'\\w="(?P<val>\d+)%"'), lambda match: '<hr width="%s%%" />' % match.group('val')),
    (re.compile(r'\\n'), lambda match: ''),
    (re.compile(r'\\s'), lambda match: ''),
    (re.compile(r'\\b(?P<text>.+?)\\b', re.DOTALL), lambda match: '<b>%s</b>' % match.group('text')), # \b is deprecated; \B should be used instead.
    (re.compile(r'\\l(?P<text>.+?)\\l', re.DOTALL), lambda match: '<big>%s</big>' % match.group('text')),
    (re.compile(r'\\B(?P<text>.+?)\\B', re.DOTALL), lambda match: '<b>%s</b>' % match.group('text')),
    (re.compile(r'\\Sp(?P<text>.+?)\\Sp', re.DOTALL), lambda match: '<sup>%s</sup>' % match.group('text')),
    (re.compile(r'\\Sb(?P<text>.+?)\\Sb', re.DOTALL), lambda match: '<sub>%s</sub>' % match.group('text')),
    (re.compile(r'\\k(?P<text>.+?)\\k', re.DOTALL), lambda match: '<small>%s</small>' % match.group('text')),
    (re.compile(r'\\a(?P<num>\d\d\d)'), lambda match: '&#%s;' % match.group('num')),
    # NOTE(review): the four digits are interpreted as a decimal number here;
    # confirm against the pml specification whether \U takes hexadecimal.
    (re.compile(r'\\U(?P<num>\d\d\d\d)'), lambda match: '&#%i;' % int(match.group('num'))),
    (re.compile(r'\\m="(?P<name>.+?)"'), lambda match: '<img src="images/%s" />' % match.group('name')),
    # The link text group must match at least one character; an empty group
    # never matches a link that actually has text between the \q codes.
    (re.compile(r'\\q="(?P<target>#.+?)"(?P<text>.+?)\\q', re.DOTALL), lambda match: '<a href="%s">%s</a>' % (match.group('target'), match.group('text'))),
    (re.compile(r'\\Q="(?P<target>.+?)"'), lambda match: '<div id="%s"></div>' % match.group('target')),
    (re.compile(r'\\-'), lambda match: ''),
    (re.compile(r'\\Fn="(?P<target>.+?)"(?P<text>.+?)\\Fn'), lambda match: '<a href="#footnote-%s">%s</a>' % (match.group('target'), match.group('text'))),
    (re.compile(r'\\Sd="(?P<target>.+?)"(?P<text>.+?)\\Sd'), lambda match: '<a href="#sidebar-%s">%s</a>' % (match.group('target'), match.group('text'))),
    (re.compile(r'\\I'), lambda match: ''),

    # eReader files are one paragraph per line.
    # This forces the lines to wrap properly.
    (re.compile('^(?P<text>.+)$', re.MULTILINE), lambda match: '<p>%s</p>' % match.group('text')),

    # Remove unmatched pml codes. The look-behind skips codes whose backslash
    # is itself preceded by a backslash (an escaped backslash).
    (re.compile(r'(?<=[^\\])\\[pxcriouvtblBk]'), lambda match: ''),
    (re.compile(r'(?<=[^\\])\\X[0-4]'), lambda match: ''),
    (re.compile(r'(?<=[^\\])\\Sp'), lambda match: ''),
    (re.compile(r'(?<=[^\\])\\Sb'), lambda match: ''),

    # Replace \\ with \.
    (re.compile(r'\\\\'), lambda match: '\\'),
]
|
||||||
|
|
||||||
|
# Converts <footnote id="..."> elements into divs whose id matches the
# "#footnote-<id>" anchors that PML_HTML_RULES generates for \Fn links.
FOOTNOTE_HTML_RULES = [
    # The replacement has to interpolate the captured id and text; returning
    # the bare template would write a literal '%s' into the html.
    (re.compile('<footnote id="(?P<id>.+?)">(?P<text>.+?)</footnote>', re.DOTALL),
     lambda match: '<div id="footnote-%s">%s</div>' % (match.group('id'), match.group('text'))),
]
|
||||||
|
|
||||||
|
# Converts <sidebar id="..."> elements into divs whose id matches the
# "#sidebar-<id>" anchors that PML_HTML_RULES generates for \Sd links.
SIDEBAR_HTML_RULES = [
    # The replacement has to interpolate the captured id and text; returning
    # the bare template would write a literal '%s' into the html.
    (re.compile('<sidebar id="(?P<id>.+?)">(?P<text>.+?)</sidebar>', re.DOTALL),
     lambda match: '<div id="sidebar-%s">%s</div>' % (match.group('id'), match.group('text'))),
]
|
||||||
|
|
||||||
|
|
||||||
|
def pml_to_html(pml):
    '''
    Convert pml markup to html.

    Every rule in PML_HTML_RULES is applied in order, then each key of
    HTML_SYMBOLS whose code point is above 128 is replaced by the last item
    of its HTML_SYMBOLS entry.

    :param pml: text containing pml markup.
    :return: the converted text.
    '''
    converted = pml
    for pattern, replacement in PML_HTML_RULES:
        converted = pattern.sub(replacement, converted)

    for symbol, values in HTML_SYMBOLS.items():
        if ord(symbol) > 128:
            converted = converted.replace(symbol, values[-1])

    return converted
|
||||||
|
|
||||||
|
def footnote_to_html(footnotes):
    '''
    Convert footnote text to html.

    FOOTNOTE_HTML_RULES are applied first and the result is then run
    through pml_to_html.

    :param footnotes: text containing <footnote> elements and pml markup.
    :return: the converted text.
    '''
    converted = footnotes
    for pattern, replacement in FOOTNOTE_HTML_RULES:
        converted = pattern.sub(replacement, converted)

    return pml_to_html(converted)
|
||||||
|
|
||||||
|
def sidebar_to_html(sidebars):
    '''
    Convert sidebar text to html.

    SIDEBAR_HTML_RULES are applied first and the result is then run
    through pml_to_html.

    :param sidebars: text containing <sidebar> elements and pml markup.
    :return: the converted text.
    '''
    html = sidebars
    # Use the sidebar rules here; the footnote rules only match <footnote>
    # elements and would leave every <sidebar> element unconverted.
    for pattern, replacement in SIDEBAR_HTML_RULES:
        html = pattern.sub(replacement, html)

    return pml_to_html(html)
|
216
src/calibre/ebooks/pdb/ereader/reader.py
Normal file
216
src/calibre/ebooks/pdb/ereader/reader.py
Normal file
@ -0,0 +1,216 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import with_statement
|
||||||
|
'''
|
||||||
|
Read content from ereader pdb file.
|
||||||
|
'''
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os, sys, struct, zlib
|
||||||
|
|
||||||
|
from calibre import CurrentDir
|
||||||
|
from calibre.ebooks import DRMError
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||||
|
from calibre.ebooks.pdb.ereader import EreaderError
|
||||||
|
from calibre.ebooks.pdb.ereader.pmlconverter import pml_to_html, \
|
||||||
|
footnote_to_html, sidebar_to_html
|
||||||
|
from calibre.ebooks.mobi.palmdoc import decompress_doc
|
||||||
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
|
|
||||||
|
class HeaderRecord(object):
    '''
    The first record in the file is always the header record. It holds
    information related to the location of text, images, and so on
    in the file. This is used in conjunction with the sections
    defined in the file header.

    All fields are read as big-endian unsigned 16 bit integers from fixed
    positions in the raw record.
    '''

    def __init__(self, raw):
        '''
        :param raw: raw data of the header record; must be at least 54
            bytes long, otherwise struct.error is raised.
        '''
        def u16(position):
            # Big-endian unsigned short stored at ``position`` in the record.
            return struct.unpack('>H', raw[position:position + 2])[0]

        self.version = u16(0)
        self.non_text_offset = u16(12)
        self.footnote_rec = u16(28)
        self.sidebar_rec = u16(30)
        self.bookmark_offset = u16(32)
        self.image_data_offset = u16(40)
        self.metadata_offset = u16(44)
        self.footnote_offset = u16(48)
        self.sidebar_offset = u16(50)
        self.last_data_offset = u16(52)

        # Record 0 is this header record, so the text pages are the records
        # from 1 up to (but not including) non_text_offset.
        self.num_text_pages = self.non_text_offset - 1
        self.num_image_pages = self.metadata_offset - self.image_data_offset

        # Can't tell which is sidebar and footnote if they have same offset.
        # They don't exist if offset is larger than last_record.
        # Todo: Determine if the subtraction is necessary and find out
        # what _rec means.
        if self.sidebar_offset != self.footnote_offset:
            end_footnote_offset = self.sidebar_offset
        else:
            end_footnote_offset = self.last_data_offset

        if self.footnote_offset < self.last_data_offset:
            self.num_footnote_pages = end_footnote_offset - self.footnote_offset
        else:
            self.num_footnote_pages = 0

        # NOTE(review): assumes sidebar records run from sidebar_offset up to
        # last_data_offset, mirroring the footnote calculation above. The
        # previous expression (sidebar_offset - last_data_offset) produced a
        # negative page count whenever sidebars were present.
        if self.sidebar_offset < self.last_data_offset:
            self.num_sidebar_pages = self.last_data_offset - self.sidebar_offset
        else:
            self.num_sidebar_pages = 0
|
||||||
|
|
||||||
|
|
||||||
|
class Reader(FormatReader):
    '''
    Reader for eReader pdb books.

    All sections of the pdb file are loaded into memory on construction.
    The book can then be written out as index.html, an images/ directory
    and a metadata.opf with extract_content().
    '''

    def __init__(self, header, stream, log, encoding=None):
        '''
        :param header: pdb header object providing ``num_sections`` and
            ``section_data(number)``.
        :param stream: the pdb file; not used directly, all data is read
            through ``header``.
        :param log: logger used for debug output.
        :param encoding: encoding used to decode text; cp1252 is used
            when this is None.
        :raises DRMError: if the header record version is 260 or 272.
        :raises EreaderError: for any other version besides 2 and 10.
        '''
        self.log = log
        self.encoding = encoding

        self.sections = [header.section_data(i) for i in range(header.num_sections)]

        self.header_record = HeaderRecord(self.section_data(0))

        version = self.header_record.version
        if version in (260, 272):
            raise DRMError('eReader DRM is not supported.')
        if version not in (2, 10):
            raise EreaderError('Unknown book version %i.' % version)

    def section_data(self, number):
        '''Return the raw data of section ``number``.'''
        return self.sections[number]

    def decompress_text(self, number):
        '''
        Decompress and decode section ``number``.

        Version 2 books are decompressed with decompress_doc, version 10
        books with zlib. Returns None for any other version (those are
        rejected by __init__).
        '''
        encoding = 'cp1252' if self.encoding is None else self.encoding
        if self.header_record.version == 2:
            return decompress_doc(self.section_data(number)).decode(encoding)
        if self.header_record.version == 10:
            return zlib.decompress(self.section_data(number)).decode(encoding)

    def get_image(self, number):
        '''
        Return ``(name, data)`` for the image stored in section ``number``.

        ``('empty', '')`` is returned when ``number`` is not an image section.
        '''
        first = self.header_record.image_data_offset
        last = first + self.header_record.num_image_pages - 1
        if number < first or number > last:
            return 'empty', ''
        data = self.section_data(number)
        # The name is a NUL padded field of 32 bytes starting at byte 4;
        # the image data itself starts at byte 62.
        name = data[4:4 + 32].strip('\0')
        img = data[62:]
        return name, img

    def get_text_page(self, number):
        '''
        Only palmdoc and zlib compressed are supported. The text is
        assumed to be encoded as Windows-1252. The encoding is part of
        the eReader file spec and should always be this encoding.

        Returns '' when ``number`` is not a text page.
        '''
        # Text pages are numbered 1..num_text_pages inclusive (this is the
        # range extract_content and dump_pml iterate over), so the last
        # page has to pass this check too.
        if not 1 <= number <= self.header_record.num_text_pages:
            return ''

        return self.decompress_text(number)

    def get_footnote_page(self, number):
        '''Return the text of footnote section ``number`` or '' if out of range.'''
        start = self.header_record.footnote_offset
        end = start + self.header_record.num_footnote_pages
        if not start <= number < end:
            return ''

        return self.decompress_text(number)

    def get_sidebar_page(self, number):
        '''Return the text of sidebar section ``number`` or '' if out of range.'''
        start = self.header_record.sidebar_offset
        # NOTE(review): unlike get_footnote_page the upper bound is reduced
        # by one here; kept as is, confirm which of the two is intended.
        end = start + self.header_record.num_sidebar_pages - 1
        if not start <= number < end:
            return ''

        return self.decompress_text(number)

    def has_footnotes(self):
        '''Return True if the first footnote section contains a footnote.'''
        # NOTE(review): a book with exactly one footnote page is reported as
        # having none; confirm whether "> 0" is intended.
        if self.header_record.num_footnote_pages > 1:
            try:
                content = self.decompress_text(self.header_record.footnote_offset)
                return '</footnote>' in content
            except Exception:
                # Best effort probe: a section that cannot be read or
                # decoded is treated as "no footnotes".
                return False
        return False

    def has_sidebar(self):
        '''Return True if the first sidebar section contains a sidebar.'''
        # NOTE(review): same "> 1" threshold as has_footnotes; confirm.
        if self.header_record.num_sidebar_pages > 1:
            try:
                content = self.decompress_text(self.header_record.sidebar_offset)
                return '</sidebar>' in content
            except Exception:
                # Best effort probe: a section that cannot be read or
                # decoded is treated as "no sidebar".
                return False
        return False

    def extract_content(self, output_dir):
        '''
        Write the book to ``output_dir``.

        Creates index.html with the converted text, an images/ directory
        with every image section and metadata.opf.

        :param output_dir: target directory; created if it does not exist.
        :return: path of the generated metadata.opf.
        '''
        output_dir = os.path.abspath(output_dir)

        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # Collect the pieces and join once instead of repeatedly
        # concatenating onto a growing string.
        parts = ['<html><head><title></title></head><body>']

        for i in range(1, self.header_record.num_text_pages + 1):
            self.log.debug('Extracting text page %i' % i)
            parts.append(pml_to_html(self.get_text_page(i)))

        # TODO: footnote and sidebar extraction is disabled. The intended
        # flow is: if has_footnotes() / has_sidebar(), append a
        # '<br /><h1>%s</h1>' heading (_('Footnotes') / _('Sidebar')) and the
        # footnote_to_html() / sidebar_to_html() output of every footnote /
        # sidebar page. Untested: the num_.._pages variables may not be
        # correct! Possibly use .._rec instead?

        parts.append('</body></html>')
        html = ''.join(parts)

        with CurrentDir(output_dir):
            with open('index.html', 'wb') as index:
                self.log.debug('Writing text to index.html')
                index.write(html.encode('utf-8'))

        images_dir = os.path.join(output_dir, 'images/')
        if not os.path.exists(images_dir):
            os.makedirs(images_dir)
        images = []
        with CurrentDir(images_dir):
            for i in range(0, self.header_record.num_image_pages):
                name, img = self.get_image(self.header_record.image_data_offset + i)
                images.append(name)
                # NOTE(review): the file name comes straight from the book
                # data; consider sanitising it before using it as a path.
                with open(name, 'wb') as imgf:
                    self.log.debug('Writing image %s to images/' % name)
                    imgf.write(img)

        opf_path = self.create_opf(output_dir, images)

        return opf_path

    def create_opf(self, output_dir, images):
        '''
        Write metadata.opf into ``output_dir``.

        The manifest lists index.html and every entry of ``images`` (below
        images/); the spine contains index.html only.

        :return: path of the written metadata.opf.
        '''
        mi = MetaInformation(None, None)

        with CurrentDir(output_dir):
            opf = OPFCreator(output_dir, mi)

            manifest = [('index.html', None)]
            manifest.extend((os.path.join('images/', i), None) for i in images)

            opf.create_manifest(manifest)
            opf.create_spine(['index.html'])
            with open('metadata.opf', 'wb') as opffile:
                opf.render(opffile)

        return os.path.join(output_dir, 'metadata.opf')

    def dump_pml(self):
        '''Return the unconverted pml text of all text pages concatenated.'''
        return ''.join(self.get_text_page(i)
                for i in range(1, self.header_record.num_text_pages + 1))
|
||||||
|
|
||||||
|
|
||||||
|
class EreaderMetadata(object):
    '''Placeholder for eReader metadata; nothing is implemented yet.'''

    def __init__(self, record):
        # The record is accepted but currently ignored.
        pass
|
18
src/calibre/ebooks/pdb/formatreader.py
Normal file
18
src/calibre/ebooks/pdb/formatreader.py
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import with_statement
|
||||||
|
'''
|
||||||
|
Interface defining the necessary public functions for a pdb format reader.
|
||||||
|
'''
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
|
||||||
|
class FormatReader(object):
    '''
    Base class for pdb format readers.

    Both methods raise NotImplementedError and have to be overridden by
    the concrete reader.
    '''

    def __init__(self, header, stream, log, encoding=None):
        # Must be overridden by subclasses.
        raise NotImplementedError()

    def extract_content(self, output_dir):
        # Must be overridden by subclasses.
        raise NotImplementedError()
|
60
src/calibre/ebooks/pdb/header.py
Normal file
60
src/calibre/ebooks/pdb/header.py
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import with_statement
|
||||||
|
'''
|
||||||
|
Read the header data from a pdb file.
|
||||||
|
'''
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os, struct
|
||||||
|
|
||||||
|
class PdbHeader(object):
    '''
    Reads the header fields and the section data of a pdb file.

    The stream passed in has to be seekable; every method repositions it.
    '''

    def __init__(self, stream):
        '''
        :param stream: open, seekable pdb file (binary).
        '''
        self.stream = stream
        self.ident = self.identity()
        self.num_sections = self.section_count()
        self.title = self.name()

    def identity(self):
        '''Return the 8 bytes stored at offset 60 of the file.'''
        self.stream.seek(60)
        return self.stream.read(8)

    def section_count(self):
        '''Return the big-endian unsigned short stored at offset 76.'''
        self.stream.seek(76)
        return struct.unpack('>H', self.stream.read(2))[0]

    def name(self):
        '''Return the first 32 bytes of the file with all NUL bytes removed.'''
        self.stream.seek(0)
        return self.stream.read(32).replace(b'\x00', b'')

    def _check_section(self, number):
        # Reject section numbers outside 0 .. num_sections - 1.
        if not 0 <= number < self.num_sections:
            raise ValueError('Not a valid section number %i' % number)

    def _read_section_entry(self, number):
        # Each section has an 8 byte entry in the table starting at offset
        # 78: an unsigned long followed by four single bytes.
        self.stream.seek(78 + number * 8)
        return struct.unpack('>LBBBB', self.stream.read(8))

    def full_section_info(self, number):
        '''
        Return ``(offset, flags, val)`` for section ``number``.

        ``flags`` is the first of the four trailing bytes of the entry and
        ``val`` the remaining three combined into one 24 bit integer.

        :raises ValueError: if ``number`` is not a valid section number.
        '''
        self._check_section(number)

        # Unpack the whole entry; indexing the result with [0] would yield
        # only the offset and make this five-way assignment fail.
        offset, a1, a2, a3, a4 = self._read_section_entry(number)
        flags, val = a1, a2 << 16 | a3 << 8 | a4
        return (offset, flags, val)

    def section_offset(self, number):
        '''
        Return the file offset at which section ``number`` starts.

        :raises ValueError: if ``number`` is not a valid section number.
        '''
        self._check_section(number)

        return self._read_section_entry(number)[0]

    def section_data(self, number):
        '''
        Return the raw data of section ``number``.

        A section ends where the next one starts; the last section runs to
        the end of the stream.

        :raises ValueError: if ``number`` is not a valid section number.
        '''
        self._check_section(number)

        start = self.section_offset(number)
        if number == self.num_sections - 1:
            # Ask the stream itself for its size so that streams without a
            # file name (in-memory or temporary ones) work as well.
            self.stream.seek(0, os.SEEK_END)
            end = self.stream.tell()
        else:
            end = self.section_offset(number + 1)
        self.stream.seek(start)
        return self.stream.read(end - start)
|
34
src/calibre/ebooks/pdb/input.py
Normal file
34
src/calibre/ebooks/pdb/input.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
|
from calibre.ebooks.pdb.header import PdbHeader
|
||||||
|
from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader
|
||||||
|
|
||||||
|
class PDBInput(InputFormatPlugin):
    '''
    Input plugin for pdb files.

    The format stored inside the pdb container is detected from the
    identity in the file header and the matching reader does the actual
    extraction.
    '''

    name        = 'PDB Input'
    author      = 'John Schember'
    description = 'Convert PDB to HTML'
    file_types  = set(['pdb'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Extract the book in ``stream`` into the current working directory.

        :param stream: open pdb file.
        :param options: conversion options; ``input_encoding`` is passed on
            to the reader.
        :param log: logger used for debug output.
        :return: whatever the reader's extract_content() returns (the path
            of the generated opf for the eReader reader).
        :raises PDBError: if no reader is registered for the file's identity.
        '''
        header = PdbHeader(stream)
        reader_class = get_reader(header.ident)

        if reader_class is None:
            # Report the identity value itself; header.identity is the
            # method that reads it, not the value.
            raise PDBError('Unknown format in pdb file. Identity is %s' % header.ident)

        log.debug('Detected ebook format as: %s with identity: %s' % (IDENTITY_TO_NAME[header.ident], header.ident))

        reader = reader_class(header, stream, log, options.input_encoding)
        opf = reader.extract_content(os.getcwd())

        return opf
|
Loading…
x
Reference in New Issue
Block a user