mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Start of plucker input support.
This commit is contained in:
parent
0114039787
commit
b857fd3fd1
@ -12,6 +12,7 @@ from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
|
||||
from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
|
||||
from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
|
||||
from calibre.ebooks.pdb.pdf.reader import Reader as pdf_reader
|
||||
from calibre.ebooks.pdb.plucker.reader import Reader as plucker_reader
|
||||
|
||||
FORMAT_READERS = {
|
||||
'PNPdPPrs': ereader_reader,
|
||||
@ -19,6 +20,7 @@ FORMAT_READERS = {
|
||||
'zTXTGPlm': ztxt_reader,
|
||||
'TEXtREAd': palmdoc_reader,
|
||||
'.pdfADBE': pdf_reader,
|
||||
'DataPlkr': plucker_reader,
|
||||
}
|
||||
|
||||
from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
|
||||
@ -37,6 +39,7 @@ IDENTITY_TO_NAME = {
|
||||
'zTXTGPlm': 'zTXT',
|
||||
'TEXtREAd': 'PalmDOC',
|
||||
'.pdfADBE': 'Adobe Reader',
|
||||
'DataPlkr': 'Plucker',
|
||||
|
||||
'BVokBDIC': 'BDicty',
|
||||
'DB99DBOS': 'DB (Database program)',
|
||||
@ -50,7 +53,6 @@ IDENTITY_TO_NAME = {
|
||||
'DATALSdb': 'LIST',
|
||||
'Mdb1Mdb1': 'MobileDB',
|
||||
'BOOKMOBI': 'MobiPocket',
|
||||
'DataPlkr': 'Plucker',
|
||||
'DataSprd': 'QuickSheet',
|
||||
'SM01SMem': 'SuperMemo',
|
||||
'TEXtTlDc': 'TealDoc',
|
||||
|
0
src/calibre/ebooks/pdb/plucker/__init__.py
Normal file
0
src/calibre/ebooks/pdb/plucker/__init__.py
Normal file
149
src/calibre/ebooks/pdb/plucker/reader.py
Normal file
149
src/calibre/ebooks/pdb/plucker/reader.py
Normal file
@ -0,0 +1,149 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
#from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '20011, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
import struct
|
||||
import zlib
|
||||
|
||||
from calibre import CurrentDir
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||
|
||||
DATATYPE_PHTML = 0
|
||||
DATATYPE_PHTML_COMPRESSED = 1
|
||||
DATATYPE_TBMP = 2
|
||||
DATATYPE_TBMP_COMPRESSED = 3
|
||||
DATATYPE_MAILTO = 4
|
||||
DATATYPE_LINK_INDEX = 5
|
||||
DATATYPE_LINKS = 6
|
||||
DATATYPE_LINKS_COMPRESSED = 7
|
||||
DATATYPE_BOOKMARKS = 8
|
||||
DATATYPE_CATEGORY = 9
|
||||
DATATYPE_METADATA = 10
|
||||
DATATYPE_STYLE_SHEET = 11
|
||||
DATATYPE_FONT_PAGE = 12
|
||||
DATATYPE_TABLE = 13
|
||||
DATATYPE_TABLE_COMPRESSED = 14
|
||||
DATATYPE_COMPOSITE_IMAGE = 15
|
||||
DATATYPE_PAGELIST_METADATA = 16
|
||||
DATATYPE_SORTED_URL_INDEX = 17
|
||||
DATATYPE_SORTED_URL = 18
|
||||
DATATYPE_SORTED_URL_COMPRESSED = 19
|
||||
DATATYPE_EXT_ANCHOR_INDEX = 20
|
||||
DATATYPE_EXT_ANCHOR = 21
|
||||
DATATYPE_EXT_ANCHOR_COMPRESSED = 22
|
||||
|
||||
class HeaderRecord(object):
|
||||
|
||||
def __init__(self, raw):
|
||||
self.uid, = struct.unpack('>H', raw[0:2])
|
||||
# This is labled version in the spec.
|
||||
# 2 is ZLIB compressed,
|
||||
# 1 is DOC compressed
|
||||
self.compression, = struct.unpack('>H', raw[2:4])
|
||||
self.records, = struct.unpack('>H', raw[4:6])
|
||||
|
||||
self.reserved = {}
|
||||
for i in xrange(self.records):
|
||||
adv = 4*i
|
||||
name, = struct.unpack('>H', raw[6+adv:8+adv])
|
||||
id, = struct.unpack('>H', raw[8+adv:10+adv])
|
||||
self.reserved[id] = name
|
||||
|
||||
|
||||
class SectionHeader(object):
|
||||
|
||||
def __init__(self, raw):
|
||||
self.uid, = struct.unpack('>H', raw[0:2])
|
||||
self.paragraphs, = struct.unpack('>H', raw[2:4])
|
||||
self.size, = struct.unpack('>H', raw[4:6])
|
||||
self.type, = struct.unpack('>B', raw[6])
|
||||
self.flags, = struct.unpack('>B', raw[7])
|
||||
|
||||
|
||||
class SectionHeaderText(object):
|
||||
|
||||
def __init__(self, data_header, raw):
|
||||
self.sizes = []
|
||||
self.attributes = []
|
||||
|
||||
for i in xrange(data_header.paragraphs):
|
||||
adv = 4*i
|
||||
self.sizes.append(struct.unpack('>H', raw[8+adv:10+adv])[0])
|
||||
self.attributes.append(struct.unpack('>H', raw[10+adv:12+adv])[0])
|
||||
|
||||
|
||||
class Reader(FormatReader):
|
||||
|
||||
def __init__(self, header, stream, log, options):
|
||||
self.stream = stream
|
||||
self.log = log
|
||||
self.options = options
|
||||
|
||||
self.sections = []
|
||||
for i in range(1, header.num_sections):
|
||||
start = 8
|
||||
raw_data = header.section_data(i)
|
||||
data_header = SectionHeader(raw_data)
|
||||
sub_header = None
|
||||
if data_header.type in (DATATYPE_PHTML, DATATYPE_PHTML_COMPRESSED):
|
||||
sub_header = SectionHeaderText(data_header, raw_data)
|
||||
start += data_header.paragraphs * 4
|
||||
self.sections.append((data_header, sub_header, raw_data[start:]))
|
||||
|
||||
self.header_record = HeaderRecord(header.section_data(0))
|
||||
|
||||
from calibre.ebooks.metadata.pdb import get_metadata
|
||||
self.mi = get_metadata(stream, False)
|
||||
|
||||
def extract_content(self, output_dir):
|
||||
html = u''
|
||||
images = []
|
||||
|
||||
for header, sub_header, data in self.sections:
|
||||
if header.type == DATATYPE_PHTML:
|
||||
html += data
|
||||
elif header.type == DATATYPE_PHTML_COMPRESSED:
|
||||
d = self.decompress_phtml(data).decode('latin-1', 'replace')
|
||||
print len(d) == header.size
|
||||
html += d
|
||||
|
||||
print html
|
||||
with CurrentDir(output_dir):
|
||||
with open('index.html', 'wb') as index:
|
||||
self.log.debug('Writing text to index.html')
|
||||
index.write(html.encode('utf-8'))
|
||||
|
||||
opf_path = self.create_opf(output_dir, images)
|
||||
|
||||
return opf_path
|
||||
|
||||
def decompress_phtml(self, data):
|
||||
if self.header_record.compression == 2:
|
||||
raise NotImplementedError
|
||||
#return zlib.decompress(data)
|
||||
elif self.header_record.compression == 1:
|
||||
from calibre.ebooks.compression.palmdoc import decompress_doc
|
||||
return decompress_doc(data)
|
||||
|
||||
|
||||
def create_opf(self, output_dir, images):
|
||||
with CurrentDir(output_dir):
|
||||
opf = OPFCreator(output_dir, self.mi)
|
||||
|
||||
manifest = [('index.html', None)]
|
||||
|
||||
for i in images:
|
||||
manifest.append((os.path.join('images/', i), None))
|
||||
|
||||
opf.create_manifest(manifest)
|
||||
opf.create_spine(['index.html'])
|
||||
with open('metadata.opf', 'wb') as opffile:
|
||||
opf.render(opffile)
|
||||
|
||||
return os.path.join(output_dir, 'metadata.opf')
|
Loading…
x
Reference in New Issue
Block a user