mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Pull from driver dev
This commit is contained in:
commit
4afc1a7106
@ -29,7 +29,7 @@ def get_metadata(stream, extract_cover=True):
|
|||||||
MetadataReader = MREADER.get(pheader.ident, None)
|
MetadataReader = MREADER.get(pheader.ident, None)
|
||||||
|
|
||||||
if MetadataReader is None:
|
if MetadataReader is None:
|
||||||
return MetaInformation(_('Unknown'), [_('Unknown')])
|
return MetaInformation(pheader.title, [_('Unknown')])
|
||||||
|
|
||||||
|
|
||||||
return MetadataReader(stream, extract_cover)
|
return MetadataReader(stream, extract_cover)
|
||||||
|
@ -6,15 +6,46 @@ __copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
from calibre.ebooks.pdb.ereader.reader import Reader as eReader
|
from calibre.ebooks.pdb.ereader.reader import Reader as eReader
|
||||||
|
from calibre.ebooks.pdb.ztxt.reader import Reader as zTXT
|
||||||
|
from calibre.ebooks.pdb.palmdoc.reader import Reader as PalmDoc
|
||||||
|
|
||||||
FORMATS = {
|
FORMATS = {
|
||||||
'PNPdPPrs' : eReader,
|
'PNPdPPrs' : eReader,
|
||||||
'PNRdPPrs' : eReader,
|
'PNRdPPrs' : eReader,
|
||||||
|
'zTXTGPlm' : zTXT,
|
||||||
|
'TEXtREAd' : PalmDoc,
|
||||||
}
|
}
|
||||||
|
|
||||||
IDENTITY_TO_NAME = {
|
IDENTITY_TO_NAME = {
|
||||||
'PNPdPPrs' : 'eReader',
|
'PNPdPPrs' : 'eReader',
|
||||||
'PNRdPPrs' : 'eReader',
|
'PNRdPPrs' : 'eReader',
|
||||||
|
'zTXTGPlm' : 'zTXT',
|
||||||
|
'TEXtREAd' : 'PalmDOC',
|
||||||
|
|
||||||
|
'.pdfADBE' : 'Adobe Reader',
|
||||||
|
'BVokBDIC' : 'BDicty',
|
||||||
|
'DB99DBOS' : 'DB (Database program)',
|
||||||
|
'vIMGView' : 'FireViewer (ImageViewer)',
|
||||||
|
'PmDBPmDB' : 'HanDBase',
|
||||||
|
'InfoINDB' : 'InfoView',
|
||||||
|
'ToGoToGo' : 'iSilo',
|
||||||
|
'SDocSilX' : 'iSilo 3',
|
||||||
|
'JbDbJBas' : 'JFile',
|
||||||
|
'JfDbJFil' : 'JFile Pro',
|
||||||
|
'DATALSdb' : 'LIST',
|
||||||
|
'Mdb1Mdb1' : 'MobileDB',
|
||||||
|
'BOOKMOBI' : 'MobiPocket',
|
||||||
|
'DataPlkr' : 'Plucker',
|
||||||
|
'DataSprd' : 'QuickSheet',
|
||||||
|
'SM01SMem' : 'SuperMemo',
|
||||||
|
'TEXtTlDc' : 'TealDoc',
|
||||||
|
'InfoTlIf' : 'TealInfo',
|
||||||
|
'DataTlMl' : 'TealMeal',
|
||||||
|
'DataTlPt' : 'TealPaint',
|
||||||
|
'dataTDBP' : 'ThinkDB',
|
||||||
|
'TdatTide' : 'Tides',
|
||||||
|
'ToRaTRPW' : 'TomeRaider',
|
||||||
|
'BDOCWrdS' : 'WordSmith',
|
||||||
}
|
}
|
||||||
|
|
||||||
class PDBError(Exception):
|
class PDBError(Exception):
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import with_statement
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
@ -16,7 +16,7 @@ from calibre.ebooks.metadata import MetaInformation
|
|||||||
from calibre.ebooks.pdb.formatreader import FormatReader
|
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||||
from calibre.ebooks.pdb.ereader import EreaderError
|
from calibre.ebooks.pdb.ereader import EreaderError
|
||||||
from calibre.ebooks.pml.pmlconverter import pml_to_html, \
|
from calibre.ebooks.pml.pmlconverter import pml_to_html, \
|
||||||
footnote_sidebar_to_html
|
footnote_sidebar_to_html
|
||||||
from calibre.ebooks.mobi.palmdoc import decompress_doc
|
from calibre.ebooks.mobi.palmdoc import decompress_doc
|
||||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ class PDBInput(InputFormatPlugin):
|
|||||||
Reader = get_reader(header.ident)
|
Reader = get_reader(header.ident)
|
||||||
|
|
||||||
if Reader is None:
|
if Reader is None:
|
||||||
raise PDBError('Unknown format in pdb file. Identity is %s' % header.identity)
|
raise PDBError('No reader avaliable for format within container.\n Identity is %s. Book type is %s' % (header.ident, IDENTITY_TO_NAME.get(header.ident, _('Unknown'))))
|
||||||
|
|
||||||
log.debug('Detected ebook format as: %s with identity: %s' % (IDENTITY_TO_NAME[header.ident], header.ident))
|
log.debug('Detected ebook format as: %s with identity: %s' % (IDENTITY_TO_NAME[header.ident], header.ident))
|
||||||
|
|
||||||
|
0
src/calibre/ebooks/pdb/palmdoc/__init__.py
Normal file
0
src/calibre/ebooks/pdb/palmdoc/__init__.py
Normal file
70
src/calibre/ebooks/pdb/palmdoc/reader.py
Normal file
70
src/calibre/ebooks/pdb/palmdoc/reader.py
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
'''
|
||||||
|
Read content from palmdoc pdb file.
|
||||||
|
'''
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os, struct, zlib
|
||||||
|
|
||||||
|
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||||
|
from calibre.ebooks.mobi.palmdoc import decompress_doc
|
||||||
|
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
|
||||||
|
|
||||||
|
class HeaderRecord(object):
    '''
    Parsed view of the header record, which is always the first record
    in the file. It carries the information needed to locate the text
    (and other content) and is used together with the sections defined
    in the file header.

    Attributes set from ``raw``:

    * ``compression`` -- unsigned 16 bit big-endian value at bytes 0-1.
    * ``num_records`` -- unsigned 16 bit big-endian value at bytes 8-9.
    '''

    def __init__(self, raw):
        '''
        :param raw: Raw bytes of the header record; only the first ten
                    bytes are read.
        '''
        # One unpack for both fields: the six bytes between them are
        # skipped with pad bytes ('6x') because they are not used here.
        self.compression, self.num_records = struct.unpack('>H6xH', raw[0:10])
|
||||||
|
|
||||||
|
|
||||||
|
class Reader(FormatReader):
    '''
    Reads the text out of a PalmDoc pdb container and writes it to disk
    as an HTML file plus an OPF describing it.
    '''

    def __init__(self, header, stream, log, encoding=None):
        '''
        :param header: pdb file header. Must provide ``num_sections`` and
                       ``section_data(index)``.
        :param stream: Stream of the pdb file; handed to ``get_metadata``
                       when the content is extracted.
        :param log: Logger object. It is stored on the instance.
        :param encoding: Codec used to decode the text. ``None`` means
                         cp1252.
        '''
        self.stream = stream
        self.log = log
        self.encoding = encoding

        # Load every section up front so records can be looked up by index.
        self.sections = [header.section_data(i) for i in range(header.num_sections)]

        self.header_record = HeaderRecord(self.section_data(0))

    def section_data(self, number):
        '''
        Return the raw data of section ``number``. Raises ``IndexError``
        when the section does not exist.
        '''
        return self.sections[number]

    def decompress_text(self, number):
        '''
        Return the decoded text of record ``number``.

        Compression type 1 is decoded as is, type 2 is passed through
        ``decompress_doc`` first. Any other compression type yields an
        empty string without touching the record.
        '''
        codec = 'cp1252' if self.encoding is None else self.encoding
        compression = self.header_record.compression

        if compression == 1:
            return self.section_data(number).decode(codec)
        if compression == 2:
            return decompress_doc(self.section_data(number)).decode(codec)
        return ''

    def extract_content(self, output_dir):
        '''
        Write ``index.html`` and ``metadata.opf`` into ``output_dir``.

        :param output_dir: Existing directory that receives the output.
        :return: Path to the written ``metadata.opf``.
        '''
        # Text records are 1 .. num_records; record 0 is the header record.
        # Joining once avoids re-copying the accumulated text per record.
        txt = ''.join(self.decompress_text(i)
                      for i in range(1, self.header_record.num_records + 1))

        html = txt_to_markdown(txt)
        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
            index.write(html.encode('utf-8'))

        # NOTE(review): imported here rather than at module level,
        # presumably to avoid a circular import -- confirm before moving.
        from calibre.ebooks.metadata.meta import get_metadata
        mi = get_metadata(self.stream, 'pdb')
        manifest = [('index.html', None)]
        spine = ['index.html']
        opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)

        return os.path.join(output_dir, 'metadata.opf')
|
||||||
|
|
11
src/calibre/ebooks/pdb/ztxt/__init__.py
Normal file
11
src/calibre/ebooks/pdb/ztxt/__init__.py
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
class zTXTError(Exception):
    '''
    Error type for problems with zTXT pdb files.
    '''
|
||||||
|
|
74
src/calibre/ebooks/pdb/ztxt/reader.py
Normal file
74
src/calibre/ebooks/pdb/ztxt/reader.py
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
'''
|
||||||
|
Read content from ztxt pdb file.
|
||||||
|
'''
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os, struct, zlib
|
||||||
|
|
||||||
|
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||||
|
from calibre.ebooks.pdb.ztxt import zTXTError
|
||||||
|
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
|
||||||
|
|
||||||
|
class HeaderRecord(object):
    '''
    Parsed view of the header record, which is always the first record
    in the file. It carries the information needed to locate the text
    (and other content) and is used together with the sections defined
    in the file header.

    Attributes set from ``raw`` (all big-endian, unsigned):

    * ``version``     -- 16 bit value at bytes 0-1.
    * ``num_records`` -- 16 bit value at bytes 2-3.
    * ``size``        -- 32 bit value at bytes 4-7.
    * ``record_size`` -- 16 bit value at bytes 8-9.
    '''

    def __init__(self, raw):
        '''
        :param raw: Raw bytes of the header record; only the first ten
                    bytes are read.
        '''
        # The four fields are contiguous, so a single unpack covers them.
        fields = struct.unpack('>HHLH', raw[0:10])
        self.version, self.num_records, self.size, self.record_size = fields
|
||||||
|
|
||||||
|
|
||||||
|
class Reader(FormatReader):
    '''
    Reads the text out of a zTXT pdb container and writes it to disk as
    an HTML file plus an OPF describing it.
    '''

    def __init__(self, header, stream, log, encoding=None):
        '''
        :param header: pdb file header. Must provide ``num_sections`` and
                       ``section_data(index)``.
        :param stream: Stream of the pdb file; handed to ``get_metadata``
                       when the content is extracted.
        :param log: Logger object. It is stored on the instance.
        :param encoding: Codec used to decode the text. ``None`` means
                         cp1252.
        '''
        self.stream = stream
        self.log = log
        self.encoding = encoding

        # Load every section up front so records can be looked up by index.
        self.sections = [header.section_data(i) for i in range(header.num_sections)]

        self.header_record = HeaderRecord(self.section_data(0))

        # Initialize the decompressor.
        # NOTE(review): the output of this first decompress is discarded and
        # decompress_text(1) starts over with a fresh decompressor. Kept as
        # is; presumably it makes bad zlib data in record 1 fail at
        # construction time -- confirm before removing.
        self.uncompressor = zlib.decompressobj()
        self.uncompressor.decompress(self.section_data(1))

    def section_data(self, number):
        '''
        Return the raw data of section ``number``. Raises ``IndexError``
        when the section does not exist.
        '''
        return self.sections[number]

    def decompress_text(self, number):
        '''
        Return the decompressed, decoded text of record ``number``.

        One zlib decompressor is shared between calls and is replaced by
        a fresh one whenever record 1 is requested, so the state left by
        earlier calls affects later ones.
        '''
        codec = 'cp1252' if self.encoding is None else self.encoding

        if number == 1:
            self.uncompressor = zlib.decompressobj()
        return self.uncompressor.decompress(self.section_data(number)).decode(codec)

    def extract_content(self, output_dir):
        '''
        Write ``index.html`` and ``metadata.opf`` into ``output_dir``.

        :param output_dir: Existing directory that receives the output.
        :return: Path to the written ``metadata.opf``.
        '''
        # Text records are 1 .. num_records; record 0 is the header record.
        # Joining once avoids re-copying the accumulated text per record.
        txt = ''.join(self.decompress_text(i)
                      for i in range(1, self.header_record.num_records + 1))

        html = txt_to_markdown(txt)
        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
            index.write(html.encode('utf-8'))

        # NOTE(review): imported here rather than at module level,
        # presumably to avoid a circular import -- confirm before moving.
        from calibre.ebooks.metadata.meta import get_metadata
        mi = get_metadata(self.stream, 'pdb')
        manifest = [('index.html', None)]
        spine = ['index.html']
        opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)

        return os.path.join(output_dir, 'metadata.opf')
|
||||||
|
|
@ -62,12 +62,12 @@ class PDFOutput(OutputFormatPlugin):
|
|||||||
self.write(ImagePDFWriter, images)
|
self.write(ImagePDFWriter, images)
|
||||||
|
|
||||||
def convert_text(self, oeb_book):
|
def convert_text(self, oeb_book):
|
||||||
with TemporaryDirectory('_pdf_out') as oebdir:
|
with TemporaryDirectory('_pdf_out') as oeb_dir:
|
||||||
from calibre.customize.ui import plugin_for_output_format
|
from calibre.customize.ui import plugin_for_output_format
|
||||||
oeb_output = plugin_for_output_format('oeb')
|
oeb_output = plugin_for_output_format('oeb')
|
||||||
oeb_output.convert(oeb, oeb_dir, self.input_plugin, self.opts, self.log)
|
oeb_output.convert(oeb_book, oeb_dir, self.input_plugin, self.opts, self.log)
|
||||||
|
|
||||||
opfpath = glob.glob(os.path.join(oebdir, '*.opf'))[0]
|
opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0]
|
||||||
opf = OPF(opfpath, os.path.dirname(opfpath))
|
opf = OPF(opfpath, os.path.dirname(opfpath))
|
||||||
|
|
||||||
self.write(PDFWriter, [s.path for s in opf.spine])
|
self.write(PDFWriter, [s.path for s in opf.spine])
|
||||||
|
@ -8,8 +8,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
from calibre.ebooks.markdown import markdown
|
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
|
||||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
|
||||||
|
|
||||||
class TXTInput(InputFormatPlugin):
|
class TXTInput(InputFormatPlugin):
|
||||||
|
|
||||||
@ -25,19 +24,15 @@ class TXTInput(InputFormatPlugin):
|
|||||||
ienc = options.input_encoding
|
ienc = options.input_encoding
|
||||||
txt = stream.read().decode(ienc)
|
txt = stream.read().decode(ienc)
|
||||||
|
|
||||||
md = markdown.Markdown(
|
html = txt_to_markdown(txt)
|
||||||
extensions=['footnotes', 'tables', 'toc'],
|
|
||||||
safe_mode=False,)
|
|
||||||
html = '<html><head><title /></head><body>'+md.convert(txt)+'</body></html>'
|
|
||||||
with open('index.html', 'wb') as index:
|
with open('index.html', 'wb') as index:
|
||||||
index.write(html.encode('utf-8'))
|
index.write(html.encode('utf-8'))
|
||||||
|
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
from calibre.ebooks.metadata.meta import get_metadata
|
||||||
mi = get_metadata(stream, 'txt')
|
mi = get_metadata(stream, 'txt')
|
||||||
opf = OPFCreator(os.getcwd(), mi)
|
manifest = [('index.html', None)]
|
||||||
opf.create_manifest([('index.html', None)])
|
spine = ['index.html']
|
||||||
opf.create_spine(['index.html'])
|
opf_writer(os.getcwd(), 'metadata.opf', manifest, spine, mi)
|
||||||
with open('metadata.opf', 'wb') as opffile:
|
|
||||||
opf.render(opffile)
|
|
||||||
|
|
||||||
return os.path.join(os.getcwd(), 'metadata.opf')
|
return os.path.join(os.getcwd(), 'metadata.opf')
|
||||||
|
|
||||||
|
30
src/calibre/ebooks/txt/processor.py
Normal file
30
src/calibre/ebooks/txt/processor.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
'''
|
||||||
|
Read content from txt file.
|
||||||
|
'''
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from calibre.ebooks.markdown import markdown
|
||||||
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
def txt_to_markdown(txt):
    '''
    Convert ``txt`` with Markdown and wrap the result in a minimal HTML
    document.

    The converter runs with the footnotes, tables and toc extensions and
    with safe mode turned off.

    :param txt: Text to convert.
    :return: The complete HTML document as a string.
    '''
    converter = markdown.Markdown(
        safe_mode=False,
        extensions=['footnotes', 'tables', 'toc'])
    body = converter.convert(txt)

    return '<html><head><title /></head><body>' + body + '</body></html>'
|
||||||
|
|
||||||
|
def opf_writer(path, opf_name, manifest, spine, mi):
    '''
    Create an OPF file named ``opf_name`` inside ``path``.

    :param path: Directory the OPF is written to; also passed to
                 ``OPFCreator`` as its first argument.
    :param opf_name: File name of the OPF within ``path``.
    :param manifest: Manifest entries, passed to ``create_manifest``.
    :param spine: Spine entries, passed to ``create_spine``.
    :param mi: Metadata object, passed to ``OPFCreator``.
    '''
    creator = OPFCreator(path, mi)
    creator.create_manifest(manifest)
    creator.create_spine(spine)

    destination = os.path.join(path, opf_name)
    with open(destination, 'wb') as out:
        creator.render(out)
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user