mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Pull from driver dev
This commit is contained in:
commit
4afc1a7106
@ -29,7 +29,7 @@ def get_metadata(stream, extract_cover=True):
|
||||
MetadataReader = MREADER.get(pheader.ident, None)
|
||||
|
||||
if MetadataReader is None:
|
||||
return MetaInformation(_('Unknown'), [_('Unknown')])
|
||||
return MetaInformation(pheader.title, [_('Unknown')])
|
||||
|
||||
|
||||
return MetadataReader(stream, extract_cover)
|
||||
|
@ -6,15 +6,46 @@ __copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre.ebooks.pdb.ereader.reader import Reader as eReader
|
||||
from calibre.ebooks.pdb.ztxt.reader import Reader as zTXT
|
||||
from calibre.ebooks.pdb.palmdoc.reader import Reader as PalmDoc
|
||||
|
||||
# Reader class to use for each supported PDB identity string.
# An identity missing from this table has no reader; see IDENTITY_TO_NAME
# for a human-readable name of such formats.
FORMATS = {
    'PNPdPPrs': eReader,
    'PNRdPPrs': eReader,
    'zTXTGPlm': zTXT,
    'TEXtREAd': PalmDoc,
}
|
||||
|
||||
# Human-readable name for each known PDB identity string. Used when
# reporting which kind of book was found inside the container.
IDENTITY_TO_NAME = {
    # Identities that also have an entry in FORMATS (a reader exists).
    'PNPdPPrs': 'eReader',
    'PNRdPPrs': 'eReader',
    'zTXTGPlm': 'zTXT',
    'TEXtREAd': 'PalmDOC',

    # Identities that are only recognised by name.
    '.pdfADBE': 'Adobe Reader',
    'BVokBDIC': 'BDicty',
    'DB99DBOS': 'DB (Database program)',
    'vIMGView': 'FireViewer (ImageViewer)',
    'PmDBPmDB': 'HanDBase',
    'InfoINDB': 'InfoView',
    'ToGoToGo': 'iSilo',
    'SDocSilX': 'iSilo 3',
    'JbDbJBas': 'JFile',
    'JfDbJFil': 'JFile Pro',
    'DATALSdb': 'LIST',
    'Mdb1Mdb1': 'MobileDB',
    'BOOKMOBI': 'MobiPocket',
    'DataPlkr': 'Plucker',
    'DataSprd': 'QuickSheet',
    'SM01SMem': 'SuperMemo',
    'TEXtTlDc': 'TealDoc',
    'InfoTlIf': 'TealInfo',
    'DataTlMl': 'TealMeal',
    'DataTlPt': 'TealPaint',
    'dataTDBP': 'ThinkDB',
    'TdatTide': 'Tides',
    'ToRaTRPW': 'TomeRaider',
    'BDOCWrdS': 'WordSmith',
}
|
||||
|
||||
class PDBError(Exception):
|
||||
|
@ -1,5 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
|
@ -24,7 +24,7 @@ class PDBInput(InputFormatPlugin):
|
||||
Reader = get_reader(header.ident)
|
||||
|
||||
if Reader is None:
|
||||
raise PDBError('Unknown format in pdb file. Identity is %s' % header.identity)
|
||||
raise PDBError('No reader avaliable for format within container.\n Identity is %s. Book type is %s' % (header.ident, IDENTITY_TO_NAME.get(header.ident, _('Unknown'))))
|
||||
|
||||
log.debug('Detected ebook format as: %s with identity: %s' % (IDENTITY_TO_NAME[header.ident], header.ident))
|
||||
|
||||
|
0
src/calibre/ebooks/pdb/palmdoc/__init__.py
Normal file
0
src/calibre/ebooks/pdb/palmdoc/__init__.py
Normal file
70
src/calibre/ebooks/pdb/palmdoc/reader.py
Normal file
70
src/calibre/ebooks/pdb/palmdoc/reader.py
Normal file
@ -0,0 +1,70 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
'''
|
||||
Read content from palmdoc pdb file.
|
||||
'''
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, struct, zlib
|
||||
|
||||
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||
from calibre.ebooks.mobi.palmdoc import decompress_doc
|
||||
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
|
||||
|
||||
class HeaderRecord(object):
    '''
    The first record in the file is always the header record. It holds
    information related to the location of text, images, and so on
    in the file. This is used in conjunction with the sections
    defined in the file header.

    Only two fields are read from the record:

    ``compression``
        Big-endian unsigned short at offset 0. The reader in this module
        treats 1 as stored text and 2 as PalmDoc-compressed text.
    ``num_records``
        Big-endian unsigned short at offset 8; the number of text records
        that follow the header record.
    '''

    def __init__(self, raw):
        '''
        :param raw: Byte string holding the header record. It must be at
                    least 10 bytes long, otherwise ``struct.error`` is raised.
        '''
        # One pass over the record: short, six skipped bytes, short.
        self.compression, self.num_records = struct.unpack_from('>H6xH', raw)
|
||||
|
||||
|
||||
class Reader(FormatReader):
    '''
    Read the text of a PalmDoc book stored in a PDB container and write
    it out as ``index.html`` together with a ``metadata.opf``.
    '''

    def __init__(self, header, stream, log, encoding=None):
        '''
        :param header: Parsed PDB file header; must provide ``num_sections``
                       and ``section_data(index)``.
        :param stream: The open book file; used later to read metadata.
        :param log: Logger object.
        :param encoding: Text encoding of the book, or None for cp1252.
        '''
        self.stream = stream
        self.log = log
        self.encoding = encoding

        # Load every section up front so records can be fetched by index.
        self.sections = [header.section_data(i) for i in range(header.num_sections)]

        self.header_record = HeaderRecord(self.section_data(0))

    def section_data(self, number):
        '''Return the raw bytes of section ``number``.'''
        return self.sections[number]

    def _text_encoding(self):
        '''Return the encoding used to decode the book text.'''
        return 'cp1252' if self.encoding is None else self.encoding

    def _raw_text(self, number):
        '''
        Return the undecoded text bytes of record ``number``, or None when
        the compression scheme in the header record is not supported.
        '''
        compression = self.header_record.compression
        if compression == 1:
            # Text is stored as is.
            return self.section_data(number)
        if compression == 2:
            return decompress_doc(self.section_data(number))
        return None

    def decompress_text(self, number):
        '''
        Return the decoded text of record ``number``. An empty string is
        returned when the compression scheme is not supported.
        '''
        raw = self._raw_text(number)
        if raw is None:
            return ''
        return raw.decode(self._text_encoding())

    def extract_content(self, output_dir):
        '''
        Write the book to ``output_dir`` as ``index.html`` plus
        ``metadata.opf`` and return the path to the OPF file.
        '''
        # Join the raw bytes of all records before decoding. Decoding record
        # by record fails with multi-byte encodings whenever a character is
        # split across a record boundary.
        chunks = []
        for i in range(1, self.header_record.num_records + 1):
            raw = self._raw_text(i)
            if raw is not None:
                chunks.append(raw)
        txt = b''.join(chunks).decode(self._text_encoding())

        html = txt_to_markdown(txt)
        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
            index.write(html.encode('utf-8'))

        # Imported here rather than at module level, presumably to avoid an
        # import cycle -- TODO confirm.
        from calibre.ebooks.metadata.meta import get_metadata
        mi = get_metadata(self.stream, 'pdb')
        manifest = [('index.html', None)]
        spine = ['index.html']
        opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)

        return os.path.join(output_dir, 'metadata.opf')
|
||||
|
11
src/calibre/ebooks/pdb/ztxt/__init__.py
Normal file
11
src/calibre/ebooks/pdb/ztxt/__init__.py
Normal file
@ -0,0 +1,11 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
|
||||
class zTXTError(Exception):
    '''Error type of the zTXT PDB format package.'''
|
||||
|
74
src/calibre/ebooks/pdb/ztxt/reader.py
Normal file
74
src/calibre/ebooks/pdb/ztxt/reader.py
Normal file
@ -0,0 +1,74 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
'''
|
||||
Read content from ztxt pdb file.
|
||||
'''
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, struct, zlib
|
||||
|
||||
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||
from calibre.ebooks.pdb.ztxt import zTXTError
|
||||
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
|
||||
|
||||
class HeaderRecord(object):
    '''
    The first record in the file is always the header record. It holds
    information related to the location of text, images, and so on
    in the file. This is used in conjunction with the sections
    defined in the file header.

    The first ten bytes are parsed as big-endian values:

    ``version``      unsigned short at offset 0
    ``num_records``  unsigned short at offset 2; number of text records
                     following the header record
    ``size``         unsigned long at offset 4
    ``record_size``  unsigned short at offset 8
    '''

    def __init__(self, raw):
        '''
        :param raw: Byte string holding the header record. It must be at
                    least 10 bytes long, otherwise ``struct.error`` is raised.
        '''
        (self.version, self.num_records,
         self.size, self.record_size) = struct.unpack_from('>HHLH', raw)
|
||||
|
||||
|
||||
class Reader(FormatReader):
    '''
    Read the text of a zTXT book stored in a PDB container and write it
    out as ``index.html`` together with a ``metadata.opf``.
    '''

    def __init__(self, header, stream, log, encoding=None):
        '''
        :param header: Parsed PDB file header; must provide ``num_sections``
                       and ``section_data(index)``.
        :param stream: The open book file; used later to read metadata.
        :param log: Logger object.
        :param encoding: Text encoding of the book, or None for cp1252.
        '''
        self.stream = stream
        self.log = log
        self.encoding = encoding

        # Load every section up front so records can be fetched by index.
        self.sections = [header.section_data(i) for i in range(header.num_sections)]

        self.header_record = HeaderRecord(self.section_data(0))

        # Initialize the decompressor. The output is discarded here;
        # record 1 is decompressed again with a fresh decompressor when the
        # text is extracted. Presumably this is done to fail early on a
        # file whose first record is not valid zlib data -- TODO confirm.
        self.uncompressor = zlib.decompressobj()
        self.uncompressor.decompress(self.section_data(1))

    def section_data(self, number):
        '''Return the raw bytes of section ``number``.'''
        return self.sections[number]

    def _text_encoding(self):
        '''Return the encoding used to decode the book text.'''
        return 'cp1252' if self.encoding is None else self.encoding

    def _raw_text(self, number):
        '''
        Return the decompressed, undecoded bytes of record ``number``.

        The decompressor keeps its state between calls and is only reset
        for record 1, so records have to be requested in ascending order
        starting with 1.
        '''
        if number == 1:
            self.uncompressor = zlib.decompressobj()
        return self.uncompressor.decompress(self.section_data(number))

    def decompress_text(self, number):
        '''
        Return the decoded text of record ``number``. Records have to be
        requested in ascending order starting with 1.
        '''
        return self._raw_text(number).decode(self._text_encoding())

    def extract_content(self, output_dir):
        '''
        Write the book to ``output_dir`` as ``index.html`` plus
        ``metadata.opf`` and return the path to the OPF file.
        '''
        # Join the raw bytes of all records before decoding. Decoding record
        # by record fails with multi-byte encodings whenever a character is
        # split across a record boundary.
        chunks = []
        for i in range(1, self.header_record.num_records + 1):
            chunks.append(self._raw_text(i))
        txt = b''.join(chunks).decode(self._text_encoding())

        html = txt_to_markdown(txt)
        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
            index.write(html.encode('utf-8'))

        # Imported here rather than at module level, presumably to avoid an
        # import cycle -- TODO confirm.
        from calibre.ebooks.metadata.meta import get_metadata
        mi = get_metadata(self.stream, 'pdb')
        manifest = [('index.html', None)]
        spine = ['index.html']
        opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)

        return os.path.join(output_dir, 'metadata.opf')
|
||||
|
@ -62,12 +62,12 @@ class PDFOutput(OutputFormatPlugin):
|
||||
self.write(ImagePDFWriter, images)
|
||||
|
||||
def convert_text(self, oeb_book):
|
||||
with TemporaryDirectory('_pdf_out') as oebdir:
|
||||
with TemporaryDirectory('_pdf_out') as oeb_dir:
|
||||
from calibre.customize.ui import plugin_for_output_format
|
||||
oeb_output = plugin_for_output_format('oeb')
|
||||
oeb_output.convert(oeb, oeb_dir, self.input_plugin, self.opts, self.log)
|
||||
oeb_output.convert(oeb_book, oeb_dir, self.input_plugin, self.opts, self.log)
|
||||
|
||||
opfpath = glob.glob(os.path.join(oebdir, '*.opf'))[0]
|
||||
opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0]
|
||||
opf = OPF(opfpath, os.path.dirname(opfpath))
|
||||
|
||||
self.write(PDFWriter, [s.path for s in opf.spine])
|
||||
|
@ -8,8 +8,7 @@ __docformat__ = 'restructuredtext en'
|
||||
import os
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
from calibre.ebooks.markdown import markdown
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
|
||||
|
||||
class TXTInput(InputFormatPlugin):
|
||||
|
||||
@ -25,19 +24,15 @@ class TXTInput(InputFormatPlugin):
|
||||
ienc = options.input_encoding
|
||||
txt = stream.read().decode(ienc)
|
||||
|
||||
md = markdown.Markdown(
|
||||
extensions=['footnotes', 'tables', 'toc'],
|
||||
safe_mode=False,)
|
||||
html = '<html><head><title /></head><body>'+md.convert(txt)+'</body></html>'
|
||||
html = txt_to_markdown(txt)
|
||||
with open('index.html', 'wb') as index:
|
||||
index.write(html.encode('utf-8'))
|
||||
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
mi = get_metadata(stream, 'txt')
|
||||
opf = OPFCreator(os.getcwd(), mi)
|
||||
opf.create_manifest([('index.html', None)])
|
||||
opf.create_spine(['index.html'])
|
||||
with open('metadata.opf', 'wb') as opffile:
|
||||
opf.render(opffile)
|
||||
manifest = [('index.html', None)]
|
||||
spine = ['index.html']
|
||||
opf_writer(os.getcwd(), 'metadata.opf', manifest, spine, mi)
|
||||
|
||||
return os.path.join(os.getcwd(), 'metadata.opf')
|
||||
|
||||
|
30
src/calibre/ebooks/txt/processor.py
Normal file
30
src/calibre/ebooks/txt/processor.py
Normal file
@ -0,0 +1,30 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
'''
|
||||
Read content from txt file.
|
||||
'''
|
||||
|
||||
import os
|
||||
|
||||
from calibre.ebooks.markdown import markdown
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
def txt_to_markdown(txt):
    '''
    Render ``txt`` as Markdown and return a complete HTML document.

    The text is converted with the footnotes, tables and toc extensions
    enabled and safe mode off. The result is wrapped in a minimal
    html/head/body skeleton with an empty title.

    :param txt: The text to convert.
    :return: The HTML document as a string.
    '''
    md = markdown.Markdown(
        extensions=['footnotes', 'tables', 'toc'],
        safe_mode=False,)

    return '<html><head><title /></head><body>'+md.convert(txt)+'</body></html>'
|
||||
|
||||
def opf_writer(path, opf_name, manifest, spine, mi):
    '''
    Create an OPF file named ``opf_name`` inside the directory ``path``.

    :param path: Directory the OPF is written to; also passed to
                 ``OPFCreator`` as its base path.
    :param opf_name: File name of the OPF file.
    :param manifest: Manifest entries, passed to ``create_manifest``.
    :param spine: Spine entries, passed to ``create_spine``.
    :param mi: Metadata object for the book.
    '''
    opf = OPFCreator(path, mi)
    opf.create_manifest(manifest)
    opf.create_spine(spine)

    opf_path = os.path.join(path, opf_name)
    with open(opf_path, 'wb') as opffile:
        opf.render(opffile)
|
||||
|
Loading…
x
Reference in New Issue
Block a user