mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
eReader Metadata: Extract cover.
This commit is contained in:
parent
5574f36c75
commit
56fd576055
@ -16,6 +16,18 @@ from calibre.ebooks.pdb.ereader.reader132 import HeaderRecord
|
|||||||
from calibre.ebooks.pdb.header import PdbHeaderBuilder
|
from calibre.ebooks.pdb.header import PdbHeaderBuilder
|
||||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||||
|
|
||||||
|
def get_cover(pheader, eheader):
|
||||||
|
cover_data = None
|
||||||
|
|
||||||
|
for i in range(eheader.image_count):
|
||||||
|
raw = pheader.section_data(eheader.image_data_offset + i)
|
||||||
|
|
||||||
|
if raw[4:4 + 32].strip('\x00') == 'cover.png':
|
||||||
|
cover_data = raw[62:]
|
||||||
|
break
|
||||||
|
|
||||||
|
return ('png', cover_data)
|
||||||
|
|
||||||
def get_metadata(stream, extract_cover=True):
|
def get_metadata(stream, extract_cover=True):
|
||||||
"""
|
"""
|
||||||
Return metadata as a L{MetaInfo} object
|
Return metadata as a L{MetaInfo} object
|
||||||
@ -29,7 +41,7 @@ def get_metadata(stream, extract_cover=True):
|
|||||||
if len(pheader.section_data(0)) == 132:
|
if len(pheader.section_data(0)) == 132:
|
||||||
hr = HeaderRecord(pheader.section_data(0))
|
hr = HeaderRecord(pheader.section_data(0))
|
||||||
|
|
||||||
if hr.version in (2, 10) and hr.has_metadata == 1:
|
if hr.compression in (2, 10) and hr.has_metadata == 1:
|
||||||
try:
|
try:
|
||||||
mdata = pheader.section_data(hr.metadata_offset)
|
mdata = pheader.section_data(hr.metadata_offset)
|
||||||
|
|
||||||
@ -41,6 +53,9 @@ def get_metadata(stream, extract_cover=True):
|
|||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
if extract_cover:
|
||||||
|
mi.cover_data = get_cover(pheader, hr)
|
||||||
|
|
||||||
if not mi.title:
|
if not mi.title:
|
||||||
mi.title = pheader.title if pheader.title else _('Unknown')
|
mi.title = pheader.title if pheader.title else _('Unknown')
|
||||||
|
|
||||||
@ -56,7 +71,7 @@ def set_metadata(stream, mi):
|
|||||||
sections = [pheader.section_data(x) for x in range(0, pheader.section_count())]
|
sections = [pheader.section_data(x) for x in range(0, pheader.section_count())]
|
||||||
hr = HeaderRecord(sections[0])
|
hr = HeaderRecord(sections[0])
|
||||||
|
|
||||||
if hr.version not in (2, 10):
|
if hr.compression not in (2, 10):
|
||||||
return
|
return
|
||||||
|
|
||||||
# Create a metadata record for the file if one does not already exist
|
# Create a metadata record for the file if one does not already exist
|
||||||
|
@ -29,12 +29,19 @@ class HeaderRecord(object):
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
def __init__(self, raw):
|
def __init__(self, raw):
|
||||||
self.version, = struct.unpack('>H', raw[0:2])
|
self.compression, = struct.unpack('>H', raw[0:2])
|
||||||
self.non_text_offset, = struct.unpack('>H', raw[12:14])
|
self.non_text_offset, = struct.unpack('>H', raw[12:14])
|
||||||
|
self.chapter_count, = struct.unpack('>H', raw[14:16])
|
||||||
|
self.image_count, = struct.unpack('>H', raw[20:22])
|
||||||
|
self.link_count, = struct.unpack('>H', raw[22:24])
|
||||||
self.has_metadata, = struct.unpack('>H', raw[24:26])
|
self.has_metadata, = struct.unpack('>H', raw[24:26])
|
||||||
self.footnote_rec, = struct.unpack('>H', raw[28:30])
|
self.footnote_count, = struct.unpack('>H', raw[28:30])
|
||||||
self.sidebar_rec, = struct.unpack('>H', raw[30:32])
|
self.sidebar_count, = struct.unpack('>H', raw[30:32])
|
||||||
|
self.chapter_offset, = struct.unpack('>H', raw[32:34])
|
||||||
|
self.small_font_page_offset, = struct.unpack('>H', raw[36:38])
|
||||||
|
self.large_font_page_offset, = struct.unpack('>H', raw[38:40])
|
||||||
self.image_data_offset, = struct.unpack('>H', raw[40:42])
|
self.image_data_offset, = struct.unpack('>H', raw[40:42])
|
||||||
|
self.link_offset, = struct.unpack('>H', raw[42:44])
|
||||||
self.metadata_offset, = struct.unpack('>H', raw[44:46])
|
self.metadata_offset, = struct.unpack('>H', raw[44:46])
|
||||||
self.footnote_offset, = struct.unpack('>H', raw[48:50])
|
self.footnote_offset, = struct.unpack('>H', raw[48:50])
|
||||||
self.sidebar_offset, = struct.unpack('>H', raw[50:52])
|
self.sidebar_offset, = struct.unpack('>H', raw[50:52])
|
||||||
@ -58,11 +65,11 @@ class Reader132(FormatReader):
|
|||||||
|
|
||||||
self.header_record = HeaderRecord(self.section_data(0))
|
self.header_record = HeaderRecord(self.section_data(0))
|
||||||
|
|
||||||
if self.header_record.version not in (2, 10):
|
if self.header_record.compression not in (2, 10):
|
||||||
if self.header_record.version in (260, 272):
|
if self.header_record.compression in (260, 272):
|
||||||
raise DRMError('eReader DRM is not supported.')
|
raise DRMError('eReader DRM is not supported.')
|
||||||
else:
|
else:
|
||||||
raise EreaderError('Unknown book version %i.' % self.header_record.version)
|
raise EreaderError('Unknown book compression %i.' % self.header_record.compression)
|
||||||
|
|
||||||
from calibre.ebooks.metadata.pdb import get_metadata
|
from calibre.ebooks.metadata.pdb import get_metadata
|
||||||
self.mi = get_metadata(stream, False)
|
self.mi = get_metadata(stream, False)
|
||||||
@ -71,9 +78,9 @@ class Reader132(FormatReader):
|
|||||||
return self.sections[number]
|
return self.sections[number]
|
||||||
|
|
||||||
def decompress_text(self, number):
|
def decompress_text(self, number):
|
||||||
if self.header_record.version == 2:
|
if self.header_record.compression == 2:
|
||||||
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||||
if self.header_record.version == 10:
|
if self.header_record.compression == 10:
|
||||||
return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||||
|
|
||||||
def get_image(self, number):
|
def get_image(self, number):
|
||||||
@ -115,19 +122,19 @@ class Reader132(FormatReader):
|
|||||||
pml += self.get_text_page(i)
|
pml += self.get_text_page(i)
|
||||||
html += pml_to_html(pml)
|
html += pml_to_html(pml)
|
||||||
|
|
||||||
if self.header_record.footnote_rec > 0:
|
if self.header_record.footnote_count > 0:
|
||||||
html += '<br /><h1>%s</h1>' % _('Footnotes')
|
html += '<br /><h1>%s</h1>' % _('Footnotes')
|
||||||
footnoteids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.footnote_offset).decode('cp1252' if self.encoding is None else self.encoding))
|
footnoteids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.footnote_offset).decode('cp1252' if self.encoding is None else self.encoding))
|
||||||
for fid, i in enumerate(range(self.header_record.footnote_offset + 1, self.header_record.footnote_offset + self.header_record.footnote_rec)):
|
for fid, i in enumerate(range(self.header_record.footnote_offset + 1, self.header_record.footnote_offset + self.header_record.footnote_count)):
|
||||||
self.log.debug('Extracting footnote page %i' % i)
|
self.log.debug('Extracting footnote page %i' % i)
|
||||||
html += '<dl>'
|
html += '<dl>'
|
||||||
html += footnote_sidebar_to_html(footnoteids[fid], self.decompress_text(i))
|
html += footnote_sidebar_to_html(footnoteids[fid], self.decompress_text(i))
|
||||||
html += '</dl>'
|
html += '</dl>'
|
||||||
|
|
||||||
if self.header_record.sidebar_rec > 0:
|
if self.header_record.sidebar_count > 0:
|
||||||
html += '<br /><h1>%s</h1>' % _('Sidebar')
|
html += '<br /><h1>%s</h1>' % _('Sidebar')
|
||||||
sidebarids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.sidebar_offset).decode('cp1252' if self.encoding is None else self.encoding))
|
sidebarids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.sidebar_offset).decode('cp1252' if self.encoding is None else self.encoding))
|
||||||
for sid, i in enumerate(range(self.header_record.sidebar_offset + 1, self.header_record.sidebar_offset + self.header_record.sidebar_rec)):
|
for sid, i in enumerate(range(self.header_record.sidebar_offset + 1, self.header_record.sidebar_offset + self.header_record.sidebar_count)):
|
||||||
self.log.debug('Extracting sidebar page %i' % i)
|
self.log.debug('Extracting sidebar page %i' % i)
|
||||||
html += '<dl>'
|
html += '<dl>'
|
||||||
html += footnote_sidebar_to_html(sidebarids[sid], self.decompress_text(i))
|
html += footnote_sidebar_to_html(sidebarids[sid], self.decompress_text(i))
|
||||||
|
@ -27,20 +27,20 @@ class PMLInput(InputFormatPlugin):
|
|||||||
def process_pml(self, pml_path, html_path, close_all=False):
|
def process_pml(self, pml_path, html_path, close_all=False):
|
||||||
pclose = False
|
pclose = False
|
||||||
hclose = False
|
hclose = False
|
||||||
|
|
||||||
if not hasattr(pml_path, 'read'):
|
if not hasattr(pml_path, 'read'):
|
||||||
pml_stream = open(pml_path, 'rb')
|
pml_stream = open(pml_path, 'rb')
|
||||||
pclose = True
|
pclose = True
|
||||||
else:
|
else:
|
||||||
pml_stream = pml_path
|
pml_stream = pml_path
|
||||||
pml_stream.seek(0)
|
pml_stream.seek(0)
|
||||||
|
|
||||||
if not hasattr(html_path, 'write'):
|
if not hasattr(html_path, 'write'):
|
||||||
html_stream = open(html_path, 'wb')
|
html_stream = open(html_path, 'wb')
|
||||||
hclose = True
|
hclose = True
|
||||||
else:
|
else:
|
||||||
html_stream = html_path
|
html_stream = html_path
|
||||||
|
|
||||||
ienc = pml_stream.encoding if pml_stream.encoding else 'cp1252'
|
ienc = pml_stream.encoding if pml_stream.encoding else 'cp1252'
|
||||||
if self.options.input_encoding:
|
if self.options.input_encoding:
|
||||||
ienc = self.options.input_encoding
|
ienc = self.options.input_encoding
|
||||||
@ -95,12 +95,12 @@ class PMLInput(InputFormatPlugin):
|
|||||||
with TemporaryDirectory('_unpmlz') as tdir:
|
with TemporaryDirectory('_unpmlz') as tdir:
|
||||||
zf = ZipFile(stream)
|
zf = ZipFile(stream)
|
||||||
zf.extractall(tdir)
|
zf.extractall(tdir)
|
||||||
|
|
||||||
pmls = glob.glob(os.path.join(tdir, '*.pml'))
|
pmls = glob.glob(os.path.join(tdir, '*.pml'))
|
||||||
for pml in pmls:
|
for pml in pmls:
|
||||||
html_name = os.path.splitext(os.path.basename(pml))[0]+'.html'
|
html_name = os.path.splitext(os.path.basename(pml))[0]+'.html'
|
||||||
html_path = os.path.join(os.getcwd(), html_name)
|
html_path = os.path.join(os.getcwd(), html_name)
|
||||||
|
|
||||||
pages.append(html_name)
|
pages.append(html_name)
|
||||||
log.debug('Processing PML item %s...' % pml)
|
log.debug('Processing PML item %s...' % pml)
|
||||||
ttoc = self.process_pml(pml, html_path)
|
ttoc = self.process_pml(pml, html_path)
|
||||||
@ -119,7 +119,7 @@ class PMLInput(InputFormatPlugin):
|
|||||||
manifest_items = []
|
manifest_items = []
|
||||||
for item in pages+images:
|
for item in pages+images:
|
||||||
manifest_items.append((item, None))
|
manifest_items.append((item, None))
|
||||||
|
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
from calibre.ebooks.metadata.meta import get_metadata
|
||||||
log.debug('Reading metadata from input file...')
|
log.debug('Reading metadata from input file...')
|
||||||
mi = get_metadata(stream, 'pml')
|
mi = get_metadata(stream, 'pml')
|
||||||
@ -133,5 +133,5 @@ class PMLInput(InputFormatPlugin):
|
|||||||
with open('metadata.opf', 'wb') as opffile:
|
with open('metadata.opf', 'wb') as opffile:
|
||||||
with open('toc.ncx', 'wb') as tocfile:
|
with open('toc.ncx', 'wb') as tocfile:
|
||||||
opf.render(opffile, tocfile, 'toc.ncx')
|
opf.render(opffile, tocfile, 'toc.ncx')
|
||||||
|
|
||||||
return os.path.join(os.getcwd(), 'metadata.opf')
|
return os.path.join(os.getcwd(), 'metadata.opf')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user