EPUB metadata: Extract the raster cover image from malformed EPUB files that specify the href instead of the id in their <meta name=cover> OPF entry

This commit is contained in:
Kovid Goyal 2012-02-06 09:35:35 +05:30
parent a3286903df
commit 1f97500e84
2 changed files with 29 additions and 20 deletions

View File

@@ -129,28 +129,9 @@ class OCFDirReader(OCFReader):
def open(self, path, *args, **kwargs): def open(self, path, *args, **kwargs):
return open(os.path.join(self.root, path), *args, **kwargs) return open(os.path.join(self.root, path), *args, **kwargs)
def get_cover(opf, opf_path, stream, reader=None): def render_cover(opf, opf_path, zf, reader=None):
from calibre.ebooks import render_html_svg_workaround from calibre.ebooks import render_html_svg_workaround
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
raster_cover = opf.raster_cover
stream.seek(0)
zf = ZipFile(stream)
if raster_cover:
base = posixpath.dirname(opf_path)
cpath = posixpath.normpath(posixpath.join(base, raster_cover))
if reader is not None and \
reader.encryption_meta.is_encrypted(cpath):
return
try:
member = zf.getinfo(cpath)
except:
pass
else:
f = zf.open(member)
data = f.read()
f.close()
zf.close()
return data
cpage = opf.first_spine_item() cpage = opf.first_spine_item()
if not cpage: if not cpage:
@@ -174,6 +155,29 @@ def get_cover(opf, opf_path, stream, reader=None):
return return
return render_html_svg_workaround(cpage, default_log) return render_html_svg_workaround(cpage, default_log)
def get_cover(opf, opf_path, stream, reader=None):
    """Return cover data for the EPUB archive held in ``stream``.

    If ``opf.raster_cover`` is set, it is resolved relative to the directory
    of ``opf_path`` and looked up in the zip archive. When the member exists
    its raw bytes are returned. Otherwise the work is delegated to
    ``render_cover``.

    :param opf: object exposing a ``raster_cover`` attribute (a path relative
        to the OPF file, or a false value when there is none).
    :param opf_path: path of the OPF file inside the archive; its directory
        is the base used to resolve ``raster_cover``.
    :param stream: seekable file-like object containing the zip archive.
    :param reader: optional object whose ``encryption_meta.is_encrypted(path)``
        reports whether an archive member is encrypted.
    :return: the raster cover bytes, ``None`` when the cover member is
        reported as encrypted, or whatever ``render_cover`` returns.
    """
    raster_cover = opf.raster_cover
    stream.seek(0)
    zf = ZipFile(stream)
    if raster_cover:
        base = posixpath.dirname(opf_path)
        cpath = posixpath.normpath(posixpath.join(base, raster_cover))
        if reader is not None and \
                reader.encryption_meta.is_encrypted(cpath):
            # Nothing usable can be read; release the archive before leaving
            # (this path previously leaked the open ZipFile).
            zf.close()
            return
        try:
            member = zf.getinfo(cpath)
        except Exception:
            # Best effort: a missing/unresolvable member just means we fall
            # back to rendering the cover below.
            pass
        else:
            # Close both the member and the archive even if reading fails.
            try:
                f = zf.open(member)
                try:
                    return f.read()
                finally:
                    f.close()
            finally:
                zf.close()
    # The archive is handed over still open.
    # NOTE(review): confirm render_cover is responsible for closing it.
    return render_cover(opf, opf_path, zf, reader=reader)
def get_metadata(stream, extract_cover=True): def get_metadata(stream, extract_cover=True):
""" Return metadata as a :class:`Metadata` object """ """ Return metadata as a :class:`Metadata` object """
stream.seek(0) stream.seek(0)

View File

@@ -1019,6 +1019,11 @@ class OPF(object): # {{{
mt = item.get('media-type', '') mt = item.get('media-type', '')
if 'xml' not in mt: if 'xml' not in mt:
return item.get('href', None) return item.get('href', None)
for item in self.itermanifest():
if item.get('href', None) == cover_id:
mt = item.get('media-type', '')
if mt.startswith('image/'):
return item.get('href', None)
@dynamic_property @dynamic_property
def cover(self): def cover(self):