Read cover image from EPUB files

This commit is contained in:
Kovid Goyal 2008-10-10 17:44:56 -07:00
parent 59bf676dc0
commit 361c8e9467
3 changed files with 142 additions and 37 deletions

View File

@ -44,7 +44,7 @@ class HTMLTableRenderer(QObject):
cutoff_height = int(self.height/factor)-3
image = QImage(self.page.viewportSize(), QImage.Format_ARGB32)
image.setDotsPerMeterX(self.dpi*(100/2.54))
image.setDotsPerMeterX(self.dpi*(100/2.54))
image.setDotsPerMeterY(self.dpi*(100/2.54))
painter = QPainter(image)
self.page.mainFrame().render(painter)
painter.end()

View File

@ -5,15 +5,23 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''Read meta information from epub files'''
import sys, os
import sys, os, time
from cStringIO import StringIO
from contextlib import closing
from ctypes import c_long, byref
from PyQt4.Qt import QWebPage, QUrl, QEventLoop, QSize, QByteArray, QBuffer, \
SIGNAL, QPainter, QImage, QObject, QApplication
from calibre.utils.PythonMagickWand import ImageMagick, NewMagickWand, MagickGetImageBlob, \
MagickReadImageBlob, MagickTrimImage, MagickSetFormat
from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre.ebooks.metadata import get_parser, MetaInformation
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir
class EPubException(Exception):
    """Error raised when an EPUB cannot be read, e.g. because its
    container.xml or OPF package file is missing."""
@ -65,9 +73,9 @@ class OCFReader(OCF):
self.container = Container(f)
except KeyError:
raise EPubException("missing OCF container.xml file")
self.opf_path = self.container[OPF.MIMETYPE]
try:
with closing(self.open(self.container[OPF.MIMETYPE])) as f:
with closing(self.open(self.opf_path)) as f:
self.opf = OPF(f, self.root)
except KeyError:
raise EPubException("missing OPF package file")
@ -97,12 +105,95 @@ class OCFDirReader(OCFReader):
def open(self, path, *args, **kwargs):
    """Open ``path`` (taken relative to ``self.root``) with the builtin
    ``open``; any extra arguments are forwarded unchanged."""
    full_path = os.path.join(self.root, path)
    return open(full_path, *args, **kwargs)
class CoverRenderer(QObject):
def get_metadata(stream):
""" Return metadata as a L{MetaInfo} object """
return OCFZipReader(stream).opf
def __init__(self, url, size, loop):
    """Start loading ``url`` into an off-screen web page.

    ``loop`` is the event loop that ``render_html`` exits once the load
    has finished. ``size`` is accepted but not used by this method.
    """
    QObject.__init__(self)
    # Result slots, filled in by render_html.
    self.rendered = False
    self.image_data = None
    self.loop = loop
    self.page = QWebPage()
    # Connect before starting the load so the finished signal is not missed.
    QObject.connect(self.page, SIGNAL('loadFinished(bool)'), self.render_html)
    frame = self.page.mainFrame()
    frame.load(url)
def render_html(self, ok):
    """Slot connected to the page's ``loadFinished(bool)`` signal.

    On a successful load, paints the page into a 1280x1024 image, encodes
    it as JPEG, trims it with ImageMagick and stores the resulting bytes
    in ``self.image_data``. ``self.rendered`` is set to True as soon as
    the slot runs, and ``self.loop`` is always exited, whether rendering
    succeeded, was skipped (``ok`` false) or raised.

    Raises ValueError if ImageMagick cannot load, trim or re-encode the
    rendered image.
    """
    self.rendered = True
    try:
        if not ok:
            return
        self.page.setViewportSize(QSize(1280, 1024))
        image = QImage(self.page.viewportSize(), QImage.Format_ARGB32)
        # 96 dots per inch expressed in dots per meter.
        dots_per_meter = 96*(100/2.54)
        image.setDotsPerMeterX(dots_per_meter)
        image.setDotsPerMeterY(dots_per_meter)
        painter = QPainter(image)
        self.page.mainFrame().render(painter)
        painter.end()

        # Serialise the painted image to an in-memory JPEG.
        ba = QByteArray()
        buf = QBuffer(ba)
        buf.open(QBuffer.WriteOnly)
        image.save(buf, 'JPEG')
        raw = str(ba.data())
        with ImageMagick():
            wand = NewMagickWand()
            if not MagickReadImageBlob(wand, raw, len(raw)):
                raise ValueError('Failed to load cover image')
            # NOTE(review): 10. is presumably the trim fuzz factor -- confirm
            # against the MagickWand API.
            if not MagickTrimImage(wand, 10.):
                raise ValueError('Failed to process cover image')
            x = c_long(0)
            if not MagickSetFormat(wand, 'JPEG'):
                # Previously a bare Exception() with no message.
                raise ValueError('Failed to set cover image format')
            dat = MagickGetImageBlob(wand, byref(x))
            # x.value holds the blob length; copy it out into a byte string.
            self.image_data = ''.join(chr(dat[i]) for i in xrange(x.value))
    finally:
        self.loop.exit(0)
def get_cover(opf, opf_path, stream):
    """Render the first spine item of an EPUB and return it as JPEG data.

    ``opf`` must provide ``spine_items()``; ``opf_path`` is the path of the
    OPF file inside the archive (``/`` separated); ``stream`` is a seekable
    file-like object containing the EPUB zip.

    Returns the image bytes produced by ``CoverRenderer``, or None when the
    spine is empty, the first spine item does not exist after extraction,
    or rendering produced no data.
    """
    spine = list(opf.spine_items())
    if not spine:
        return
    cpage = spine[0]
    with TemporaryDirectory('_epub_meta') as tdir:
        with CurrentDir(tdir):
            # Unpack the whole archive so the page can load its resources.
            stream.seek(0)
            ZipFile(stream).extractall()
            opf_path = opf_path.replace('/', os.sep)
            # Spine hrefs are relative to the directory holding the OPF file.
            cpage = os.path.join(tdir, os.path.dirname(opf_path), *cpage.split('/'))
            if not os.path.exists(cpage):
                return
            # Keep a reference to the application object: an unreferenced
            # QApplication wrapper may be garbage collected straight away,
            # destroying the application before the page is rendered.
            app = QApplication.instance()
            if app is None:
                app = QApplication([])
            url = QUrl.fromLocalFile(cpage)
            loop = QEventLoop()
            cr = CoverRenderer(url, os.stat(cpage).st_size, loop)
            loop.exec_()
            # Wait up to ~5 seconds in case the loop returned before the
            # renderer flagged itself as done.
            count = 0
            while count < 50 and not cr.rendered:
                time.sleep(0.1)
                count += 1
            return cr.image_data
def get_metadata(stream, extract_cover=True):
    """ Return metadata as a :class:`MetaInformation` object

    ``stream`` is a seekable file-like object containing the EPUB. When
    ``extract_cover`` is true, cover extraction is attempted on a
    best-effort basis: on success ``cover_data`` is set to
    ``('jpg', data)``; on failure the traceback is printed and the
    metadata is returned without a cover.
    """
    stream.seek(0)
    reader = OCFZipReader(stream)
    mi = MetaInformation(reader.opf)
    if extract_cover:
        try:
            cdata = get_cover(reader.opf, reader.opf_path, stream)
            if cdata is not None:
                mi.cover_data = ('jpg', cdata)
        except Exception:
            # Cover rendering is optional, so report and carry on. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            import traceback
            traceback.print_exc()
    return mi
def set_metadata(stream, mi):
stream.seek(0)
reader = OCFZipReader(stream, root=os.getcwdu())
reader.opf.smart_update(mi)
newopf = StringIO(reader.opf.render())
@ -119,6 +210,8 @@ def option_parser():
help=_('The series index'))
parser.add_option('--language', default=None,
help=_('The book language'))
parser.add_option('--get-cover', default=False, action='store_true',
help=_('Extract the cover'))
return parser
def main(args=sys.argv):
@ -127,37 +220,41 @@ def main(args=sys.argv):
if len(args) != 2:
parser.print_help()
return 1
stream = open(args[1], 'r+b')
mi = MetaInformation(OCFZipReader(stream, root=os.getcwdu()).opf)
changed = False
if opts.title:
mi.title = opts.title
changed = True
if opts.authors:
mi.authors = opts.authors.split(',')
changed = True
if opts.tags:
mi.tags = opts.tags.split(',')
changed = True
if opts.comment:
mi.comments = opts.comment
changed = True
if opts.series:
mi.series = opts.series
changed = True
if opts.series_index:
mi.series_index = opts.series_index
changed = True
if opts.language is not None:
mi.language = opts.language
changed = True
with open(args[1], 'r+b') as stream:
mi = get_metadata(stream, extract_cover=opts.get_cover)
changed = False
if opts.title:
mi.title = opts.title
changed = True
if opts.authors:
mi.authors = opts.authors.split(',')
changed = True
if opts.tags:
mi.tags = opts.tags.split(',')
changed = True
if opts.comment:
mi.comments = opts.comment
changed = True
if opts.series:
mi.series = opts.series
changed = True
if opts.series_index:
mi.series_index = opts.series_index
changed = True
if opts.language is not None:
mi.language = opts.language
changed = True
if changed:
set_metadata(stream, mi)
print unicode(get_metadata(stream, extract_cover=False))
if mi.cover_data[1] is not None:
cpath = os.path.splitext(os.path.basename(args[1]))[0] + '_cover.jpg'
with open(cpath, 'wb') as f:
f.write(mi.cover_data[1])
print 'Cover saved to', f.name
if changed:
stream.seek(0)
set_metadata(stream, mi)
stream.seek(0)
print unicode(MetaInformation(OCFZipReader(stream, root=os.getcwdu()).opf))
stream.close()
return 0
if __name__ == '__main__':

View File

@ -34,6 +34,7 @@ class Resource(object):
'''
def __init__(self, href_or_path, basedir=os.getcwd(), is_path=True):
self.orig = href_or_path
self._href = None
self._basedir = basedir
self.path = None
@ -522,6 +523,13 @@ class OPF(object):
def iterspine(self):
    """Return the result of applying ``self.spine_path`` to ``self.root``.

    NOTE(review): presumably this yields the spine ``itemref`` elements of
    the OPF document -- confirm against the definition of ``spine_path``.
    """
    find_spine = self.spine_path
    return find_spine(self.root)
def spine_items(self):
    """Yield the ``href`` of every manifest item referenced by the spine.

    Spine entries are visited in order; for each one, every manifest item
    whose ``id`` equals the entry's ``idref`` contributes its ``href``
    (an empty string when the attribute is absent).
    """
    for itemref in self.iterspine():
        wanted = itemref.get('idref', '')
        matching = (entry for entry in self.itermanifest()
                    if entry.get('id', None) == wanted)
        for entry in matching:
            yield entry.get('href', '')
def create_spine_item(self, idref):
ans = etree.Element('{%s}itemref'%self.NAMESPACES['opf'], idref=idref)
ans.tail = '\n\t\t'