Read cover image from EPUB files

This commit is contained in:
Kovid Goyal 2008-10-10 17:44:56 -07:00
parent 59bf676dc0
commit 361c8e9467
3 changed files with 142 additions and 37 deletions

View File

@ -44,7 +44,7 @@ class HTMLTableRenderer(QObject):
cutoff_height = int(self.height/factor)-3 cutoff_height = int(self.height/factor)-3
image = QImage(self.page.viewportSize(), QImage.Format_ARGB32) image = QImage(self.page.viewportSize(), QImage.Format_ARGB32)
image.setDotsPerMeterX(self.dpi*(100/2.54)) image.setDotsPerMeterX(self.dpi*(100/2.54))
image.setDotsPerMeterX(self.dpi*(100/2.54)) image.setDotsPerMeterY(self.dpi*(100/2.54))
painter = QPainter(image) painter = QPainter(image)
self.page.mainFrame().render(painter) self.page.mainFrame().render(painter)
painter.end() painter.end()

View File

@ -5,15 +5,23 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''Read meta information from epub files''' '''Read meta information from epub files'''
import sys, os import sys, os, time
from cStringIO import StringIO from cStringIO import StringIO
from contextlib import closing from contextlib import closing
from ctypes import c_long, byref
from PyQt4.Qt import QWebPage, QUrl, QEventLoop, QSize, QByteArray, QBuffer, \
SIGNAL, QPainter, QImage, QObject, QApplication
from calibre.utils.PythonMagickWand import ImageMagick, NewMagickWand, MagickGetImageBlob, \
MagickReadImageBlob, MagickTrimImage, MagickSetFormat
from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre.ebooks.metadata import get_parser, MetaInformation from calibre.ebooks.metadata import get_parser, MetaInformation
from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir
class EPubException(Exception): class EPubException(Exception):
pass pass
@ -65,9 +73,9 @@ class OCFReader(OCF):
self.container = Container(f) self.container = Container(f)
except KeyError: except KeyError:
raise EPubException("missing OCF container.xml file") raise EPubException("missing OCF container.xml file")
self.opf_path = self.container[OPF.MIMETYPE]
try: try:
with closing(self.open(self.container[OPF.MIMETYPE])) as f: with closing(self.open(self.opf_path)) as f:
self.opf = OPF(f, self.root) self.opf = OPF(f, self.root)
except KeyError: except KeyError:
raise EPubException("missing OPF package file") raise EPubException("missing OPF package file")
@ -96,13 +104,96 @@ class OCFDirReader(OCFReader):
def open(self, path, *args, **kwargs): def open(self, path, *args, **kwargs):
return open(os.path.join(self.root, path), *args, **kwargs) return open(os.path.join(self.root, path), *args, **kwargs)
class CoverRenderer(QObject):
    '''
    Load an HTML page in an off-screen ``QWebPage`` and, once loading has
    finished, render it to trimmed JPEG data.

    Loading is started from the constructor. When the page emits
    ``loadFinished(bool)``, :meth:`render_html` runs: it sets ``rendered``
    to True, stores the JPEG data in ``image_data`` (which stays None if
    loading or rendering failed) and always calls ``loop.exit(0)``.

    :param url: URL passed to ``QWebPage.mainFrame().load()``.
    :param size: Currently unused by this class.
    :param loop: Object whose ``exit(0)`` is called when rendering is over.
    '''

    def __init__(self, url, size, loop):
        QObject.__init__(self)
        self.loop = loop
        self.page = QWebPage()
        # Connect before starting the load so the signal cannot be missed.
        QObject.connect(self.page, SIGNAL('loadFinished(bool)'), self.render_html)
        self.image_data = None
        self.rendered = False
        self.page.mainFrame().load(url)

    def render_html(self, ok):
        '''
        Slot for ``loadFinished(bool)``.

        :param ok: False if the page failed to load; nothing is rendered then.
        :raises ValueError: if ImageMagick cannot read, trim or re-encode
                            the rendered image.
        '''
        self.rendered = True
        try:
            if not ok:
                return
            self.image_data = self._trim(self._snapshot())
        finally:
            # Always release the waiting event loop, even on failure.
            self.loop.exit(0)

    def _snapshot(self):
        '''Paint the page into a 1280x1024 image and return it as JPEG data.'''
        self.page.setViewportSize(QSize(1280, 1024))
        image = QImage(self.page.viewportSize(), QImage.Format_ARGB32)
        # 96 dots per inch expressed in dots per meter.
        dots_per_meter = 96*(100/2.54)
        image.setDotsPerMeterX(dots_per_meter)
        image.setDotsPerMeterY(dots_per_meter)
        painter = QPainter(image)
        self.page.mainFrame().render(painter)
        painter.end()
        ba = QByteArray()
        buf = QBuffer(ba)
        buf.open(QBuffer.WriteOnly)
        image.save(buf, 'JPEG')
        return str(ba.data())

    def _trim(self, raw):
        '''Run ``MagickTrimImage`` over JPEG data ``raw``; return JPEG data.'''
        with ImageMagick():
            # NOTE(review): the wand is never explicitly destroyed here;
            # confirm whether the wrapper expects a destroy call.
            wand = NewMagickWand()
            if not MagickReadImageBlob(wand, raw, len(raw)):
                raise ValueError('Failed to load cover image')
            if not MagickTrimImage(wand, 10.):
                raise ValueError('Failed to process cover image')
            if not MagickSetFormat(wand, 'JPEG'):
                raise ValueError('Failed to set cover image format to JPEG')
            length = c_long(0)
            blob = MagickGetImageBlob(wand, byref(length))
            # Copy the blob into a Python byte string while the
            # ImageMagick context is still active.
            return ''.join(chr(blob[i]) for i in xrange(length.value))
def get_cover(opf, opf_path, stream):
    '''
    Render the first spine item of an EPUB to JPEG data for use as a cover.

    The archive in ``stream`` is extracted into a temporary directory, the
    first href yielded by ``opf.spine_items()`` is resolved relative to the
    directory of ``opf_path``, and that page is rendered by
    ``CoverRenderer``.

    :param opf: Object providing ``spine_items()``, an iterable of hrefs.
    :param opf_path: Path of the OPF file inside the archive, '/'-separated.
    :param stream: Seekable file-like object containing the EPUB archive.
    :return: The rendered JPEG data, or None if the spine is empty, the
             first spine page does not exist, or rendering produced nothing.
    '''
    spine = list(opf.spine_items())
    if not spine:
        return
    cpage = spine[0]
    with TemporaryDirectory('_epub_meta') as tdir:
        with CurrentDir(tdir):
            stream.seek(0)
            # NOTE(review): member names from the archive are not validated
            # here; confirm that extractall() cannot write outside tdir when
            # given an untrusted file.
            ZipFile(stream).extractall()
            opf_path = opf_path.replace('/', os.sep)
            cpage = os.path.join(tdir, os.path.dirname(opf_path), *cpage.split('/'))
            if not os.path.exists(cpage):
                return
            if QApplication.instance() is None:
                # Keep a reference: an unreferenced QApplication wrapper can
                # be garbage collected, destroying the application object
                # while the web page still needs it.
                get_cover._qapp = QApplication([])
            url = QUrl.fromLocalFile(cpage)
            loop = QEventLoop()
            cr = CoverRenderer(url, os.stat(cpage).st_size, loop)
            # If loading already finished, exit() was called before exec_()
            # and entering the loop now would block forever.
            if not cr.rendered:
                loop.exec_()
            # No polling after exec_(): the renderer's slot only runs while
            # events are being processed, so sleeping cannot change the
            # outcome.
            return cr.image_data
def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a :class:`MetaInformation` object.

    :param stream: Seekable file-like object containing the EPUB archive.
    :param extract_cover: If True, also try to render a cover and store it
                          as ``mi.cover_data = ('jpg', data)``.

    Cover extraction is best effort: any error raised while rendering is
    printed as a traceback and the metadata is still returned.
    """
    stream.seek(0)
    reader = OCFZipReader(stream)
    mi = MetaInformation(reader.opf)
    if extract_cover:
        try:
            cdata = get_cover(reader.opf, reader.opf_path, stream)
            if cdata is not None:
                mi.cover_data = ('jpg', cdata)
        except Exception:
            # Deliberately not a bare except, so KeyboardInterrupt and
            # SystemExit still propagate.
            import traceback
            traceback.print_exc()
    return mi
def set_metadata(stream, mi): def set_metadata(stream, mi):
stream.seek(0)
reader = OCFZipReader(stream, root=os.getcwdu()) reader = OCFZipReader(stream, root=os.getcwdu())
reader.opf.smart_update(mi) reader.opf.smart_update(mi)
newopf = StringIO(reader.opf.render()) newopf = StringIO(reader.opf.render())
@ -119,6 +210,8 @@ def option_parser():
help=_('The series index')) help=_('The series index'))
parser.add_option('--language', default=None, parser.add_option('--language', default=None,
help=_('The book language')) help=_('The book language'))
parser.add_option('--get-cover', default=False, action='store_true',
help=_('Extract the cover'))
return parser return parser
def main(args=sys.argv): def main(args=sys.argv):
@ -127,37 +220,41 @@ def main(args=sys.argv):
if len(args) != 2: if len(args) != 2:
parser.print_help() parser.print_help()
return 1 return 1
stream = open(args[1], 'r+b') with open(args[1], 'r+b') as stream:
mi = MetaInformation(OCFZipReader(stream, root=os.getcwdu()).opf) mi = get_metadata(stream, extract_cover=opts.get_cover)
changed = False changed = False
if opts.title: if opts.title:
mi.title = opts.title mi.title = opts.title
changed = True changed = True
if opts.authors: if opts.authors:
mi.authors = opts.authors.split(',') mi.authors = opts.authors.split(',')
changed = True changed = True
if opts.tags: if opts.tags:
mi.tags = opts.tags.split(',') mi.tags = opts.tags.split(',')
changed = True changed = True
if opts.comment: if opts.comment:
mi.comments = opts.comment mi.comments = opts.comment
changed = True changed = True
if opts.series: if opts.series:
mi.series = opts.series mi.series = opts.series
changed = True changed = True
if opts.series_index: if opts.series_index:
mi.series_index = opts.series_index mi.series_index = opts.series_index
changed = True changed = True
if opts.language is not None: if opts.language is not None:
mi.language = opts.language mi.language = opts.language
changed = True changed = True
if changed:
set_metadata(stream, mi)
print unicode(get_metadata(stream, extract_cover=False))
if mi.cover_data[1] is not None:
cpath = os.path.splitext(os.path.basename(args[1]))[0] + '_cover.jpg'
with open(cpath, 'wb') as f:
f.write(mi.cover_data[1])
print 'Cover saved to', f.name
if changed:
stream.seek(0)
set_metadata(stream, mi)
stream.seek(0)
print unicode(MetaInformation(OCFZipReader(stream, root=os.getcwdu()).opf))
stream.close()
return 0 return 0
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -34,6 +34,7 @@ class Resource(object):
''' '''
def __init__(self, href_or_path, basedir=os.getcwd(), is_path=True): def __init__(self, href_or_path, basedir=os.getcwd(), is_path=True):
self.orig = href_or_path
self._href = None self._href = None
self._basedir = basedir self._basedir = basedir
self.path = None self.path = None
@ -522,6 +523,13 @@ class OPF(object):
def iterspine(self): def iterspine(self):
return self.spine_path(self.root) return self.spine_path(self.root)
def spine_items(self):
    '''
    Yield the ``href`` of every manifest entry referenced by the spine.

    For each element from ``iterspine()``, in order, every element from
    ``itermanifest()`` whose ``id`` equals the spine element's ``idref`` is
    reported. A missing ``href`` is yielded as an empty string.
    '''
    for itemref in self.iterspine():
        wanted = itemref.get('idref', '')
        matching = (entry for entry in self.itermanifest()
                    if entry.get('id', None) == wanted)
        for entry in matching:
            yield entry.get('href', '')
def create_spine_item(self, idref): def create_spine_item(self, idref):
ans = etree.Element('{%s}itemref'%self.NAMESPACES['opf'], idref=idref) ans = etree.Element('{%s}itemref'%self.NAMESPACES['opf'], idref=idref)
ans.tail = '\n\t\t' ans.tail = '\n\t\t'