diff --git a/src/calibre/ebooks/lrf/html/table_as_image.py b/src/calibre/ebooks/lrf/html/table_as_image.py
index 6db42865be..bdde4aa440 100644
--- a/src/calibre/ebooks/lrf/html/table_as_image.py
+++ b/src/calibre/ebooks/lrf/html/table_as_image.py
@@ -44,7 +44,7 @@ class HTMLTableRenderer(QObject):
cutoff_height = int(self.height/factor)-3
image = QImage(self.page.viewportSize(), QImage.Format_ARGB32)
image.setDotsPerMeterX(self.dpi*(100/2.54))
- image.setDotsPerMeterX(self.dpi*(100/2.54))
+ image.setDotsPerMeterY(self.dpi*(100/2.54))
painter = QPainter(image)
self.page.mainFrame().render(painter)
painter.end()
diff --git a/src/calibre/ebooks/metadata/epub.py b/src/calibre/ebooks/metadata/epub.py
index 25f74b99c9..2ec4a2c8db 100644
--- a/src/calibre/ebooks/metadata/epub.py
+++ b/src/calibre/ebooks/metadata/epub.py
@@ -5,15 +5,23 @@ __copyright__ = '2008, Kovid Goyal '
'''Read meta information from epub files'''
-import sys, os
+import sys, os, time
from cStringIO import StringIO
from contextlib import closing
+from ctypes import c_long, byref
+from PyQt4.Qt import QWebPage, QUrl, QEventLoop, QSize, QByteArray, QBuffer, \
+ SIGNAL, QPainter, QImage, QObject, QApplication
+
+from calibre.utils.PythonMagickWand import ImageMagick, NewMagickWand, MagickGetImageBlob, \
+ MagickReadImageBlob, MagickTrimImage, MagickSetFormat
from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre.ebooks.metadata import get_parser, MetaInformation
from calibre.ebooks.metadata.opf2 import OPF
+from calibre.ptempfile import TemporaryDirectory
+from calibre import CurrentDir
class EPubException(Exception):
pass
@@ -65,9 +73,9 @@ class OCFReader(OCF):
self.container = Container(f)
except KeyError:
raise EPubException("missing OCF container.xml file")
-
+ self.opf_path = self.container[OPF.MIMETYPE]
try:
- with closing(self.open(self.container[OPF.MIMETYPE])) as f:
+ with closing(self.open(self.opf_path)) as f:
self.opf = OPF(f, self.root)
except KeyError:
raise EPubException("missing OPF package file")
@@ -96,13 +104,96 @@ class OCFDirReader(OCFReader):
def open(self, path, *args, **kwargs):
return open(os.path.join(self.root, path), *args, **kwargs)
+
+class CoverRenderer(QObject):
+    '''Load `url` in a QWebPage and store a trimmed JPEG of the rendered page in self.image_data.'''
+    def __init__(self, url, size, loop):  # `size` is accepted but never read here
+        QObject.__init__(self)
+        self.loop = loop  # event loop that render_html() exits when it is done
+        self.page = QWebPage()
+        QObject.connect(self.page, SIGNAL('loadFinished(bool)'), self.render_html)
+        self.image_data = None  # JPEG bytes; stays None if loading or rendering fails
+        self.rendered = False  # set to True as soon as loadFinished is delivered
+        self.page.mainFrame().load(url)
+
+    def render_html(self, ok):
+        '''Slot for loadFinished(bool): render, trim and encode the page, then exit self.loop.'''
+        self.rendered = True
+        try:
+            if not ok:
+                return
+            self.page.setViewportSize(QSize(1280, 1024))
+            image = QImage(self.page.viewportSize(), QImage.Format_ARGB32)
+            image.setDotsPerMeterX(96*(100/2.54))
+            image.setDotsPerMeterY(96*(100/2.54))
+            painter = QPainter(image)
+            self.page.mainFrame().render(painter)
+            painter.end()
+
+            ba = QByteArray()
+            buf = QBuffer(ba)
+            buf.open(QBuffer.WriteOnly)
+            image.save(buf, 'JPEG')
+            raw = str(ba.data())
+            with ImageMagick():
+                wand = NewMagickWand()
+                if not MagickReadImageBlob(wand, raw, len(raw)):
+                    raise ValueError('Failed to load cover image')
+                if not MagickTrimImage(wand, 10.):
+                    raise ValueError('Failed to process cover image')
+                if not MagickSetFormat(wand, 'JPEG'):
+                    raise ValueError('Failed to set cover image format')
+                x = c_long(0)  # receives the blob length from MagickGetImageBlob
+                dat = MagickGetImageBlob(wand, byref(x))
+                # Copy the blob out of the wand's buffer into a Python byte string
+                self.image_data = ''.join(chr(dat[i]) for i in xrange(x.value))
+        finally:
+            self.loop.exit(0)  # always release the waiting caller, even on failure
+
+
+def get_cover(opf, opf_path, stream):
+    '''Render the first spine item of the EPUB in `stream`; return JPEG data, or None if unavailable.'''
+    spine = list(opf.spine_items())
+    if not spine:
+        return
+    cpage = spine[0]
+    with TemporaryDirectory('_epub_meta') as tdir:
+        with CurrentDir(tdir):
+            stream.seek(0)
+            ZipFile(stream).extractall()
+            opf_path = opf_path.replace('/', os.sep)
+            # Resolve the spine href against the directory that holds the OPF file
+            cpage = os.path.join(tdir, os.path.dirname(opf_path), *cpage.split('/'))
+            if not os.path.exists(cpage):
+                return
+            if QApplication.instance() is None:
+                QApplication([])
+            url = QUrl.fromLocalFile(cpage)
+            loop = QEventLoop()
+            cr = CoverRenderer(url, os.stat(cpage).st_size, loop)
+            # Sleeping cannot deliver loadFinished, so wait in the event loop only while it is still pending
+            if not cr.rendered:
+                loop.exec_()
+            return cr.image_data
-def get_metadata(stream):
- """ Return metadata as a L{MetaInfo} object """
- return OCFZipReader(stream).opf
+def get_metadata(stream, extract_cover=True):
+    """ Return metadata as a :class:`MetaInformation` object; the cover is rendered only if `extract_cover` is true """
+    stream.seek(0)
+    reader = OCFZipReader(stream)
+    mi = MetaInformation(reader.opf)
+    if extract_cover:
+        try:
+            cdata = get_cover(reader.opf, reader.opf_path, stream)
+            if cdata is not None:
+                mi.cover_data = ('jpg', cdata)
+        except Exception:  # cover extraction is best-effort; Ctrl-C and SystemExit must still propagate
+            import traceback
+            traceback.print_exc()
+    return mi
def set_metadata(stream, mi):
+ stream.seek(0)
reader = OCFZipReader(stream, root=os.getcwdu())
reader.opf.smart_update(mi)
newopf = StringIO(reader.opf.render())
@@ -119,6 +210,8 @@ def option_parser():
help=_('The series index'))
parser.add_option('--language', default=None,
help=_('The book language'))
+ parser.add_option('--get-cover', default=False, action='store_true',
+ help=_('Extract the cover'))
return parser
def main(args=sys.argv):
@@ -127,37 +220,41 @@ def main(args=sys.argv):
if len(args) != 2:
parser.print_help()
return 1
- stream = open(args[1], 'r+b')
- mi = MetaInformation(OCFZipReader(stream, root=os.getcwdu()).opf)
- changed = False
- if opts.title:
- mi.title = opts.title
- changed = True
- if opts.authors:
- mi.authors = opts.authors.split(',')
- changed = True
- if opts.tags:
- mi.tags = opts.tags.split(',')
- changed = True
- if opts.comment:
- mi.comments = opts.comment
- changed = True
- if opts.series:
- mi.series = opts.series
- changed = True
- if opts.series_index:
- mi.series_index = opts.series_index
- changed = True
- if opts.language is not None:
- mi.language = opts.language
- changed = True
+ with open(args[1], 'r+b') as stream:
+ mi = get_metadata(stream, extract_cover=opts.get_cover)
+ changed = False
+ if opts.title:
+ mi.title = opts.title
+ changed = True
+ if opts.authors:
+ mi.authors = opts.authors.split(',')
+ changed = True
+ if opts.tags:
+ mi.tags = opts.tags.split(',')
+ changed = True
+ if opts.comment:
+ mi.comments = opts.comment
+ changed = True
+ if opts.series:
+ mi.series = opts.series
+ changed = True
+ if opts.series_index:
+ mi.series_index = opts.series_index
+ changed = True
+ if opts.language is not None:
+ mi.language = opts.language
+ changed = True
+
+ if changed:
+ set_metadata(stream, mi)
+ print unicode(get_metadata(stream, extract_cover=False))
+
+ if mi.cover_data[1] is not None:
+ cpath = os.path.splitext(os.path.basename(args[1]))[0] + '_cover.jpg'
+ with open(cpath, 'wb') as f:
+ f.write(mi.cover_data[1])
+ print 'Cover saved to', f.name
- if changed:
- stream.seek(0)
- set_metadata(stream, mi)
- stream.seek(0)
- print unicode(MetaInformation(OCFZipReader(stream, root=os.getcwdu()).opf))
- stream.close()
return 0
if __name__ == '__main__':
diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py
index ce628f51bc..2d8725f7cd 100644
--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@@ -34,6 +34,7 @@ class Resource(object):
'''
def __init__(self, href_or_path, basedir=os.getcwd(), is_path=True):
+ self.orig = href_or_path
self._href = None
self._basedir = basedir
self.path = None
@@ -522,6 +523,13 @@ class OPF(object):
def iterspine(self):
return self.spine_path(self.root)
+    def spine_items(self):  # generator: manifest hrefs for the spine, in spine order
+        for itemref in self.iterspine():
+            wanted = itemref.get('idref', '')
+            for entry in self.itermanifest():
+                if wanted == entry.get('id', None):
+                    yield entry.get('href', '')
+
def create_spine_item(self, idref):
ans = etree.Element('{%s}itemref'%self.NAMESPACES['opf'], idref=idref)
ans.tail = '\n\t\t'