diff --git a/src/calibre/ebooks/lrf/html/table_as_image.py b/src/calibre/ebooks/lrf/html/table_as_image.py index 6db42865be..bdde4aa440 100644 --- a/src/calibre/ebooks/lrf/html/table_as_image.py +++ b/src/calibre/ebooks/lrf/html/table_as_image.py @@ -44,7 +44,7 @@ class HTMLTableRenderer(QObject): cutoff_height = int(self.height/factor)-3 image = QImage(self.page.viewportSize(), QImage.Format_ARGB32) image.setDotsPerMeterX(self.dpi*(100/2.54)) - image.setDotsPerMeterX(self.dpi*(100/2.54)) + image.setDotsPerMeterY(self.dpi*(100/2.54)) painter = QPainter(image) self.page.mainFrame().render(painter) painter.end() diff --git a/src/calibre/ebooks/metadata/epub.py b/src/calibre/ebooks/metadata/epub.py index 25f74b99c9..2ec4a2c8db 100644 --- a/src/calibre/ebooks/metadata/epub.py +++ b/src/calibre/ebooks/metadata/epub.py @@ -5,15 +5,23 @@ __copyright__ = '2008, Kovid Goyal ' '''Read meta information from epub files''' -import sys, os +import sys, os, time from cStringIO import StringIO from contextlib import closing +from ctypes import c_long, byref +from PyQt4.Qt import QWebPage, QUrl, QEventLoop, QSize, QByteArray, QBuffer, \ + SIGNAL, QPainter, QImage, QObject, QApplication + +from calibre.utils.PythonMagickWand import ImageMagick, NewMagickWand, MagickGetImageBlob, \ + MagickReadImageBlob, MagickTrimImage, MagickSetFormat from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup from calibre.ebooks.metadata import get_parser, MetaInformation from calibre.ebooks.metadata.opf2 import OPF +from calibre.ptempfile import TemporaryDirectory +from calibre import CurrentDir class EPubException(Exception): pass @@ -65,9 +73,9 @@ class OCFReader(OCF): self.container = Container(f) except KeyError: raise EPubException("missing OCF container.xml file") - + self.opf_path = self.container[OPF.MIMETYPE] try: - with closing(self.open(self.container[OPF.MIMETYPE])) as f: + with closing(self.open(self.opf_path)) as f: self.opf = OPF(f, self.root) except KeyError: raise EPubException("missing OPF package file") @@ -96,13 +104,96 @@ class OCFDirReader(OCFReader): def open(self, path, *args, **kwargs): return open(os.path.join(self.root, path), *args, **kwargs) + +class CoverRenderer(QObject): + def __init__(self, url, size, loop): + QObject.__init__(self) + self.loop = loop + self.page = QWebPage() + QObject.connect(self.page, SIGNAL('loadFinished(bool)'), self.render_html) + self.image_data = None + self.rendered = False + self.page.mainFrame().load(url) + + def render_html(self, ok): + self.rendered = True + try: + if not ok: + return + self.page.setViewportSize(QSize(1280, 1024)) + image = QImage(self.page.viewportSize(), QImage.Format_ARGB32) + image.setDotsPerMeterX(96*(100/2.54)) + image.setDotsPerMeterY(96*(100/2.54)) + painter = QPainter(image) + self.page.mainFrame().render(painter) + painter.end() + + ba = QByteArray() + buf = QBuffer(ba) + buf.open(QBuffer.WriteOnly) + image.save(buf, 'JPEG') + raw = str(ba.data()) + with ImageMagick(): + wand = NewMagickWand() + if not MagickReadImageBlob(wand, raw, len(raw)): + raise ValueError('Failed to load cover image') + if not MagickTrimImage(wand, 10.): + raise ValueError('Failed to process cover image') + x = c_long(0) + if not MagickSetFormat(wand, 'JPEG'): + raise Exception() + dat = MagickGetImageBlob(wand, byref(x)) + obuf = StringIO() + for i in xrange(x.value): + obuf.write(chr(dat[i])) + self.image_data = obuf.getvalue() + finally: + self.loop.exit(0) + + +def get_cover(opf, opf_path, stream): + spine = list(opf.spine_items()) + if not spine: + return + cpage = spine[0] + with TemporaryDirectory('_epub_meta') as tdir: + with CurrentDir(tdir): + stream.seek(0) + ZipFile(stream).extractall() + opf_path = opf_path.replace('/', os.sep) + cpage = os.path.join(tdir, os.path.dirname(opf_path), *cpage.split('/')) + if not os.path.exists(cpage): + return + if QApplication.instance() is None: + QApplication([]) + url = QUrl.fromLocalFile(cpage) + loop = QEventLoop() + cr = CoverRenderer(url, os.stat(cpage).st_size, loop) + loop.exec_() + count = 0 + while count < 50 and not cr.rendered: + time.sleep(0.1) + count += 1 + return cr.image_data -def get_metadata(stream): - """ Return metadata as a L{MetaInfo} object """ - return OCFZipReader(stream).opf +def get_metadata(stream, extract_cover=True): + """ Return metadata as a :class:`MetaInformation` object """ + stream.seek(0) + reader = OCFZipReader(stream) + mi = MetaInformation(reader.opf) + if extract_cover: + try: + cdata = get_cover(reader.opf, reader.opf_path, stream) + if cdata is not None: + mi.cover_data = ('jpg', cdata) + except: + import traceback + traceback.print_exc() + return mi def set_metadata(stream, mi): + stream.seek(0) reader = OCFZipReader(stream, root=os.getcwdu()) reader.opf.smart_update(mi) newopf = StringIO(reader.opf.render()) @@ -119,6 +210,8 @@ def option_parser(): help=_('The series index')) parser.add_option('--language', default=None, help=_('The book language')) + parser.add_option('--get-cover', default=False, action='store_true', + help=_('Extract the cover')) return parser def main(args=sys.argv): @@ -127,37 +220,41 @@ def main(args=sys.argv): if len(args) != 2: parser.print_help() return 1 - stream = open(args[1], 'r+b') - mi = MetaInformation(OCFZipReader(stream, root=os.getcwdu()).opf) - changed = False - if opts.title: - mi.title = opts.title - changed = True - if opts.authors: - mi.authors = opts.authors.split(',') - changed = True - if opts.tags: - mi.tags = opts.tags.split(',') - changed = True - if opts.comment: - mi.comments = opts.comment - changed = True - if opts.series: - mi.series = opts.series - changed = True - if opts.series_index: - mi.series_index = opts.series_index - changed = True - if opts.language is not None: - mi.language = opts.language - changed = True + with open(args[1], 'r+b') as stream: + mi = get_metadata(stream, extract_cover=opts.get_cover) + changed = False + if opts.title: + mi.title = opts.title + changed = True + if opts.authors: + mi.authors = opts.authors.split(',') + changed = True + if opts.tags: + mi.tags = opts.tags.split(',') + changed = True + if opts.comment: + mi.comments = opts.comment + changed = True + if opts.series: + mi.series = opts.series + changed = True + if opts.series_index: + mi.series_index = opts.series_index + changed = True + if opts.language is not None: + mi.language = opts.language + changed = True + + if changed: + set_metadata(stream, mi) + print unicode(get_metadata(stream, extract_cover=False)) + + if mi.cover_data[1] is not None: + cpath = os.path.splitext(os.path.basename(args[1]))[0] + '_cover.jpg' + with open(cpath, 'wb') as f: + f.write(mi.cover_data[1]) + print 'Cover saved to', f.name - if changed: - stream.seek(0) - set_metadata(stream, mi) - stream.seek(0) - print unicode(MetaInformation(OCFZipReader(stream, root=os.getcwdu()).opf)) - stream.close() return 0 if __name__ == '__main__': diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index ce628f51bc..2d8725f7cd 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -34,6 +34,7 @@ class Resource(object): ''' def __init__(self, href_or_path, basedir=os.getcwd(), is_path=True): + self.orig = href_or_path self._href = None self._basedir = basedir self.path = None @@ -522,6 +523,13 @@ class OPF(object): def iterspine(self): return self.spine_path(self.root) + def spine_items(self): + for item in self.iterspine(): + idref = item.get('idref', '') + for x in self.itermanifest(): + if x.get('id', None) == idref: + yield x.get('href', '') + def create_spine_item(self, idref): ans = etree.Element('{%s}itemref'%self.NAMESPACES['opf'], idref=idref) ans.tail = '\n\t\t'