diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index a9cf7c7045..6e3ac55a77 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -361,6 +361,8 @@ def entity_to_unicode(match, exceptions=[], encoding='cp1252'): return '&'+ent+';' if ent == 'apos': return "'" + if ent == 'hellips': + ent = 'hellip' if ent.startswith(u'#x'): num = int(ent[2:], 16) if encoding is None or num > 255: diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py index dcd5604aa8..4034a8810b 100644 --- a/src/calibre/ebooks/__init__.py +++ b/src/calibre/ebooks/__init__.py @@ -57,6 +57,35 @@ class HTMLRenderer(object): self.loop.exit(0) +def extract_cover_from_embedded_svg(html, base, log): + from lxml import etree + from calibre.ebooks.oeb.base import XPath, SVG, XLINK + root = etree.fromstring(html) + + svg = XPath('//svg:svg')(root) + if len(svg) == 1 and len(svg[0]) == 1 and svg[0][0].tag == SVG('image'): + image = svg[0][0] + href = image.get(XLINK('href'), None) + path = os.path.join(base, *href.split('/')) + if href and os.access(path, os.R_OK): + return open(path, 'rb').read() + +def render_html_svg_workaround(path_to_html, log, width=590, height=750): + from calibre.ebooks.oeb.base import SVG_NS + raw = open(path_to_html, 'rb').read() + data = None + if SVG_NS in raw: + try: + data = extract_cover_from_embedded_svg(raw, + os.path.dirname(path_to_html), log) + except: + pass + if data is None: + renderer = render_html(path_to_html, width, height) + data = getattr(renderer, 'data', None) + return data + + def render_html(path_to_html, width=590, height=750): from PyQt4.QtWebKit import QWebPage from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize diff --git a/src/calibre/ebooks/epub/input.py b/src/calibre/ebooks/epub/input.py index dc2aa230d5..3f8b563d96 100644 --- a/src/calibre/ebooks/epub/input.py +++ b/src/calibre/ebooks/epub/input.py @@ -54,7 +54,7 @@ class EPUBInput(InputFormatPlugin): return False @classmethod - def rationalize_cover(self, opf): + def rationalize_cover(self, opf, log): guide_cover, guide_elem = None, None for guide_elem in opf.iterguide(): if guide_elem.get('type', '').lower() == 'cover': @@ -65,28 +65,37 @@ class EPUBInput(InputFormatPlugin): spine = list(opf.iterspine()) if not spine: return + # Check if the cover specified in the guide is also + # the first element in spine idref = spine[0].get('idref', '') manifest = list(opf.itermanifest()) if not manifest: return - if manifest[0].get('id', False) != idref: + elem = [x for x in manifest if x.get('id', '') == idref] + if not elem or elem[0].get('href', None) != guide_cover: return + log('Found HTML cover', guide_cover) + + # Remove from spine as covers must be treated + # specially spine[0].getparent().remove(spine[0]) guide_elem.set('href', 'calibre_raster_cover.jpg') + from calibre.ebooks.oeb.base import OPF + t = etree.SubElement(elem[0].getparent(), OPF('item'), + href=guide_elem.get('href'), id='calibre_raster_cover') + t.set('media-type', 'image/jpeg') for elem in list(opf.iterguide()): if elem.get('type', '').lower() == 'titlepage': elem.getparent().remove(elem) - from calibre.ebooks.oeb.base import OPF t = etree.SubElement(guide_elem.getparent(), OPF('reference')) t.set('type', 'titlepage') t.set('href', guide_cover) t.set('title', 'Title Page') - from calibre.ebooks import render_html - renderer = render_html(guide_cover) + from calibre.ebooks import render_html_svg_workaround + renderer = render_html_svg_workaround(guide_cover, log) if renderer is not None: open('calibre_raster_cover.jpg', 'wb').write( - renderer.data) - + renderer) def convert(self, stream, options, file_ext, log, accelerators): from calibre.utils.zipfile import ZipFile @@ -121,7 +130,7 @@ class EPUBInput(InputFormatPlugin): for elem in opf.iterguide(): elem.set('href', delta+elem.get('href')) - self.rationalize_cover(opf) + self.rationalize_cover(opf, log) with open('content.opf', 'wb') as nopf: nopf.write(opf.render()) diff --git a/src/calibre/ebooks/metadata/epub.py b/src/calibre/ebooks/metadata/epub.py index 040fee78a4..9fd8bf44e9 100644 --- a/src/calibre/ebooks/metadata/epub.py +++ b/src/calibre/ebooks/metadata/epub.py @@ -5,14 +5,10 @@ __copyright__ = '2008, Kovid Goyal ' '''Read meta information from epub files''' -import os, time +import os from cStringIO import StringIO from contextlib import closing -from PyQt4.Qt import QUrl, QEventLoop, QSize, QByteArray, QBuffer, \ - SIGNAL, QPainter, QImage, QObject, QApplication, Qt, QPalette -from PyQt4.QtWebKit import QWebPage - from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup from calibre.ebooks.metadata import MetaInformation @@ -102,64 +98,9 @@ class OCFDirReader(OCFReader): def open(self, path, *args, **kwargs): return open(os.path.join(self.root, path), *args, **kwargs) -class CoverRenderer(QObject): - WIDTH = 600 - HEIGHT = 800 - - def __init__(self, path): - if QApplication.instance() is None: - QApplication([]) - QObject.__init__(self) - self.loop = QEventLoop() - self.page = QWebPage() - pal = self.page.palette() - pal.setBrush(QPalette.Background, Qt.white) - self.page.setPalette(pal) - self.page.setViewportSize(QSize(self.WIDTH, self.HEIGHT)) - self.page.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff) - self.page.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff) - QObject.connect(self.page, SIGNAL('loadFinished(bool)'), self.render_html) - self._image_data = None - self.rendered = False - url = QUrl.fromLocalFile(os.path.normpath(path)) - self.page.mainFrame().load(url) - - def render_html(self, ok): - try: - if not ok: - self.rendered = True - return - image = QImage(self.page.viewportSize(), QImage.Format_ARGB32) - image.setDotsPerMeterX(96*(100/2.54)) - image.setDotsPerMeterY(96*(100/2.54)) - painter = QPainter(image) - self.page.mainFrame().render(painter) - painter.end() - ba = QByteArray() - buf = QBuffer(ba) - buf.open(QBuffer.WriteOnly) - image.save(buf, 'JPEG') - self._image_data = str(ba.data()) - finally: - self.loop.exit(0) - self.rendered = True - - def image_data(): - def fget(self): - if not self.rendered: - self.loop.exec_() - count = 0 - while count < 50 and not self.rendered: - time.sleep(0.1) - count += 1 - return self._image_data - return property(fget=fget) - image_data = image_data() - - def get_cover(opf, opf_path, stream): - from calibre.gui2 import is_ok_to_use_qt - if not is_ok_to_use_qt(): return None + from calibre.ebooks import render_html_svg_workaround + from calibre.utils.logging import default_log spine = list(opf.spine_items()) if not spine: return @@ -172,8 +113,7 @@ def get_cover(opf, opf_path, stream): cpage = os.path.join(tdir, os.path.dirname(opf_path), cpage) if not os.path.exists(cpage): return - cr = CoverRenderer(cpage) - return cr.image_data + return render_html_svg_workaround(cpage, default_log) def get_metadata(stream, extract_cover=True): """ Return metadata as a :class:`MetaInformation` object """ diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index 28a9bf70ee..0a305dbe1c 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -1556,7 +1556,8 @@ class MobiWriter(object): else: raise NotImplementedError("missing date or timestamp needed for mobi_periodical") - if oeb.metadata.cover: + if oeb.metadata.cover and \ + unicode(oeb.metadata.cover[0]) in oeb.manifest.ids: id = unicode(oeb.metadata.cover[0]) item = oeb.manifest.ids[id] href = item.href diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py index 35b211bb7d..03c878b9d2 100644 --- a/src/calibre/ebooks/oeb/reader.py +++ b/src/calibre/ebooks/oeb/reader.py @@ -27,7 +27,6 @@ from calibre.ebooks.oeb.base import namespace, barename, XPath, xpath, \ OEBError, OEBBook, DirContainer from calibre.ebooks.oeb.writer import OEBWriter from calibre.ebooks.oeb.entitydefs import ENTITYDEFS -from calibre.ebooks.metadata.epub import CoverRenderer from calibre.startup import get_lang from calibre.ptempfile import TemporaryDirectory from calibre.constants import __appname__, __version__ @@ -346,6 +345,8 @@ class OEBReader(object): if descriptionElement: description = etree.tostring(descriptionElement[0], method='text', encoding=unicode).strip() + if not description: + description = None else : description = None @@ -525,12 +526,14 @@ class OEBReader(object): return def _cover_from_html(self, hcover): + from calibre.ebooks import render_html_svg_workaround with TemporaryDirectory('_html_cover') as tdir: writer = OEBWriter() writer(self.oeb, tdir) path = os.path.join(tdir, urlunquote(hcover.href)) - renderer = CoverRenderer(path) - data = renderer.image_data + data = render_html_svg_workaround(path, self.logger) + if not data: + data = '' id, href = self.oeb.manifest.generate('cover', 'cover.jpeg') item = self.oeb.manifest.add(id, href, JPEG_MIME, data=data) return item diff --git a/src/calibre/utils/logging.py b/src/calibre/utils/logging.py index b6dd885278..98c7da178e 100644 --- a/src/calibre/utils/logging.py +++ b/src/calibre/utils/logging.py @@ -102,3 +102,5 @@ class Log(object): def __call__(self, *args, **kwargs): self.prints(INFO, *args, **kwargs) + +default_log = Log() diff --git a/src/calibre/web/feeds/recipes/recipe_publico.py b/src/calibre/web/feeds/recipes/recipe_publico.py index 17e168955f..34c89ccb6c 100644 --- a/src/calibre/web/feeds/recipes/recipe_publico.py +++ b/src/calibre/web/feeds/recipes/recipe_publico.py @@ -17,7 +17,7 @@ class Publico(BasicNewsRecipe): max_articles_per_feed = 30 encoding='utf-8' no_stylesheets = True - language = _('Portuguese') + language = _('Portugese') preprocess_regexps = [(re.compile(u"\uFFFD", re.DOTALL|re.IGNORECASE), lambda match: ''),] feeds = [