From 24925ddbf94a8082fe49b65551048d4a3f82f0e0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 12 Apr 2012 19:10:56 +0530 Subject: [PATCH] EPUB Input: Automatically extract cover image from simple HTML title page that consists of only a single <img> tag --- src/calibre/ebooks/__init__.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py index 0adfab4e11..82e8c6f925 100644 --- a/src/calibre/ebooks/__init__.py +++ b/src/calibre/ebooks/__init__.py @@ -93,6 +93,20 @@ def extract_calibre_cover(raw, base, log): if os.path.exists(img): return open(img, 'rb').read() + # Look for a simple cover, i.e. a body with no text and only one tag + if matches is None: + body = soup.find('body') + if body is not None: + text = u''.join(map(unicode, body.findAll(text=True))) + if text.strip(): + # Body has text, abort + return + images = body.findAll('img', src=True) + if 0 < len(images) < 2: + img = os.path.join(base, *images[0]['src'].split('/')) + if os.path.exists(img): + return open(img, 'rb').read() + def render_html_svg_workaround(path_to_html, log, width=590, height=750): from calibre.ebooks.oeb.base import SVG_NS raw = open(path_to_html, 'rb').read() @@ -108,6 +122,7 @@ def render_html_svg_workaround(path_to_html, log, width=590, height=750): data = extract_calibre_cover(raw, os.path.dirname(path_to_html), log) except: pass + if data is None: renderer = render_html(path_to_html, width, height) data = getattr(renderer, 'data', None)