From 24925ddbf94a8082fe49b65551048d4a3f82f0e0 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 12 Apr 2012 19:10:56 +0530
Subject: [PATCH] EPUB Input: Automatically extract cover image from simple
 HTML title page that consists of only a single <img> tag

---
 src/calibre/ebooks/__init__.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py
index 0adfab4e11..82e8c6f925 100644
--- a/src/calibre/ebooks/__init__.py
+++ b/src/calibre/ebooks/__init__.py
@@ -93,6 +93,20 @@ def extract_calibre_cover(raw, base, log):
         if os.path.exists(img):
             return open(img, 'rb').read()
 
+    # Look for a simple cover, i.e. a body with no text and exactly one <img> tag that has a src attribute
+    if matches is None:
+        body = soup.find('body')
+        if body is not None:
+            text = u''.join(map(unicode, body.findAll(text=True)))
+            if text.strip():
+                # Body has text, abort
+                return
+            images = body.findAll('img', src=True)
+            if 0 < len(images) < 2:
+                img = os.path.join(base, *images[0]['src'].split('/'))
+                if os.path.exists(img):
+                    return open(img, 'rb').read()
+
 def render_html_svg_workaround(path_to_html, log, width=590, height=750):
     from calibre.ebooks.oeb.base import SVG_NS
     raw = open(path_to_html, 'rb').read()
@@ -108,6 +122,7 @@ def render_html_svg_workaround(path_to_html, log, width=590, height=750):
             data = extract_calibre_cover(raw, os.path.dirname(path_to_html), log)
         except:
             pass
+
     if data is None:
         renderer = render_html(path_to_html, width, height)
         data = getattr(renderer, 'data', None)