From a1365513ddde400a0d88c09952d943ca52b69443 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 7 Feb 2010 12:49:54 -0700
Subject: [PATCH] EPUB metadata: Extract the cover image from the html it is
 embedded in if possible, instead of rendering the html. Removes the white
 margins on covers and speeds up cover extraction

---
 src/calibre/ebooks/__init__.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py
index d5651568fa..38a8ef3662 100644
--- a/src/calibre/ebooks/__init__.py
+++ b/src/calibre/ebooks/__init__.py
@@ -70,6 +70,19 @@ def extract_cover_from_embedded_svg(html, base, log):
         if href and os.access(path, os.R_OK):
             return open(path, 'rb').read()
 
+def extract_calibre_cover(raw, base, log):
+    '''Return the image bytes of a bare <img alt="cover"> page, else None.'''
+    from calibre.ebooks.BeautifulSoup import BeautifulSoup
+    soup = BeautifulSoup(raw)
+    images = soup.findAll('img')
+    text_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'span', 'font', 'br']
+    if soup.find(name=text_tags) is None and len(images) == 1 and \
+            images[0].get('alt', '') == 'cover':
+        img = os.path.join(base, *images[0]['src'].split('/'))
+        if os.path.exists(img):
+            with open(img, 'rb') as f:
+                return f.read()
+
 def render_html_svg_workaround(path_to_html, log, width=590, height=750):
     from calibre.ebooks.oeb.base import SVG_NS
     raw = open(path_to_html, 'rb').read()
@@ -80,6 +93,11 @@ def render_html_svg_workaround(path_to_html, log, width=590, height=750):
                    os.path.dirname(path_to_html), log)
         except:
             pass
+    if data is None:
+        try:
+            data = extract_calibre_cover(raw, os.path.dirname(path_to_html), log)
+        except:
+            pass
     if data is None:
         renderer = render_html(path_to_html, width, height)
         data = getattr(renderer, 'data', None)