EPUB Input: Automatically extract the cover image from a simple HTML title page that consists of only a single <img> tag

2025-07-09 03:04:10 -04:00 · 2012-04-12 19:10:56 +05:30 · 2012-04-12 19:10:56 +05:30 · 24925ddbf9
commit 24925ddbf9
parent 63d578f99b
1 changed files with 15 additions and 0 deletions
--- a/src/calibre/ebooks/__init__.py
+++ b/src/calibre/ebooks/__init__.py
@@ -93,6 +93,20 @@ def extract_calibre_cover(raw, base, log):
        if os.path.exists(img):
            return open(img, 'rb').read()

+    # Look for a simple cover, i.e. a body with no text and only one <img> tag
+    if matches is None:
+        body = soup.find('body')
+        if body is not None:
+            text = u''.join(map(unicode, body.findAll(text=True)))
+            if text.strip():
+                # Body has text, abort
+                return
+            images = body.findAll('img', src=True)
+            if 0 < len(images) < 2:
+                img = os.path.join(base, *images[0]['src'].split('/'))
+                if os.path.exists(img):
+                    return open(img, 'rb').read()
+
 def render_html_svg_workaround(path_to_html, log, width=590, height=750):
    from calibre.ebooks.oeb.base import SVG_NS
    raw = open(path_to_html, 'rb').read()
@@ -108,6 +122,7 @@ def render_html_svg_workaround(path_to_html, log, width=590, height=750):
            data = extract_calibre_cover(raw, os.path.dirname(path_to_html), log)
        except:
            pass
+
    if data is None:
        renderer = render_html(path_to_html, width, height)
        data = getattr(renderer, 'data', None)