HTML Input: Guess mimetype correctly for references to image files without file extensions. Fixes #1059349 (missing images on html to mobi conversion)

2025-09-29 15:31:08 -04:00 · 2012-10-01 19:39:23 +05:30 · 2012-10-01 19:39:23 +05:30 · 92eb7e7ac1
commit 92eb7e7ac1
parent ee7c2ca0ec
1 changed file with 9 additions and 1 deletion
--- a/src/calibre/ebooks/conversion/plugins/html_input.py
+++ b/src/calibre/ebooks/conversion/plugins/html_input.py
@@ -7,7 +7,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import re, tempfile, os
+import re, tempfile, os, imghdr
 from functools import partial
 from itertools import izip
 from urllib import quote
@@ -247,6 +247,14 @@ class HTMLInput(InputFormatPlugin):
            if media_type == 'text/plain':
                self.log.warn('Ignoring link to text file %r'%link_)
                return None
+            if media_type == self.BINARY_MIME:
+                # Check for the common case, images
+                try:
+                    img = imghdr.what(link)
+                except EnvironmentError:
+                    pass
+                else:
+                    media_type = self.guess_type('dummy.'+img)[0] or self.BINARY_MIME

            self.oeb.log.debug('Added', link)
            self.oeb.container = self.DirContainer(os.path.dirname(link),