HTML Input: Guess the MIME type correctly for references to image files without file extensions. Fixes #1059349 (missing images on HTML to MOBI conversion)

This commit is contained in:
Kovid Goyal 2012-10-01 19:39:23 +05:30
parent ee7c2ca0ec
commit 92eb7e7ac1

View File

@ -7,7 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re, tempfile, os import re, tempfile, os, imghdr
from functools import partial from functools import partial
from itertools import izip from itertools import izip
from urllib import quote from urllib import quote
@ -247,6 +247,14 @@ class HTMLInput(InputFormatPlugin):
if media_type == 'text/plain': if media_type == 'text/plain':
self.log.warn('Ignoring link to text file %r'%link_) self.log.warn('Ignoring link to text file %r'%link_)
return None return None
if media_type == self.BINARY_MIME:
# Check for the common case, images
try:
img = imghdr.what(link)
except EnvironmentError:
pass
else:
media_type = self.guess_type('dummy.'+img)[0] or self.BINARY_MIME
self.oeb.log.debug('Added', link) self.oeb.log.debug('Added', link)
self.oeb.container = self.DirContainer(os.path.dirname(link), self.oeb.container = self.DirContainer(os.path.dirname(link),