News download: Add support for images embedded in the HTML

This commit is contained in:
Kovid Goyal 2012-06-24 08:14:33 +05:30
parent f187d5f7e8
commit a7fe71a54c

View File

@@ -12,6 +12,7 @@ from urllib import url2pathname, quote
from httplib import responses
from PIL import Image
from cStringIO import StringIO
from base64 import b64decode
from calibre import browser, relpath, unicode_path
from calibre.constants import filesystem_encoding, iswindows
@@ -346,6 +347,13 @@ class RecursiveFetcher(object):
c = 0
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
if iurl.startswith('data:image/'):
try:
data = b64decode(iurl.partition(',')[-1])
except:
self.log.exception('Failed to decode embedded image')
continue
else:
if callable(self.image_url_processor):
iurl = self.image_url_processor(baseurl, iurl)
if not urlparse.urlsplit(iurl).scheme: