News download: Add support for images embedded in the HTML

This commit is contained in:
Kovid Goyal 2012-06-24 08:14:33 +05:30
parent f187d5f7e8
commit a7fe71a54c

View File

@@ -12,6 +12,7 @@ from urllib import url2pathname, quote
from httplib import responses from httplib import responses
from PIL import Image from PIL import Image
from cStringIO import StringIO from cStringIO import StringIO
from base64 import b64decode
from calibre import browser, relpath, unicode_path from calibre import browser, relpath, unicode_path
from calibre.constants import filesystem_encoding, iswindows from calibre.constants import filesystem_encoding, iswindows
@@ -346,6 +347,13 @@ class RecursiveFetcher(object):
c = 0 c = 0
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src'] iurl = tag['src']
if iurl.startswith('data:image/'):
try:
data = b64decode(iurl.partition(',')[-1])
except:
self.log.exception('Failed to decode embedded image')
continue
else:
if callable(self.image_url_processor): if callable(self.image_url_processor):
iurl = self.image_url_processor(baseurl, iurl) iurl = self.image_url_processor(baseurl, iurl)
if not urlparse.urlsplit(iurl).scheme: if not urlparse.urlsplit(iurl).scheme: