mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
News system: Make handling of data urls in img tags more robust
This commit is contained in:
parent
f16d7fb89f
commit
d7d329554b
@ -17,6 +17,7 @@ import sys
|
|||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
|
from urllib.request import urlopen
|
||||||
|
|
||||||
from calibre import browser, relpath, unicode_path
|
from calibre import browser, relpath, unicode_path
|
||||||
from calibre.constants import filesystem_encoding, iswindows
|
from calibre.constants import filesystem_encoding, iswindows
|
||||||
@ -28,7 +29,6 @@ from calibre.utils.img import image_from_data, image_to_data
|
|||||||
from calibre.utils.imghdr import what
|
from calibre.utils.imghdr import what
|
||||||
from calibre.utils.logging import Log
|
from calibre.utils.logging import Log
|
||||||
from calibre.web.fetch.utils import rescale_image
|
from calibre.web.fetch.utils import rescale_image
|
||||||
from polyglot.binary import from_base64_bytes
|
|
||||||
from polyglot.builtins import as_bytes, unicode_type
|
from polyglot.builtins import as_bytes, unicode_type
|
||||||
from polyglot.http_client import responses
|
from polyglot.http_client import responses
|
||||||
from polyglot.urllib import (
|
from polyglot.urllib import (
|
||||||
@ -402,9 +402,9 @@ class RecursiveFetcher(object):
|
|||||||
c = 0
|
c = 0
|
||||||
for tag in soup.findAll('img', src=True):
|
for tag in soup.findAll('img', src=True):
|
||||||
iurl = tag['src']
|
iurl = tag['src']
|
||||||
if iurl.startswith('data:image/'):
|
if iurl.startswith('data:'):
|
||||||
try:
|
try:
|
||||||
data = from_base64_bytes(iurl.partition(',')[-1])
|
data = urlopen(iurl).read()
|
||||||
except Exception:
|
except Exception:
|
||||||
self.log.exception('Failed to decode embedded image')
|
self.log.exception('Failed to decode embedded image')
|
||||||
continue
|
continue
|
||||||
|
Loading…
x
Reference in New Issue
Block a user