Use the mechanize browser for downloading external resources instead of the Python stdlib's urlopen

Also remove the import of the deprecated cgi module
This commit is contained in:
Kovid Goyal 2023-05-16 06:55:01 +05:30
parent dc8be3bbcd
commit 84ec9819f2
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -2,7 +2,6 @@
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net> # License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
import cgi
import mimetypes import mimetypes
import os import os
import posixpath import posixpath
@ -15,7 +14,7 @@ from io import BytesIO
from multiprocessing.dummy import Pool from multiprocessing.dummy import Pool
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
from calibre import as_unicode, sanitize_file_name as sanitize_file_name_base from calibre import as_unicode, browser, sanitize_file_name as sanitize_file_name_base
from calibre.constants import iswindows from calibre.constants import iswindows
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, barename, iterlinks from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, barename, iterlinks
from calibre.ebooks.oeb.polish.utils import guess_type from calibre.ebooks.oeb.polish.utils import guess_type
@ -23,7 +22,7 @@ from calibre.ptempfile import TemporaryDirectory
from calibre.web import get_download_filename_from_response from calibre.web import get_download_filename_from_response
from polyglot.binary import from_base64_bytes from polyglot.binary import from_base64_bytes
from polyglot.builtins import iteritems from polyglot.builtins import iteritems
from polyglot.urllib import unquote, urlopen, urlparse from polyglot.urllib import unquote, urlparse
def is_external(url): def is_external(url):
@ -57,9 +56,11 @@ def get_external_resources(container):
def get_filename(original_url_parsed, response): def get_filename(original_url_parsed, response):
ans = get_download_filename_from_response(response) or posixpath.basename(original_url_parsed.path) or 'unknown' ans = get_download_filename_from_response(response) or posixpath.basename(original_url_parsed.path) or 'unknown'
ct = response.info().get('Content-Type', '') headers = response.info()
if ct: try:
ct = cgi.parse_header(ct)[0].lower() ct = headers.get_params()[0][0].lower()
except Exception:
ct = ''
if ct: if ct:
mt = guess_type(ans) mt = guess_type(ans)
if mt != ct: if mt != ct:
@ -137,7 +138,7 @@ def download_one(tdir, timeout, progress_report, data_uri_map, url):
break break
filename = 'data-uri.' + ext filename = 'data-uri.' + ext
else: else:
src = urlopen(url, timeout=timeout) src = browser().open(url, timeout=timeout)
filename = get_filename(purl, src) filename = get_filename(purl, src)
sz = get_content_length(src) sz = get_content_length(src)
progress_report(url, 0, sz) progress_report(url, 0, sz)