From 84ec9819f2fe89f6834cb0b91125fb364b742c9b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 16 May 2023 06:55:01 +0530 Subject: [PATCH] Use the mechanize browser for downloading external resources instead of python stdlib Also remove deprecated import of cgi --- src/calibre/ebooks/oeb/polish/download.py | 25 ++++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/calibre/ebooks/oeb/polish/download.py b/src/calibre/ebooks/oeb/polish/download.py index ec409e96d1..26de19e1ef 100644 --- a/src/calibre/ebooks/oeb/polish/download.py +++ b/src/calibre/ebooks/oeb/polish/download.py @@ -2,7 +2,6 @@ # License: GPLv3 Copyright: 2016, Kovid Goyal -import cgi import mimetypes import os import posixpath @@ -15,7 +14,7 @@ from io import BytesIO from multiprocessing.dummy import Pool from tempfile import NamedTemporaryFile -from calibre import as_unicode, sanitize_file_name as sanitize_file_name_base +from calibre import as_unicode, browser, sanitize_file_name as sanitize_file_name_base from calibre.constants import iswindows from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, barename, iterlinks from calibre.ebooks.oeb.polish.utils import guess_type @@ -23,7 +22,7 @@ from calibre.ptempfile import TemporaryDirectory from calibre.web import get_download_filename_from_response from polyglot.binary import from_base64_bytes from polyglot.builtins import iteritems -from polyglot.urllib import unquote, urlopen, urlparse +from polyglot.urllib import unquote, urlparse def is_external(url): @@ -57,15 +56,17 @@ def get_external_resources(container): def get_filename(original_url_parsed, response): ans = get_download_filename_from_response(response) or posixpath.basename(original_url_parsed.path) or 'unknown' - ct = response.info().get('Content-Type', '') + headers = response.info() + try: + ct = headers.get_params()[0][0].lower() + except Exception: + ct = '' if ct: - ct = cgi.parse_header(ct)[0].lower() - if ct: - mt = guess_type(ans) - if mt != ct: - exts = mimetypes.guess_all_extensions(ct) - if exts: - ans += exts[0] + mt = guess_type(ans) + if mt != ct: + exts = mimetypes.guess_all_extensions(ct) + if exts: + ans += exts[0] return ans @@ -137,7 +138,7 @@ def download_one(tdir, timeout, progress_report, data_uri_map, url): break filename = 'data-uri.' + ext else: - src = urlopen(url, timeout=timeout) + src = browser().open(url, timeout=timeout) filename = get_filename(purl, src) sz = get_content_length(src) progress_report(url, 0, sz)