From b9eea6b91c2797484fac2c8f0ade0f6735ec8918 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Thu, 2 Mar 2017 17:02:27 +0530
Subject: [PATCH] Resolve wayback links to original location instead of wayback cache

Better performance since wayback cache is so slow
---
 src/calibre/ebooks/metadata/sources/search_engines.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/metadata/sources/search_engines.py b/src/calibre/ebooks/metadata/sources/search_engines.py
index faf1e5f927..28cf7f7570 100644
--- a/src/calibre/ebooks/metadata/sources/search_engines.py
+++ b/src/calibre/ebooks/metadata/sources/search_engines.py
@@ -5,6 +5,7 @@
 from __future__ import absolute_import, division, print_function, unicode_literals
 
 import json
+import re
 import time
 from collections import defaultdict, namedtuple
 from future_builtins import map
@@ -99,7 +100,13 @@ def wayback_machine_cached_url(url, br=None, log=prints, timeout=60):
 
 def wayback_url_processor(url):
     if url.startswith('/'):
-        url = 'https://web.archive.org' + url
+        # Use original URL instead of absolutizing to wayback URL as wayback is
+        # slow
+        m = re.search('https?:', url)
+        if m is None:
+            url = 'https://web.archive.org' + url
+        else:
+            url = url[m.start():]
     return url
 
 