From d5d577a0a2514933e18543541073e4ee5e00482c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 14 Sep 2022 19:50:28 +0530 Subject: [PATCH] Get unmodified HTML from the wayback machine --- src/calibre/ebooks/metadata/sources/search_engines.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/sources/search_engines.py b/src/calibre/ebooks/metadata/sources/search_engines.py index 4d958be2c6..fb12e68614 100644 --- a/src/calibre/ebooks/metadata/sources/search_engines.py +++ b/src/calibre/ebooks/metadata/sources/search_engines.py @@ -140,7 +140,10 @@ def wayback_machine_cached_url(url, br=None, log=prints, timeout=60): try: closest = data['archived_snapshots']['closest'] if closest['available']: - return closest['url'].replace('http:', 'https:') + ans = closest['url'].replace('http:', 'https:', 1) + # get unmodified HTML + ans = ans.replace(closest['timestamp'], closest['timestamp'] + 'id_', 1) + return ans except Exception: pass from pprint import pformat