mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Resolve wayback links to original location instead of wayback cache
Better performance since wayback cache is so slow
This commit is contained in:
parent
5c9c40431f
commit
b9eea6b91c
@ -5,6 +5,7 @@
|
|||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import re
|
||||||
import time
|
import time
|
||||||
from collections import defaultdict, namedtuple
|
from collections import defaultdict, namedtuple
|
||||||
from future_builtins import map
|
from future_builtins import map
|
||||||
@ -99,7 +100,13 @@ def wayback_machine_cached_url(url, br=None, log=prints, timeout=60):
|
|||||||
|
|
||||||
def wayback_url_processor(url):
    """Resolve a link that starts with ``/`` to an absolute URL.

    URLs that do not start with ``/`` are returned unchanged.  For a URL
    that does start with ``/``:

    * if ``http:`` or ``https:`` occurs anywhere in it, everything from the
      first such occurrence onwards is returned, so the embedded original
      URL is used directly;
    * otherwise the URL is made absolute by prefixing
      ``https://web.archive.org``.

    :param url: The URL string to process.
    :return: The processed URL string.
    """
    if not url.startswith('/'):
        return url
    # Use original URL instead of absolutizing to wayback URL as wayback is
    # slow
    m = re.search('https?:', url)
    if m is None:
        # No embedded original URL: fall back to the wayback host.
        return 'https://web.archive.org' + url
    return url[m.start():]
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user