Update the host used for wayback downloads

This commit is contained in:
Kovid Goyal 2022-09-15 13:59:33 +05:30
parent 6c9fc1c833
commit d2977ebec4
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -4,12 +4,12 @@
import json import json
import re import re
from xml.sax.saxutils import escape, quoteattr import sys
from pprint import pprint from pprint import pprint
from xml.sax.saxutils import escape, quoteattr
from calibre.utils.iso8601 import parse_iso8601 from calibre.utils.iso8601 import parse_iso8601
module_version = 4 # needed for live updates module_version = 4 # needed for live updates
pprint pprint
@@ -186,20 +186,26 @@ def extract_html(soup):
return json_to_html(raw) return json_to_html(raw)
def download_url(url, br): def download_url(url=None, br=None):
# Get the URL from the Wayback machine # Get the URL from the Wayback machine
from mechanize import Request from mechanize import Request
host = 'http://localhost:8090'
host = 'https://wayback1.calibre-ebook.com'
if url is None:
url = sys.argv[-1]
rq = Request( rq = Request(
'http://localhost:8090/nytimes', host + '/nytimes',
data=json.dumps({"url": url}), data=json.dumps({"url": url}),
headers={'User-Agent': 'calibre', 'Content-Type': 'application/json'} headers={'User-Agent': 'calibre', 'Content-Type': 'application/json'}
) )
if br is None:
from calibre import browser
br = browser()
br.set_handle_gzip(True) br.set_handle_gzip(True)
return br.open_novisit(rq, timeout=3 * 60).read() return br.open_novisit(rq, timeout=3 * 60).read()
if __name__ == '__main__': if __name__ == '__main__':
import sys
f = sys.argv[-1] f = sys.argv[-1]
raw = open(f).read() raw = open(f).read()
if f.endswith('.html'): if f.endswith('.html'):