Update the host used for wayback downloads

This commit is contained in:
Kovid Goyal 2022-09-15 13:59:33 +05:30
parent 6c9fc1c833
commit d2977ebec4
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -4,12 +4,12 @@
import json
import re
from xml.sax.saxutils import escape, quoteattr
import sys
from pprint import pprint
from xml.sax.saxutils import escape, quoteattr
from calibre.utils.iso8601 import parse_iso8601
module_version = 4 # needed for live updates
pprint
@@ -186,20 +186,26 @@ def extract_html(soup):
return json_to_html(raw)
def download_url(url, br):
def download_url(url=None, br=None):
# Get the URL from the Wayback machine
from mechanize import Request
host = 'http://localhost:8090'
host = 'https://wayback1.calibre-ebook.com'
if url is None:
url = sys.argv[-1]
rq = Request(
'http://localhost:8090/nytimes',
host + '/nytimes',
data=json.dumps({"url": url}),
headers={'User-Agent': 'calibre', 'Content-Type': 'application/json'}
)
if br is None:
from calibre import browser
br = browser()
br.set_handle_gzip(True)
return br.open_novisit(rq, timeout=3 * 60).read()
if __name__ == '__main__':
import sys
f = sys.argv[-1]
raw = open(f).read()
if f.endswith('.html'):