def download_url(url=None, br=None):
    """Fetch a NYTimes article through the Wayback machine proxy.

    A JSON body of the form ``{"url": url}`` is POSTed to the ``/nytimes``
    endpoint of the proxy host and the raw response body is returned.

    :param url: URL of the article to fetch. When ``None``, the last
        command-line argument (``sys.argv[-1]``) is used instead
        (presumably for ad-hoc command-line testing).
    :param br: Browser object used to send the request. It must provide
        ``set_handle_gzip()`` and ``open_novisit()``. When ``None``, a new
        one is created with ``calibre.browser()``. Note that gzip handling
        is switched on for the browser that is passed in.
    :return: Whatever ``read()`` on the response object returns.
    """
    # Imported lazily so that merely importing this module does not
    # require mechanize.
    from mechanize import Request

    # For local testing against a development proxy use
    # 'http://localhost:8090' instead.
    host = 'https://wayback1.calibre-ebook.com'
    # The proxy can be slow to answer, so allow up to three minutes.
    timeout_seconds = 3 * 60

    if url is None:
        url = sys.argv[-1]
    rq = Request(
        host + '/nytimes',
        data=json.dumps({"url": url}),
        headers={'User-Agent': 'calibre', 'Content-Type': 'application/json'}
    )
    if br is None:
        from calibre import browser
        br = browser()
    br.set_handle_gzip(True)

    response = br.open_novisit(rq, timeout=timeout_seconds)
    try:
        return response.read()
    finally:
        # Release the underlying connection even if read() fails.
        response.close()