From a067f1d519c48a10b0bc399b20e95379e36417ec Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 28 Apr 2024 14:16:55 +0530 Subject: [PATCH] Make code re-useable --- src/calibre/web/site_parsers/nytimes.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/calibre/web/site_parsers/nytimes.py b/src/calibre/web/site_parsers/nytimes.py index c78e3edc08..919cabe61a 100644 --- a/src/calibre/web/site_parsers/nytimes.py +++ b/src/calibre/web/site_parsers/nytimes.py @@ -9,7 +9,7 @@ from xml.sax.saxutils import escape, quoteattr from calibre.utils.iso8601 import parse_iso8601 -module_version = 4 # needed for live updates +module_version = 5 # needed for live updates pprint @@ -185,15 +185,12 @@ def extract_html(soup): return json_to_html(raw) -def download_url(url=None, br=None): - # Get the URL from the Wayback machine +def download_url_from_wayback(category, url, br=None): from mechanize import Request host = 'http://localhost:8090' host = 'https://wayback1.calibre-ebook.com' - if url is None: - url = sys.argv[-1] rq = Request( - host + '/nytimes', + host + '/' + category, data=json.dumps({"url": url}), headers={'User-Agent': 'calibre', 'Content-Type': 'application/json'} ) @@ -204,6 +201,13 @@ def download_url(url=None, br=None): return br.open_novisit(rq, timeout=3 * 60).read() +def download_url(url=None, br=None): + # Get the URL from the Wayback machine + if url is None: + url = sys.argv[-1] + return download_url_from_wayback('nytimes', url, br) + + if __name__ == '__main__': f = sys.argv[-1] raw = open(f).read()