diff --git a/src/calibre/web/site_parsers/nytimes.py b/src/calibre/web/site_parsers/nytimes.py
index e4f9f8ca4c..2526bf2df6 100644
--- a/src/calibre/web/site_parsers/nytimes.py
+++ b/src/calibre/web/site_parsers/nytimes.py
@@ -9,7 +9,7 @@ from xml.sax.saxutils import escape, quoteattr
 
 from calibre.utils.iso8601 import parse_iso8601
 
-module_version = 14 # needed for live updates
+module_version = 15 # needed for live updates
 pprint
 
 
@@ -198,8 +198,8 @@ def article_parse(data):
 def clean_js_json(text):
     text = re.sub(r'\bundefined\b', 'null', text)
     text = re.sub(
-        r',?\s*"[^"]+"\s*:\s*function\s*\([^)]*\)\s*\{.*?\}',
-        '',
+        r'{\"checkGate\":.*',
+        'null}}',
         text,
         flags=re.DOTALL
     )
@@ -207,7 +207,7 @@ def clean_js_json(text):
 
 
 def json_to_html(raw):
-    data = json.JSONDecoder(strict=False).raw_decode(raw)[0]
+    data = json.loads(clean_js_json(raw))
     # open('/t/raw.json', 'w').write(json.dumps(data, indent=2))
     try:
         data = data['initialData']['data']