diff --git a/recipes/nytimes.recipe b/recipes/nytimes.recipe index 944502fa93..1f7826de1f 100644 --- a/recipes/nytimes.recipe +++ b/recipes/nytimes.recipe @@ -93,8 +93,6 @@ class NewYorkTimes(BasicNewsRecipe): conversion_options = {'flow_size': 0} def preprocess_raw_html(self, raw_html, url): - if '/live/' in url: - self.abort_article('Cant be bothered decoding the JSON for NYT live articles') if not hasattr(self, 'nyt_parser'): from calibre.live import load_module m = load_module('calibre.web.site_parsers.nytimes') diff --git a/recipes/nytimes_sub.recipe b/recipes/nytimes_sub.recipe index 28f5e3582e..6b42573e88 100644 --- a/recipes/nytimes_sub.recipe +++ b/recipes/nytimes_sub.recipe @@ -93,8 +93,6 @@ class NewYorkTimes(BasicNewsRecipe): conversion_options = {'flow_size': 0} def preprocess_raw_html(self, raw_html, url): - if '/live/' in url: - self.abort_article('Cant be bothered decoding the JSON for NYT live articles') if not hasattr(self, 'nyt_parser'): from calibre.live import load_module m = load_module('calibre.web.site_parsers.nytimes') diff --git a/src/calibre/web/site_parsers/nytimes.py b/src/calibre/web/site_parsers/nytimes.py index 1e1e6b881c..f849ac9098 100644 --- a/src/calibre/web/site_parsers/nytimes.py +++ b/src/calibre/web/site_parsers/nytimes.py @@ -5,11 +5,13 @@ import json import re from xml.sax.saxutils import escape, quoteattr +from pprint import pprint from calibre.utils.iso8601 import parse_iso8601 -module_version = 1 # needed for live updates +module_version = 2 # needed for live updates +pprint def is_heading(tn): @@ -99,7 +101,11 @@ def process_image_block(lines, block): def json_to_html(raw): data = json.loads(raw.replace(':undefined', ':null')) # open('/t/raw.json', 'w').write(json.dumps(data, indent=2)) - data = data['initialData']['data'] + try: + data = data['initialData']['data'] + except TypeError: + data = data['initialState'] + return live_json_to_html(data) article = next(iter(data.values())) body = article['sprinkledBody']['content'] lines = [] @@ -114,6 +120,65 @@ def json_to_html(raw): return '
' + '\n'.join(lines) + '' +def add_live_item(item, item_type, lines): + a = lines.append + if item_type == 'text': + a('' + item['value'] + '
') + elif item_type == 'list': + a('' + b['leadIn'] + '
') + if 'items' in b: + add_live_item({'value': b['items']}, 'items', lines) + return + if 'bulletedList' in b: + add_live_item({'value': b['bulletedList']}, 'bulletedList', lines) + return + if 'sections' in b: + for section in b['sections']: + add_live_item({'value': section['section']}, 'section', lines) + return + raise Exception('Unknown item: %s' % b) + else: + raise Exception('Unknown item: %s' % b) + + +def live_json_to_html(data): + for k, v in data["ROOT_QUERY"].items(): + if isinstance(v, dict) and 'id' in v: + root = data[v['id']] + s = data[root['storylines'][0]['id']] + s = data[s['storyline']['id']] + title = s['displayName'] + lines = ['