Workaround for the NYT Today's Paper page using undefined instead of null in its JSON

This commit is contained in:
Kovid Goyal 2022-02-18 19:59:22 +05:30
parent 803da449ba
commit 07b4239634
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 4 additions and 2 deletions

View File

@ -186,7 +186,8 @@ class NewYorkTimes(BasicNewsRecipe):
soup = self.read_nyt_metadata()
script = soup.findAll('script', text=lambda x: x and 'window.__preloadedData' in x)[0]
script = type(u'')(script)
data = json.loads(script[script.find('{'):script.rfind(';')].strip().rstrip(';'))['initialState']
json_data = script[script.find('{'):script.rfind(';')].strip().rstrip(';')
data = json.loads(json_data.replace(':undefined', ':null'))['initialState']
containers, sections = {}, {}
article_map = {}
gc_pat = re.compile(r'groupings.(\d+).containers.(\d+)')

View File

@ -186,7 +186,8 @@ class NewYorkTimes(BasicNewsRecipe):
soup = self.read_nyt_metadata()
script = soup.findAll('script', text=lambda x: x and 'window.__preloadedData' in x)[0]
script = type(u'')(script)
data = json.loads(script[script.find('{'):script.rfind(';')].strip().rstrip(';'))['initialState']
json_data = script[script.find('{'):script.rfind(';')].strip().rstrip(';')
data = json.loads(json_data.replace(':undefined', ':null'))['initialState']
containers, sections = {}, {}
article_map = {}
gc_pat = re.compile(r'groupings.(\d+).containers.(\d+)')