This commit is contained in:
Kovid Goyal 2024-03-10 09:55:32 +05:30
commit 2aec8675cd
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 45 additions and 36 deletions

View File

@@ -19,8 +19,10 @@ def classes(classes):
def extract_json(raw):
    """Return the article payload embedded in the page's ``__natgeo__`` script.

    The page HTML is expected to contain a ``<script>`` element in which a
    JSON object is assigned to ``window['__natgeo__']``. The object is
    parsed and the article data stored under ``page -> content`` is
    returned.

    Args:
        raw: The page HTML as a string.

    Returns:
        ``content['article']`` when that key is present, otherwise
        ``content['prismarticle']``.

    Raises:
        json.JSONDecodeError: If the text following the marker is not
            valid JSON (including when the marker is absent).
        KeyError: If ``page``/``content`` or both article keys are missing.
    """
    s = raw.find("window['__natgeo__']")
    # Limit the slice to the script element that holds the assignment.
    script = raw[s:raw.find('</script>', s)]
    # Drop the "window[...] =" prefix and the trailing ';' so that only the
    # JSON object literal is handed to the parser.
    data = json.loads(script[script.find('{'):].rstrip(';'))['page']['content']
    # Some pages carry the payload under 'prismarticle' instead of 'article'.
    if 'article' in data:
        return data['article']
    return data['prismarticle']
def parse_contributors(grp):
@@ -32,6 +34,7 @@ def parse_contributors(grp):
def parse_lead_image(media):
if 'image' in media:
if 'dsc' in media['image']:
yield '<p><div><img src="{}" alt="{}"></div>'.format(
escape(media['image']['src'], True), escape(media['image']['dsc'], True))

View File

@@ -18,8 +18,10 @@ def classes(classes):
def extract_json(raw):
    """Return the article payload embedded in the page's ``__natgeo__`` script.

    The page HTML is expected to contain a ``<script>`` element in which a
    JSON object is assigned to ``window['__natgeo__']``. The object is
    parsed and the article data stored under ``page -> content`` is
    returned.

    Args:
        raw: The page HTML as a string.

    Returns:
        ``content['article']`` when that key is present, otherwise
        ``content['prismarticle']``.

    Raises:
        json.JSONDecodeError: If the text following the marker is not
            valid JSON (including when the marker is absent).
        KeyError: If ``page``/``content`` or both article keys are missing.
    """
    s = raw.find("window['__natgeo__']")
    # Limit the slice to the script element that holds the assignment.
    script = raw[s:raw.find('</script>', s)]
    # Drop the "window[...] =" prefix and the trailing ';' so that only the
    # JSON object literal is handed to the parser.
    data = json.loads(script[script.find('{'):].rstrip(';'))['page']['content']
    # Some pages carry the payload under 'prismarticle' instead of 'article'.
    if 'article' in data:
        return data['article']
    return data['prismarticle']
def parse_contributors(grp):
@@ -31,6 +33,7 @@ def parse_contributors(grp):
def parse_lead_image(media):
if 'image' in media:
if 'dsc' in media['image']:
yield '<p><div><img src="{}" alt="{}"></div>'.format(
escape(media['image']['src'], True), escape(media['image']['dsc'], True))

View File

@@ -23,8 +23,10 @@ def classes(classes):
def extract_json(raw):
    """Return the article payload embedded in the page's ``__natgeo__`` script.

    The page HTML is expected to contain a ``<script>`` element in which a
    JSON object is assigned to ``window['__natgeo__']``. The object is
    parsed and the article data stored under ``page -> content`` is
    returned.

    Args:
        raw: The page HTML as a string.

    Returns:
        ``content['article']`` when that key is present, otherwise
        ``content['prismarticle']``.

    Raises:
        json.JSONDecodeError: If the text following the marker is not
            valid JSON (including when the marker is absent).
        KeyError: If ``page``/``content`` or both article keys are missing.
    """
    s = raw.find("window['__natgeo__']")
    # Limit the slice to the script element that holds the assignment.
    script = raw[s:raw.find('</script>', s)]
    # Drop the "window[...] =" prefix and the trailing ';' so that only the
    # JSON object literal is handed to the parser.
    data = json.loads(script[script.find('{'):].rstrip(';'))['page']['content']
    # Some pages carry the payload under 'prismarticle' instead of 'article'.
    if 'article' in data:
        return data['article']
    return data['prismarticle']
def parse_contributors(grp):
@@ -36,6 +38,7 @@ def parse_contributors(grp):
def parse_lead_image(media):
if 'image' in media:
if 'dsc' in media['image']:
yield '<p><div><img src="{}" alt="{}"></div>'.format(
escape(media['image']['src'], True), escape(media['image']['dsc'], True))