This commit is contained in:
unkn0w7n 2024-03-10 09:52:46 +05:30
parent 6a88069f01
commit 4559d2cda7
2 changed files with 30 additions and 24 deletions

View File

@@ -19,8 +19,10 @@ def classes(classes):
def extract_json(raw): def extract_json(raw):
s = raw.find("window['__natgeo__']") s = raw.find("window['__natgeo__']")
script = raw[s:raw.find('</script>', s)] script = raw[s:raw.find('</script>', s)]
return json.loads( data = json.loads(script[script.find('{'):].rstrip(';'))['page']['content']
script[script.find('{'):].rstrip(';'))['page']['content']['article'] if 'article' in data:
return data['article']
return data['prismarticle']
def parse_contributors(grp): def parse_contributors(grp):
@@ -32,6 +34,7 @@ def parse_contributors(grp):
def parse_lead_image(media): def parse_lead_image(media):
if 'image' in media:
if 'dsc' in media['image']: if 'dsc' in media['image']:
yield '<p><div><img src="{}" alt="{}"></div>'.format( yield '<p><div><img src="{}" alt="{}"></div>'.format(
escape(media['image']['src'], True), escape(media['image']['dsc'], True)) escape(media['image']['src'], True), escape(media['image']['dsc'], True))

View File

@@ -18,8 +18,10 @@ def classes(classes):
def extract_json(raw): def extract_json(raw):
s = raw.find("window['__natgeo__']") s = raw.find("window['__natgeo__']")
script = raw[s:raw.find('</script>', s)] script = raw[s:raw.find('</script>', s)]
return json.loads( data = json.loads(script[script.find('{'):].rstrip(';'))['page']['content']
script[script.find('{'):].rstrip(';'))['page']['content']['article'] if 'article' in data:
return data['article']
return data['prismarticle']
def parse_contributors(grp): def parse_contributors(grp):
@@ -31,6 +33,7 @@ def parse_contributors(grp):
def parse_lead_image(media): def parse_lead_image(media):
if 'image' in media:
if 'dsc' in media['image']: if 'dsc' in media['image']:
yield '<p><div><img src="{}" alt="{}"></div>'.format( yield '<p><div><img src="{}" alt="{}"></div>'.format(
escape(media['image']['src'], True), escape(media['image']['dsc'], True)) escape(media['image']['src'], True), escape(media['image']['dsc'], True))