From 6a88069f0192c4a01ef3709f49a6aca3e5aa05b1 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Sun, 10 Mar 2024 09:50:28 +0530
Subject: [PATCH 1/2] Update natgeomag.recipe
---
recipes/natgeomag.recipe | 27 +++++++++++++++------------
1 file changed, 15 insertions(+), 12 deletions(-)
diff --git a/recipes/natgeomag.recipe b/recipes/natgeomag.recipe
index 5f3aaee5eb..4e5fc6bb4d 100644
--- a/recipes/natgeomag.recipe
+++ b/recipes/natgeomag.recipe
@@ -23,8 +23,10 @@ def classes(classes):
def extract_json(raw):
s = raw.find("window['__natgeo__']")
script = raw[s:raw.find('</script>', s)]
- return json.loads(
- script[script.find('{'):].rstrip(';'))['page']['content']['article']
+ data = json.loads(script[script.find('{'):].rstrip(';'))['page']['content']
+ if 'article' in data:
+ return data['article']
+ return data['prismarticle']
def parse_contributors(grp):
@@ -36,16 +38,17 @@ def parse_contributors(grp):
def parse_lead_image(media):
- if 'dsc' in media['image']:
- yield '

'.format(
- escape(media['image']['src'], True), escape(media['image']['dsc'], True))
- else:
- yield '
'.format(escape(media['image']['src'], True))
- if 'caption' in media:
- yield '' + media['caption'] + '
'
- if 'credit' in media:
- yield '' + media['credit'] + '
'
- yield ''
+ if 'image' in media:
+ if 'dsc' in media['image']:
+ yield ''.format(
+ escape(media['image']['src'], True), escape(media['image']['dsc'], True))
+ else:
+ yield ''.format(escape(media['image']['src'], True))
+ if 'caption' in media:
+ yield '' + media['caption'] + '
'
+ if 'credit' in media:
+ yield '' + media['credit'] + '
'
+ yield ''
def parse_body(item):
From 4559d2cda7a08281ed751b3a29cd59c3843955d5 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Sun, 10 Mar 2024 09:52:46 +0530
Subject: [PATCH 2/2] Update natgeo.recipe and natgeohis.recipe
---
recipes/natgeo.recipe | 27 +++++++++++++++------------
recipes/natgeohis.recipe | 27 +++++++++++++++------------
2 files changed, 30 insertions(+), 24 deletions(-)
diff --git a/recipes/natgeo.recipe b/recipes/natgeo.recipe
index f21172927a..b8a42b1311 100644
--- a/recipes/natgeo.recipe
+++ b/recipes/natgeo.recipe
@@ -19,8 +19,10 @@ def classes(classes):
def extract_json(raw):
s = raw.find("window['__natgeo__']")
script = raw[s:raw.find('</script>', s)]
- return json.loads(
- script[script.find('{'):].rstrip(';'))['page']['content']['article']
+ data = json.loads(script[script.find('{'):].rstrip(';'))['page']['content']
+ if 'article' in data:
+ return data['article']
+ return data['prismarticle']
def parse_contributors(grp):
@@ -32,16 +34,17 @@ def parse_contributors(grp):
def parse_lead_image(media):
- if 'dsc' in media['image']:
- yield ''.format(
- escape(media['image']['src'], True), escape(media['image']['dsc'], True))
- else:
- yield ''.format(escape(media['image']['src'], True))
- if 'caption' in media:
- yield '' + media['caption'] + '
'
- if 'credit' in media:
- yield '' + media['credit'] + '
'
- yield ''
+ if 'image' in media:
+ if 'dsc' in media['image']:
+ yield ''.format(
+ escape(media['image']['src'], True), escape(media['image']['dsc'], True))
+ else:
+ yield ''.format(escape(media['image']['src'], True))
+ if 'caption' in media:
+ yield '' + media['caption'] + '
'
+ if 'credit' in media:
+ yield '' + media['credit'] + '
'
+ yield ''
def parse_body(item):
diff --git a/recipes/natgeohis.recipe b/recipes/natgeohis.recipe
index d40f5c727d..2055309e2c 100644
--- a/recipes/natgeohis.recipe
+++ b/recipes/natgeohis.recipe
@@ -18,8 +18,10 @@ def classes(classes):
def extract_json(raw):
s = raw.find("window['__natgeo__']")
script = raw[s:raw.find('</script>', s)]
- return json.loads(
- script[script.find('{'):].rstrip(';'))['page']['content']['article']
+ data = json.loads(script[script.find('{'):].rstrip(';'))['page']['content']
+ if 'article' in data:
+ return data['article']
+ return data['prismarticle']
def parse_contributors(grp):
@@ -31,16 +33,17 @@ def parse_contributors(grp):
def parse_lead_image(media):
- if 'dsc' in media['image']:
- yield ''.format(
- escape(media['image']['src'], True), escape(media['image']['dsc'], True))
- else:
- yield ''.format(escape(media['image']['src'], True))
- if 'caption' in media:
- yield '' + media['caption'] + '
'
- if 'credit' in media:
- yield '' + media['credit'] + '
'
- yield ''
+ if 'image' in media:
+ if 'dsc' in media['image']:
+ yield ''.format(
+ escape(media['image']['src'], True), escape(media['image']['dsc'], True))
+ else:
+ yield ''.format(escape(media['image']['src'], True))
+ if 'caption' in media:
+ yield '' + media['caption'] + '
'
+ if 'credit' in media:
+ yield '' + media['credit'] + '
'
+ yield ''
def parse_body(item):