From 8e93f979f5f91a5ca4ff3ebf4ac43616b0db3d29 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Sat, 4 May 2024 12:15:32 +0530
Subject: [PATCH 1/2] Update NatGeo
---
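Notes: all three recipes assumed every article's JSON carried a
'cntrbGrp' entry, so articles without contributor data raised a
KeyError inside parse_article(). They now fall back to an empty
container. A minimal sketch of the difference (the dict contents
here are hypothetical):

    edg = {'mdDt': '2024-05-04T00:00:00Z'}  # article JSON without contributors
    edg['cntrbGrp']            # raises KeyError
    edg.get('cntrbGrp', {})    # returns {}, so the contributor loop
                               # has nothing to iterate and yields no lines

natgeomag additionally drops the ans2 side list: the 'Photo Essay'
promo and the new OneUpPromoCard promos now accumulate in the same
section dict as every other article, via dict.setdefault():

    ans = {}
    ans.setdefault('Photo Essay', []).append({'title': 't1', 'url': 'u1'})
    ans.setdefault('Photo Essay', []).append({'title': 't2', 'url': 'u2'})
    # ans == {'Photo Essay': [{'title': 't1', 'url': 'u1'},
    #                         {'title': 't2', 'url': 'u2'}]}
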
recipes/natgeo.recipe | 2 +-
recipes/natgeohis.recipe | 2 +-
recipes/natgeomag.recipe | 16 ++++++++++------
3 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/recipes/natgeo.recipe b/recipes/natgeo.recipe
index cdcc896e42..4e02b194a7 100644
--- a/recipes/natgeo.recipe
+++ b/recipes/natgeo.recipe
@@ -106,7 +106,7 @@ def parse_article(edg):
     yield '<h1>' + escape(sc['sclTtl']) + '</h1>'
     yield '<div class="byline">' + escape(sc['sclDsc']) + '</div>'
     yield '<p>'
-    for line in parse_contributors(edg['cntrbGrp']):
+    for line in parse_contributors(edg.get('cntrbGrp', {})):
         yield line
     ts = parse_iso8601(edg['mdDt'], as_utc=False).strftime('%B %d, %Y')
     yield '<div class="time">Published: ' + escape(ts) + '</div>'
diff --git a/recipes/natgeohis.recipe b/recipes/natgeohis.recipe
index 0bf60aa91c..ee5fcec9af 100644
--- a/recipes/natgeohis.recipe
+++ b/recipes/natgeohis.recipe
@@ -105,7 +105,7 @@ def parse_article(edg):
     yield '<h1>' + escape(sc['sclTtl']) + '</h1>'
     yield '<div class="byline">' + escape(sc['sclDsc']) + '</div>'
     yield '<p>'
-    for line in parse_contributors(edg['cntrbGrp']):
+    for line in parse_contributors(edg.get('cntrbGrp', {})):
         yield line
     ts = parse_iso8601(edg['mdDt'], as_utc=False).strftime('%B %d, %Y')
     yield '<div class="time">Published: ' + escape(ts) + '</div>'
diff --git a/recipes/natgeomag.recipe b/recipes/natgeomag.recipe
index 01b3fd95d3..d8933ee1c5 100644
--- a/recipes/natgeomag.recipe
+++ b/recipes/natgeomag.recipe
@@ -110,7 +110,7 @@ def parse_article(edg):
     yield '<h1>' + escape(sc['sclTtl']) + '</h1>'
     yield '<div class="byline">' + escape(sc['sclDsc']) + '</div>'
     yield '<p>'
-    for line in parse_contributors(edg['cntrbGrp']):
+    for line in parse_contributors(edg.get('cntrbGrp', {})):
         yield line
     ts = parse_iso8601(edg['mdDt'], as_utc=False).strftime('%B %d, %Y')
     yield '<div class="time">Published: ' + escape(ts) + '</div>'
@@ -187,14 +187,20 @@ class NatGeo(BasicNewsRecipe):
         name = soup.find(attrs={'class':lambda x: x and 'Header__Description' in x.split()})
         self.title = 'National Geographic ' + self.tag_to_string(name)
         ans = {}
-        ans2 = None
         if photoart := soup.find(attrs={'class':lambda x: x and 'BgImagePromo__Container__Text__Link' in x.split()}):
-            ans2 = []
+            section = 'Photo Essay'
             title = self.tag_to_string(photoart)
             url = photoart['href']
             if url.startswith('/'):
                 url = 'https://www.nationalgeographic.com' + url
-            ans2.append(('Photo Essay', [{'title': title, 'url': url}]))
+            articles = ans.setdefault(section, [])
+            articles.append({'title': title, 'url': url})
+        for promo in soup.findAll(**classes('OneUpPromoCard__Content')):
+            url = promo.a['href']
+            section = self.tag_to_string(promo.find(**classes('SectionLabel')))
+            title = self.tag_to_string(promo.find(**classes('Card__Content__Heading')))
+            articles = ans.setdefault(section, [])
+            articles.append({'title': title, 'url': url})
         for gird in soup.findAll(attrs={'class':'GridPromoTile'}):
             for article in soup.findAll('article'):
                 a = article.find('a')
@@ -208,8 +214,6 @@ class NatGeo(BasicNewsRecipe):
                 articles = ans.setdefault(section, [])
                 articles.append({'title': title, 'url': url})
         self.log(pformat(ans))
-        if ans2:
-            return list(ans.items()) + ans2
         return list(ans.items())
 
     def preprocess_raw_html(self, raw_html, url):
From 308945a6cf391d36eed3298ad5354436cd26e02c Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Sat, 4 May 2024 12:16:26 +0530
Subject: [PATCH 2/2] Update econ
---
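Notes: fetching the latest weekly edition no longer requires scraping
https://www.economist.com/weeklyedition/archive for a content id; the
LatestWeeklyAutoEditionQuery now takes a fixed ref and picks the newest
edition via hasPart(from:0 size:1 sort:"datePublished:desc"). The
__NEXT_DATA__ lookup is kept only for the edition_date case, which
still needs a specific content id for SpecificWeeklyEditionQuery. A
rough sketch of how such a query dict becomes a request URL (the
endpoint below is a placeholder, not taken from this patch):

    from urllib.parse import urlencode

    query = {
        'query': '...GraphQL document as in the diff below...',
        'operationName': 'LatestWeeklyAutoEditionQuery',
        'variables': '{"ref":"/content/d06tg8j85rifiq3oo544c6b9j61dno2n"}',
    }
    # hypothetical GET-style GraphQL gateway; the recipe's real endpoint
    # and parameter encoding may differ
    url = 'https://graphql.example.economist.com/graphql?' + urlencode(query)
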
recipes/economist.recipe | 19 +++++++++----------
recipes/economist_free.recipe | 19 +++++++++----------
2 files changed, 18 insertions(+), 20 deletions(-)
diff --git a/recipes/economist.recipe b/recipes/economist.recipe
index 960df8f624..5e62e3e04f 100644
--- a/recipes/economist.recipe
+++ b/recipes/economist.recipe
@@ -232,21 +232,20 @@ class Economist(BasicNewsRecipe):
     if use_archive:
         def parse_index(self):
             # return self.economist_test_article()
-            url = 'https://www.economist.com/weeklyedition/archive'
-            if edition_date:
-                url = 'https://www.economist.com/weeklyedition/' + edition_date
-            soup = self.index_to_soup(url)
-            script_tag = soup.find("script", id="__NEXT_DATA__")
-            if script_tag is None:
-                raise ValueError('No script tag with JSON data found in the weeklyedition archive')
-            data = json.loads(script_tag.string)
-            content_id = data['props']['pageProps']['content']['id'].split('/')[-1]
+            # url = 'https://www.economist.com/weeklyedition/archive'
             query = {
                 'query': 'query LatestWeeklyAutoEditionQuery($ref:String!){canonical(ref:$ref){hasPart(from:0 size:1 sort:"datePublished:desc"){parts{...WeeklyEditionFragment __typename}__typename}__typename}}fragment WeeklyEditionFragment on Content{id type datePublished image{...ImageCoverFragment __typename}url{canonical __typename}hasPart(size:100 sort:"publication.context.position"){parts{...ArticleFragment __typename}__typename}__typename}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}fragment ImageCoverFragment on Media{cover{headline width height url{canonical __typename}regionsAllowed __typename}__typename}', # noqa
                 'operationName': 'LatestWeeklyAutoEditionQuery',
-                'variables': '{{"ref":"/content/{}"}}'.format(content_id),
+                'variables': '{"ref":"/content/d06tg8j85rifiq3oo544c6b9j61dno2n"}',
             }
             if edition_date:
+                url = 'https://www.economist.com/weeklyedition/' + edition_date
+                soup = self.index_to_soup(url)
+                script_tag = soup.find("script", id="__NEXT_DATA__")
+                if script_tag is None:
+                    raise ValueError('No script tag with JSON data found in the weeklyedition archive')
+                data = json.loads(script_tag.string)
+                content_id = data['props']['pageProps']['content']['id'].split('/')[-1]
                 query = {
                     'query': 'query SpecificWeeklyEditionQuery($path:String!){section:canonical(ref:$path){...WeeklyEditionFragment __typename}}fragment WeeklyEditionFragment on Content{id type datePublished image{...ImageCoverFragment __typename}url{canonical __typename}hasPart(size:100 sort:"publication.context.position"){parts{...ArticleFragment __typename}__typename}__typename}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}fragment ImageCoverFragment on Media{cover{headline width height url{canonical __typename}regionsAllowed __typename}__typename}', # noqa
                     'operationName': 'SpecificWeeklyEditionQuery',
diff --git a/recipes/economist_free.recipe b/recipes/economist_free.recipe
index 960df8f624..5e62e3e04f 100644
--- a/recipes/economist_free.recipe
+++ b/recipes/economist_free.recipe
@@ -232,21 +232,20 @@ class Economist(BasicNewsRecipe):
     if use_archive:
         def parse_index(self):
             # return self.economist_test_article()
-            url = 'https://www.economist.com/weeklyedition/archive'
-            if edition_date:
-                url = 'https://www.economist.com/weeklyedition/' + edition_date
-            soup = self.index_to_soup(url)
-            script_tag = soup.find("script", id="__NEXT_DATA__")
-            if script_tag is None:
-                raise ValueError('No script tag with JSON data found in the weeklyedition archive')
-            data = json.loads(script_tag.string)
-            content_id = data['props']['pageProps']['content']['id'].split('/')[-1]
+            # url = 'https://www.economist.com/weeklyedition/archive'
             query = {
                 'query': 'query LatestWeeklyAutoEditionQuery($ref:String!){canonical(ref:$ref){hasPart(from:0 size:1 sort:"datePublished:desc"){parts{...WeeklyEditionFragment __typename}__typename}__typename}}fragment WeeklyEditionFragment on Content{id type datePublished image{...ImageCoverFragment __typename}url{canonical __typename}hasPart(size:100 sort:"publication.context.position"){parts{...ArticleFragment __typename}__typename}__typename}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}fragment ImageCoverFragment on Media{cover{headline width height url{canonical __typename}regionsAllowed __typename}__typename}', # noqa
                 'operationName': 'LatestWeeklyAutoEditionQuery',
-                'variables': '{{"ref":"/content/{}"}}'.format(content_id),
+                'variables': '{"ref":"/content/d06tg8j85rifiq3oo544c6b9j61dno2n"}',
             }
             if edition_date:
+                url = 'https://www.economist.com/weeklyedition/' + edition_date
+                soup = self.index_to_soup(url)
+                script_tag = soup.find("script", id="__NEXT_DATA__")
+                if script_tag is None:
+                    raise ValueError('No script tag with JSON data found in the weeklyedition archive')
+                data = json.loads(script_tag.string)
+                content_id = data['props']['pageProps']['content']['id'].split('/')[-1]
                 query = {
                     'query': 'query SpecificWeeklyEditionQuery($path:String!){section:canonical(ref:$path){...WeeklyEditionFragment __typename}}fragment WeeklyEditionFragment on Content{id type datePublished image{...ImageCoverFragment __typename}url{canonical __typename}hasPart(size:100 sort:"publication.context.position"){parts{...ArticleFragment __typename}__typename}__typename}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}fragment ImageCoverFragment on Media{cover{headline width height url{canonical __typename}regionsAllowed __typename}__typename}', # noqa
                     'operationName': 'SpecificWeeklyEditionQuery',