diff --git a/recipes/1843.recipe b/recipes/1843.recipe
index 75c7017f09..c698f2c67c 100644
--- a/recipes/1843.recipe
+++ b/recipes/1843.recipe
@@ -11,13 +11,30 @@ from calibre.ebooks.BeautifulSoup import NavigableString, Tag
from calibre.web.feeds.news import BasicNewsRecipe
+def process_list(li_node):
+ li_html = ''
+ for li in li_node['items']:
+ if li.get('textHtml'):
+ li_html += f'
{li.get("textHtml")}'
+ else:
+ li_html += f'{li.get("text", "")}'
+ return li_html
+
+
+def process_info_box(bx):
+ info = ''
+ for x in safe_dict(bx, 'components'):
+ info += f'{process_node(x)}
'
+ return info
+
+
def process_node(node):
ntype = node.get('type', '')
if ntype == 'CROSSHEAD':
if node.get('textHtml'):
return f'{node.get("textHtml")}
'
return f'{node.get("text", "")}
'
- elif ntype == 'PARAGRAPH':
+ elif ntype in ['PARAGRAPH', 'BOOK_INFO']:
if node.get('textHtml'):
return f'{node.get("textHtml")}
'
return f'{node.get("text", "")}
'
@@ -34,9 +51,14 @@ def process_node(node):
return f'{node.get("text", "")}
'
elif ntype == 'DIVIDER':
return '
'
+ elif ntype == 'INFOGRAPHIC':
+ if node.get('fallback'):
+ return process_node(node['fallback'])
elif ntype == 'INFOBOX':
- for x in safe_dict(node, 'components'):
- return f'{process_node(x)}
'
+ return process_info_box(node)
+ elif ntype == 'UNORDERED_LIST':
+ if node.get('items'):
+ return process_list(node)
elif ntype:
print('** ', ntype)
return ''
@@ -121,7 +143,7 @@ def process_url(url):
class Econ1843(BasicNewsRecipe):
title = 'Economist 1843'
- language = 'en'
+ language = 'en_GB'
encoding = 'utf-8'
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
diff --git a/recipes/economist.recipe b/recipes/economist.recipe
index 0648e9e228..6cbed35d6d 100644
--- a/recipes/economist.recipe
+++ b/recipes/economist.recipe
@@ -86,13 +86,30 @@ def load_article_from_json(raw, root):
process_node(node, article)
+def process_web_list(li_node):
+ li_html = ''
+ for li in li_node['items']:
+ if li.get('textHtml'):
+ li_html += f'{li.get("textHtml")}'
+ else:
+ li_html += f'{li.get("text", "")}'
+ return li_html
+
+
+def process_info_box(bx):
+ info = ''
+ for x in safe_dict(bx, 'components'):
+ info += f'{process_web_node(x)}
'
+ return info
+
+
def process_web_node(node):
ntype = node.get('type', '')
if ntype == 'CROSSHEAD':
if node.get('textHtml'):
return f'{node.get("textHtml")}
'
return f'{node.get("text", "")}
'
- elif ntype == 'PARAGRAPH':
+ elif ntype in ['PARAGRAPH', 'BOOK_INFO']:
if node.get('textHtml'):
return f'{node.get("textHtml")}
'
return f'{node.get("text", "")}
'
@@ -109,9 +126,14 @@ def process_web_node(node):
return f'{node.get("text", "")}
'
elif ntype == 'DIVIDER':
return '
'
+ elif ntype == 'INFOGRAPHIC':
+ if node.get('fallback'):
+ return process_web_node(node['fallback'])
elif ntype == 'INFOBOX':
- for x in safe_dict(node, 'components'):
- return f'{process_web_node(x)}
'
+ return process_info_box(node)
+ elif ntype == 'UNORDERED_LIST':
+ if node.get('items'):
+ return process_web_list(node)
elif ntype:
print('** ', ntype)
return ''
@@ -120,7 +142,10 @@ def process_web_node(node):
def load_article_from_web_json(raw):
# open('/t/raw.json', 'w').write(raw)
body = ''
- data = json.loads(raw)['props']['pageProps']['content']
+ try:
+ data = json.loads(raw)['props']['pageProps']['cp2Content']
+ except Exception:
+ data = json.loads(raw)['props']['pageProps']['content']
body += f'{data.get("flyTitle", "")}
'
body += f'{data["headline"]}
'
if data.get('rubric') and data.get('rubric') is not None:
@@ -182,7 +207,7 @@ def process_url(url):
class Economist(BasicNewsRecipe):
title = 'The Economist'
- language = 'en'
+ language = 'en_GB'
encoding = 'utf-8'
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
@@ -274,7 +299,7 @@ class Economist(BasicNewsRecipe):
def economist_test_article(self):
return [('Articles', [{'title':'test',
- 'url':'https://www.economist.com/the-americas/2024/04/14/elon-musk-is-feuding-with-brazils-powerful-supreme-court'
+ 'url':'https://www.economist.com/leaders/2025/03/13/americas-bullied-allies-need-to-toughen-up'
}])]
def economist_return_index(self, ans):
diff --git a/recipes/economist_free.recipe b/recipes/economist_free.recipe
index 0648e9e228..6cbed35d6d 100644
--- a/recipes/economist_free.recipe
+++ b/recipes/economist_free.recipe
@@ -86,13 +86,30 @@ def load_article_from_json(raw, root):
process_node(node, article)
+def process_web_list(li_node):
+ li_html = ''
+ for li in li_node['items']:
+ if li.get('textHtml'):
+ li_html += f'{li.get("textHtml")}'
+ else:
+ li_html += f'{li.get("text", "")}'
+ return li_html
+
+
+def process_info_box(bx):
+ info = ''
+ for x in safe_dict(bx, 'components'):
+ info += f'{process_web_node(x)}
'
+ return info
+
+
def process_web_node(node):
ntype = node.get('type', '')
if ntype == 'CROSSHEAD':
if node.get('textHtml'):
return f'{node.get("textHtml")}
'
return f'{node.get("text", "")}
'
- elif ntype == 'PARAGRAPH':
+ elif ntype in ['PARAGRAPH', 'BOOK_INFO']:
if node.get('textHtml'):
return f'{node.get("textHtml")}
'
return f'{node.get("text", "")}
'
@@ -109,9 +126,14 @@ def process_web_node(node):
return f'{node.get("text", "")}
'
elif ntype == 'DIVIDER':
return '
'
+ elif ntype == 'INFOGRAPHIC':
+ if node.get('fallback'):
+ return process_web_node(node['fallback'])
elif ntype == 'INFOBOX':
- for x in safe_dict(node, 'components'):
- return f'{process_web_node(x)}
'
+ return process_info_box(node)
+ elif ntype == 'UNORDERED_LIST':
+ if node.get('items'):
+ return process_web_list(node)
elif ntype:
print('** ', ntype)
return ''
@@ -120,7 +142,10 @@ def process_web_node(node):
def load_article_from_web_json(raw):
# open('/t/raw.json', 'w').write(raw)
body = ''
- data = json.loads(raw)['props']['pageProps']['content']
+ try:
+ data = json.loads(raw)['props']['pageProps']['cp2Content']
+ except Exception:
+ data = json.loads(raw)['props']['pageProps']['content']
body += f'{data.get("flyTitle", "")}
'
body += f'{data["headline"]}
'
if data.get('rubric') and data.get('rubric') is not None:
@@ -182,7 +207,7 @@ def process_url(url):
class Economist(BasicNewsRecipe):
title = 'The Economist'
- language = 'en'
+ language = 'en_GB'
encoding = 'utf-8'
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
@@ -274,7 +299,7 @@ class Economist(BasicNewsRecipe):
def economist_test_article(self):
return [('Articles', [{'title':'test',
- 'url':'https://www.economist.com/the-americas/2024/04/14/elon-musk-is-feuding-with-brazils-powerful-supreme-court'
+ 'url':'https://www.economist.com/leaders/2025/03/13/americas-bullied-allies-need-to-toughen-up'
}])]
def economist_return_index(self, ans):
diff --git a/recipes/economist_news.recipe b/recipes/economist_news.recipe
index e08f7ba91d..f2b50f57fa 100644
--- a/recipes/economist_news.recipe
+++ b/recipes/economist_news.recipe
@@ -121,7 +121,7 @@ def process_url(url):
class EconomistNews(BasicNewsRecipe):
title = 'The Economist News'
- language = 'en'
+ language = 'en_GB'
encoding = 'utf-8'
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
diff --git a/recipes/economist_search.recipe b/recipes/economist_search.recipe
index 5ac61ad57f..f5ea674d54 100644
--- a/recipes/economist_search.recipe
+++ b/recipes/economist_search.recipe
@@ -12,13 +12,29 @@ from calibre.ebooks.BeautifulSoup import NavigableString, Tag
from calibre.web.feeds.news import BasicNewsRecipe
+def process_list(li_node):
+ li_html = ''
+ for li in li_node['items']:
+ if li.get('textHtml'):
+ li_html += f'{li.get("textHtml")}'
+ else:
+ li_html += f'{li.get("text", "")}'
+ return li_html
+
+
+def process_info_box(bx):
+ info = ''
+ for x in safe_dict(bx, 'components'):
+ info += f'{process_node(x)}
'
+ return info
+
def process_node(node):
ntype = node.get('type', '')
if ntype == 'CROSSHEAD':
if node.get('textHtml'):
return f'{node.get("textHtml")}
'
return f'{node.get("text", "")}
'
- elif ntype == 'PARAGRAPH':
+ elif ntype in ['PARAGRAPH', 'BOOK_INFO']:
if node.get('textHtml'):
return f'{node.get("textHtml")}
'
return f'{node.get("text", "")}
'
@@ -35,9 +51,14 @@ def process_node(node):
return f'{node.get("text", "")}
'
elif ntype == 'DIVIDER':
return '
'
+ elif ntype == 'INFOGRAPHIC':
+ if node.get('fallback'):
+ return process_node(node['fallback'])
elif ntype == 'INFOBOX':
- for x in safe_dict(node, 'components'):
- return f'{process_node(x)}
'
+ return process_info_box(node)
+ elif ntype == 'UNORDERED_LIST':
+ if node.get('items'):
+ return process_list(node)
elif ntype:
print('** ', ntype)
return ''
@@ -57,7 +78,10 @@ class JSONHasNoContent(ValueError):
def load_article_from_json(raw):
# open('/t/raw.json', 'w').write(raw)
body = ''
- data = json.loads(raw)['props']['pageProps']['cp2Content']
+ try:
+ data = json.loads(raw)['props']['pageProps']['cp2Content']
+ except Exception:
+ data = json.loads(raw)['props']['pageProps']['content']
body += f'{data.get("flyTitle", "")}
'
body += f'{data["headline"]}
'
body += f'{data.get("rubric", "")}
'
@@ -114,7 +138,7 @@ def process_url(url):
class econ_search(BasicNewsRecipe):
title = 'The Economist - Search'
- language = 'en'
+ language = 'en_GB'
encoding = 'utf-8'
__author__ = 'unkn0wn'
description = (
diff --git a/recipes/economist_world_ahead.recipe b/recipes/economist_world_ahead.recipe
index 6d7e2336ee..85ee8f50ce 100644
--- a/recipes/economist_world_ahead.recipe
+++ b/recipes/economist_world_ahead.recipe
@@ -12,13 +12,30 @@ from calibre.ebooks.BeautifulSoup import NavigableString, Tag
from calibre.web.feeds.news import BasicNewsRecipe
+def process_list(li_node):
+ li_html = ''
+ for li in li_node['items']:
+ if li.get('textHtml'):
+ li_html += f'{li.get("textHtml")}'
+ else:
+ li_html += f'{li.get("text", "")}'
+ return li_html
+
+
+def process_info_box(bx):
+ info = ''
+ for x in safe_dict(bx, 'components'):
+ info += f'{process_node(x)}
'
+ return info
+
+
def process_node(node):
ntype = node.get('type', '')
if ntype == 'CROSSHEAD':
if node.get('textHtml'):
return f'{node.get("textHtml")}
'
return f'{node.get("text", "")}
'
- elif ntype == 'PARAGRAPH':
+ elif ntype in ['PARAGRAPH', 'BOOK_INFO']:
if node.get('textHtml'):
return f'{node.get("textHtml")}
'
return f'{node.get("text", "")}
'
@@ -35,9 +52,14 @@ def process_node(node):
return f'{node.get("text", "")}
'
elif ntype == 'DIVIDER':
return '
'
+ elif ntype == 'INFOGRAPHIC':
+ if node.get('fallback'):
+ return process_node(node['fallback'])
elif ntype == 'INFOBOX':
- for x in safe_dict(node, 'components'):
- return f'{process_node(x)}
'
+ return process_info_box(node)
+ elif ntype == 'UNORDERED_LIST':
+ if node.get('items'):
+ return process_list(node)
elif ntype:
print('** ', ntype)
return ''
@@ -57,7 +79,10 @@ class JSONHasNoContent(ValueError):
def load_article_from_json(raw):
# open('/t/raw.json', 'w').write(raw)
body = ''
- data = json.loads(raw)['props']['pageProps']['cp2Content']
+ try:
+ data = json.loads(raw)['props']['pageProps']['cp2Content']
+ except Exception:
+ data = json.loads(raw)['props']['pageProps']['content']
body += f'{data.get("flyTitle", "")}
'
body += f'{data["headline"]}
'
body += f'{data.get("rubric", "")}
'
@@ -118,7 +143,7 @@ def process_url(url):
class EconomistWorld(BasicNewsRecipe):
title = 'The Economist World Ahead'
- language = 'en'
+ language = 'en_GB'
encoding = 'utf-8'
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
diff --git a/recipes/hindufeeds.recipe b/recipes/hindufeeds.recipe
index 73148c0ea9..1fc7e76cb0 100644
--- a/recipes/hindufeeds.recipe
+++ b/recipes/hindufeeds.recipe
@@ -21,7 +21,6 @@ class TheHindufeeds(BasicNewsRecipe):
.author, .dateLine, .publish-time {font-size:small; font-weight:bold;}
.subhead, .subhead_lead, .bold {font-weight:bold;}
.update-publish-time, .publish-time-new {font-size:small; }
- img {display:block; margin:0 auto;}
.italic {font-style:italic; color:#202020;}
'''
@@ -55,7 +54,7 @@ class TheHindufeeds(BasicNewsRecipe):
def preprocess_html(self, soup):
for cap in soup.findAll('p', attrs={'class': 'caption'}):
- cap.name = 'figcaption'
+ cap.name = 'div'
for img in soup.findAll('img', attrs={'data-original': True}):
if img['data-original'].endswith('1x1_spacer.png'):
source = img.findPrevious('source', srcset=True)
@@ -91,7 +90,7 @@ class TheHindufeeds(BasicNewsRecipe):
('Business', 'https://www.thehindu.com/business/feeder/default.rss'),
('World', 'https://www.thehindu.com/news/international/feeder/default.rss'),
# ('Sport', 'https://www.thehindu.com/sport/feeder/default.rss'),
- ('Entertainment', 'https://www.thehindu.com/entertainment/feeder/default.rss'),
+ # ('Entertainment', 'https://www.thehindu.com/entertainment/feeder/default.rss'),
# ('Crossword', 'https://crossword.thehindu.com/?utm_source=thehindu&utm_medium=mainmenufeeder/default.rss'),
('Science', 'https://www.thehindu.com/sci-tech/science/feeder/default.rss'),
('Life and Style', 'https://www.thehindu.com/life-and-style/feeder/default.rss'),
diff --git a/recipes/spectator_magazine.recipe b/recipes/spectator_magazine.recipe
index 84b29306b8..54a131a8b6 100644
--- a/recipes/spectator_magazine.recipe
+++ b/recipes/spectator_magazine.recipe
@@ -13,7 +13,7 @@ class spectator(BasicNewsRecipe):
title = 'Spectator Magazine'
__author__ = 'unkn0wn'
description = 'The Spectator was established in 1828, and is the best-written and most influential weekly in the English language.'
- language = 'en'
+ language = 'en_GB'
no_stylesheets = True
remove_attributes = ['height', 'width', 'style']
ignore_duplicate_articles = {'url'}