diff --git a/recipes/theeconomictimes_india_print_edition.recipe b/recipes/theeconomictimes_india_print_edition.recipe
index c610386a3e..293eb63e0d 100644
--- a/recipes/theeconomictimes_india_print_edition.recipe
+++ b/recipes/theeconomictimes_india_print_edition.recipe
@@ -79,11 +79,10 @@ class TheEconomicTimes(BasicNewsRecipe):
for h3 in section.findAll(("h1", "h3", "h4", "h5")):
span = h3.find(
'span',
- href=lambda x: x and x.startswith('/epaper/'),
+ href=lambda x: x and x.startswith('https://economictimes.indiatimes.com/epaper/'),
attrs={'class': 'banner'}
)
url = span['href']
- url = 'https://economictimes.indiatimes.com' + url
title = self.tag_to_string(span)
div = h3.find_next_sibling('div', attrs={'class': 'dsc'})
if div is not None:
diff --git a/recipes/toiprint.recipe b/recipes/toiprint.recipe
index b2a961cf82..9f3127b14e 100644
--- a/recipes/toiprint.recipe
+++ b/recipes/toiprint.recipe
@@ -1,6 +1,7 @@
from calibre.web.feeds.news import BasicNewsRecipe
import json
from datetime import date
+from collections import defaultdict
# default edition is Delhi i.e., 'cap'
@@ -54,33 +55,32 @@ class toiprint(BasicNewsRecipe):
url = index + '/DayIndex/' + date_ + '_' + le + '.json'
raw = self.index_to_soup(url, raw=True)
data = json.loads(raw)
- if 'DigitalIndex' not in data:
+ if 'DayIndex' not in data:
raise ValueError(
'The Times of India Newspaper is not published today.'
)
- data = data['DigitalIndex']
- feeds = []
+ data = data['DayIndex']
+ feeds_dict = defaultdict(list)
for link in data:
sec_name = link['PageTitle']
+ if sec_name == 'Advertisement':
+ continue
self.log(sec_name)
articles = []
- if 'Views' in link:
- for sec in link['Views']:
- if 'Articles' in sec:
- for art in sec['Articles']:
- if 'ArticleName' not in art:
- continue
- url = art['ArticleName']
- title = art.get('ArticleTitle', 'unknown').replace('
', '')
- if art.get('ColumnTitle', '') == '':
- desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ArticleBody', '')
- else:
- desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ColumnTitle', '')
- self.log('\t', title, '\n\t', desc, '\n\t\t', url)
- articles.append({'title': title, 'description':desc, 'url': url})
- if articles:
- feeds.append((sec_name, articles))
- return feeds
+ if 'Articles' in link:
+ for art in link['Articles']:
+ section = sec_name
+ if 'ArticleName' not in art:
+ continue
+ url = art['ArticleName']
+ title = art.get('ArticleTitle', 'unknown').replace('
', '')
+ if art.get('ColumnTitle', '') == '':
+ desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ArticleBody', '')
+ else:
+ desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ColumnTitle', '')
+ self.log('\t', title, '\n\t', desc.replace('\n', ''))
+ feeds_dict[section].append({"title": title, "url": url, "description": desc})
+ return [(section, articles) for section, articles in feeds_dict.items()]
def preprocess_raw_html(self, raw, *a):
data = json.loads(raw)
@@ -107,8 +107,11 @@ class toiprint(BasicNewsRecipe):
elif 'ZoneText' in x:
body += '
' + x['ZoneText'] + '
' return '').replace('
', '
').replace('<br>', '
').replace('\n', '
').replace('
', '
').replace('<br>', '
').replace('\n', '
') \
+ '