This commit is contained in:
Kovid Goyal 2023-11-08 11:56:41 +05:30
commit 3eb7f21fde
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 27 additions and 23 deletions

View File

@@ -79,11 +79,10 @@ class TheEconomicTimes(BasicNewsRecipe):
for h3 in section.findAll(("h1", "h3", "h4", "h5")): for h3 in section.findAll(("h1", "h3", "h4", "h5")):
span = h3.find( span = h3.find(
'span', 'span',
href=lambda x: x and x.startswith('/epaper/'), href=lambda x: x and x.startswith('https://economictimes.indiatimes.com/epaper/'),
attrs={'class': 'banner'} attrs={'class': 'banner'}
) )
url = span['href'] url = span['href']
url = 'https://economictimes.indiatimes.com' + url
title = self.tag_to_string(span) title = self.tag_to_string(span)
div = h3.find_next_sibling('div', attrs={'class': 'dsc'}) div = h3.find_next_sibling('div', attrs={'class': 'dsc'})
if div is not None: if div is not None:

View File

@@ -1,6 +1,7 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import json import json
from datetime import date from datetime import date
from collections import defaultdict
# default edition is Delhi i.e., 'cap' # default edition is Delhi i.e., 'cap'
@@ -54,33 +55,32 @@ class toiprint(BasicNewsRecipe):
url = index + '/DayIndex/' + date_ + '_' + le + '.json' url = index + '/DayIndex/' + date_ + '_' + le + '.json'
raw = self.index_to_soup(url, raw=True) raw = self.index_to_soup(url, raw=True)
data = json.loads(raw) data = json.loads(raw)
if 'DigitalIndex' not in data: if 'DayIndex' not in data:
raise ValueError( raise ValueError(
'The Times of India Newspaper is not published today.' 'The Times of India Newspaper is not published today.'
) )
data = data['DigitalIndex'] data = data['DayIndex']
feeds = [] feeds_dict = defaultdict(list)
for link in data: for link in data:
sec_name = link['PageTitle'] sec_name = link['PageTitle']
if sec_name == 'Advertisement':
continue
self.log(sec_name) self.log(sec_name)
articles = [] articles = []
if 'Views' in link: if 'Articles' in link:
for sec in link['Views']: for art in link['Articles']:
if 'Articles' in sec: section = sec_name
for art in sec['Articles']: if 'ArticleName' not in art:
if 'ArticleName' not in art: continue
continue url = art['ArticleName']
url = art['ArticleName'] title = art.get('ArticleTitle', 'unknown').replace('<br>', '')
title = art.get('ArticleTitle', 'unknown').replace('<br>', '') if art.get('ColumnTitle', '') == '':
if art.get('ColumnTitle', '') == '': desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ArticleBody', '')
desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ArticleBody', '') else:
else: desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ColumnTitle', '')
desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ColumnTitle', '') self.log('\t', title, '\n\t', desc.replace('\n', ''))
self.log('\t', title, '\n\t', desc, '\n\t\t', url) feeds_dict[section].append({"title": title, "url": url, "description": desc})
articles.append({'title': title, 'description':desc, 'url': url}) return [(section, articles) for section, articles in feeds_dict.items()]
if articles:
feeds.append((sec_name, articles))
return feeds
def preprocess_raw_html(self, raw, *a): def preprocess_raw_html(self, raw, *a):
data = json.loads(raw) data = json.loads(raw)
@@ -107,8 +107,11 @@ class toiprint(BasicNewsRecipe):
elif 'ZoneText' in x: elif 'ZoneText' in x:
body += '<p><i>' + x['ZoneText'] + '</i></p>' body += '<p><i>' + x['ZoneText'] + '</i></p>'
return '<html><body><div>' \ return '<html><body><div>' \
+ body.replace('<br>', '<p>').replace('<br/>', '<p>').replace('&lt;br&gt;', '<p>').replace('\n', '<div>') \ + body.replace('<br>', '<p>').replace('<br/>', '<p>').replace('&lt;br&gt;', '<p>').replace('\n', '<br>') \
+ '</div></body></html>' + '</div></body></html>'
def print_version(self, url): def print_version(self, url):
return index + '/ArticleZoneJson/' + url.split('_')[-3] + '/' + url + '.json' return index + '/ArticleZoneJson/' + url.split('_')[-3] + '/' + url + '.json'
def populate_article_metadata(self, article, soup, first):
article.url = '***'

View File

@@ -35,6 +35,7 @@ class WSJ(BasicNewsRecipe):
__author__ = 'Kovid Goyal' __author__ = 'Kovid Goyal'
description = 'News and current affairs' description = 'News and current affairs'
language = 'en' language = 'en'
masthead_url = 'https://s.wsj.net/media/wsj_amp_masthead_lg.png'
compress_news_images = True compress_news_images = True
compress_news_images_auto_size = 7 compress_news_images_auto_size = 7

View File

@@ -35,6 +35,7 @@ class WSJ(BasicNewsRecipe):
__author__ = 'Kovid Goyal' __author__ = 'Kovid Goyal'
description = 'News and current affairs' description = 'News and current affairs'
language = 'en' language = 'en'
masthead_url = 'https://s.wsj.net/media/wsj_amp_masthead_lg.png'
compress_news_images = True compress_news_images = True
compress_news_images_auto_size = 7 compress_news_images_auto_size = 7