mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge branch 'master' of https://github.com/unkn0w7n/calibre
This commit is contained in:
commit
3eb7f21fde
@ -79,11 +79,10 @@ class TheEconomicTimes(BasicNewsRecipe):
|
||||
for h3 in section.findAll(("h1", "h3", "h4", "h5")):
|
||||
span = h3.find(
|
||||
'span',
|
||||
href=lambda x: x and x.startswith('/epaper/'),
|
||||
href=lambda x: x and x.startswith('https://economictimes.indiatimes.com/epaper/'),
|
||||
attrs={'class': 'banner'}
|
||||
)
|
||||
url = span['href']
|
||||
url = 'https://economictimes.indiatimes.com' + url
|
||||
title = self.tag_to_string(span)
|
||||
div = h3.find_next_sibling('div', attrs={'class': 'dsc'})
|
||||
if div is not None:
|
||||
|
@ -1,6 +1,7 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import json
|
||||
from datetime import date
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
# default edition is Delhi i.e., 'cap'
|
||||
@ -54,33 +55,32 @@ class toiprint(BasicNewsRecipe):
|
||||
url = index + '/DayIndex/' + date_ + '_' + le + '.json'
|
||||
raw = self.index_to_soup(url, raw=True)
|
||||
data = json.loads(raw)
|
||||
if 'DigitalIndex' not in data:
|
||||
if 'DayIndex' not in data:
|
||||
raise ValueError(
|
||||
'The Times of India Newspaper is not published today.'
|
||||
)
|
||||
data = data['DigitalIndex']
|
||||
feeds = []
|
||||
data = data['DayIndex']
|
||||
feeds_dict = defaultdict(list)
|
||||
for link in data:
|
||||
sec_name = link['PageTitle']
|
||||
if sec_name == 'Advertisement':
|
||||
continue
|
||||
self.log(sec_name)
|
||||
articles = []
|
||||
if 'Views' in link:
|
||||
for sec in link['Views']:
|
||||
if 'Articles' in sec:
|
||||
for art in sec['Articles']:
|
||||
if 'ArticleName' not in art:
|
||||
continue
|
||||
url = art['ArticleName']
|
||||
title = art.get('ArticleTitle', 'unknown').replace('<br>', '')
|
||||
if art.get('ColumnTitle', '') == '':
|
||||
desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ArticleBody', '')
|
||||
else:
|
||||
desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ColumnTitle', '')
|
||||
self.log('\t', title, '\n\t', desc, '\n\t\t', url)
|
||||
articles.append({'title': title, 'description':desc, 'url': url})
|
||||
if articles:
|
||||
feeds.append((sec_name, articles))
|
||||
return feeds
|
||||
if 'Articles' in link:
|
||||
for art in link['Articles']:
|
||||
section = sec_name
|
||||
if 'ArticleName' not in art:
|
||||
continue
|
||||
url = art['ArticleName']
|
||||
title = art.get('ArticleTitle', 'unknown').replace('<br>', '')
|
||||
if art.get('ColumnTitle', '') == '':
|
||||
desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ArticleBody', '')
|
||||
else:
|
||||
desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ColumnTitle', '')
|
||||
self.log('\t', title, '\n\t', desc.replace('\n', ''))
|
||||
feeds_dict[section].append({"title": title, "url": url, "description": desc})
|
||||
return [(section, articles) for section, articles in feeds_dict.items()]
|
||||
|
||||
def preprocess_raw_html(self, raw, *a):
|
||||
data = json.loads(raw)
|
||||
@ -107,8 +107,11 @@ class toiprint(BasicNewsRecipe):
|
||||
elif 'ZoneText' in x:
|
||||
body += '<p><i>' + x['ZoneText'] + '</i></p>'
|
||||
return '<html><body><div>' \
|
||||
+ body.replace('<br>', '<p>').replace('<br/>', '<p>').replace('<br>', '<p>').replace('\n', '<div>') \
|
||||
+ body.replace('<br>', '<p>').replace('<br/>', '<p>').replace('<br>', '<p>').replace('\n', '<br>') \
|
||||
+ '</div></body></html>'
|
||||
|
||||
def print_version(self, url):
|
||||
return index + '/ArticleZoneJson/' + url.split('_')[-3] + '/' + url + '.json'
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
|
||||
article.url = '***'
|
||||
|
@ -35,6 +35,7 @@ class WSJ(BasicNewsRecipe):
|
||||
__author__ = 'Kovid Goyal'
|
||||
description = 'News and current affairs'
|
||||
language = 'en'
|
||||
masthead_url = 'https://s.wsj.net/media/wsj_amp_masthead_lg.png'
|
||||
|
||||
compress_news_images = True
|
||||
compress_news_images_auto_size = 7
|
||||
|
@ -35,6 +35,7 @@ class WSJ(BasicNewsRecipe):
|
||||
__author__ = 'Kovid Goyal'
|
||||
description = 'News and current affairs'
|
||||
language = 'en'
|
||||
masthead_url = 'https://s.wsj.net/media/wsj_amp_masthead_lg.png'
|
||||
|
||||
compress_news_images = True
|
||||
compress_news_images_auto_size = 7
|
||||
|
Loading…
x
Reference in New Issue
Block a user