mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge branch 'master' of https://github.com/unkn0w7n/calibre
This commit is contained in:
commit
3eb7f21fde
@ -79,11 +79,10 @@ class TheEconomicTimes(BasicNewsRecipe):
|
|||||||
for h3 in section.findAll(("h1", "h3", "h4", "h5")):
|
for h3 in section.findAll(("h1", "h3", "h4", "h5")):
|
||||||
span = h3.find(
|
span = h3.find(
|
||||||
'span',
|
'span',
|
||||||
href=lambda x: x and x.startswith('/epaper/'),
|
href=lambda x: x and x.startswith('https://economictimes.indiatimes.com/epaper/'),
|
||||||
attrs={'class': 'banner'}
|
attrs={'class': 'banner'}
|
||||||
)
|
)
|
||||||
url = span['href']
|
url = span['href']
|
||||||
url = 'https://economictimes.indiatimes.com' + url
|
|
||||||
title = self.tag_to_string(span)
|
title = self.tag_to_string(span)
|
||||||
div = h3.find_next_sibling('div', attrs={'class': 'dsc'})
|
div = h3.find_next_sibling('div', attrs={'class': 'dsc'})
|
||||||
if div is not None:
|
if div is not None:
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
import json
|
import json
|
||||||
from datetime import date
|
from datetime import date
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
|
||||||
# default edition is Delhi i.e., 'cap'
|
# default edition is Delhi i.e., 'cap'
|
||||||
@ -54,33 +55,32 @@ class toiprint(BasicNewsRecipe):
|
|||||||
url = index + '/DayIndex/' + date_ + '_' + le + '.json'
|
url = index + '/DayIndex/' + date_ + '_' + le + '.json'
|
||||||
raw = self.index_to_soup(url, raw=True)
|
raw = self.index_to_soup(url, raw=True)
|
||||||
data = json.loads(raw)
|
data = json.loads(raw)
|
||||||
if 'DigitalIndex' not in data:
|
if 'DayIndex' not in data:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
'The Times of India Newspaper is not published today.'
|
'The Times of India Newspaper is not published today.'
|
||||||
)
|
)
|
||||||
data = data['DigitalIndex']
|
data = data['DayIndex']
|
||||||
feeds = []
|
feeds_dict = defaultdict(list)
|
||||||
for link in data:
|
for link in data:
|
||||||
sec_name = link['PageTitle']
|
sec_name = link['PageTitle']
|
||||||
|
if sec_name == 'Advertisement':
|
||||||
|
continue
|
||||||
self.log(sec_name)
|
self.log(sec_name)
|
||||||
articles = []
|
articles = []
|
||||||
if 'Views' in link:
|
if 'Articles' in link:
|
||||||
for sec in link['Views']:
|
for art in link['Articles']:
|
||||||
if 'Articles' in sec:
|
section = sec_name
|
||||||
for art in sec['Articles']:
|
if 'ArticleName' not in art:
|
||||||
if 'ArticleName' not in art:
|
continue
|
||||||
continue
|
url = art['ArticleName']
|
||||||
url = art['ArticleName']
|
title = art.get('ArticleTitle', 'unknown').replace('<br>', '')
|
||||||
title = art.get('ArticleTitle', 'unknown').replace('<br>', '')
|
if art.get('ColumnTitle', '') == '':
|
||||||
if art.get('ColumnTitle', '') == '':
|
desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ArticleBody', '')
|
||||||
desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ArticleBody', '')
|
else:
|
||||||
else:
|
desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ColumnTitle', '')
|
||||||
desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ColumnTitle', '')
|
self.log('\t', title, '\n\t', desc.replace('\n', ''))
|
||||||
self.log('\t', title, '\n\t', desc, '\n\t\t', url)
|
feeds_dict[section].append({"title": title, "url": url, "description": desc})
|
||||||
articles.append({'title': title, 'description':desc, 'url': url})
|
return [(section, articles) for section, articles in feeds_dict.items()]
|
||||||
if articles:
|
|
||||||
feeds.append((sec_name, articles))
|
|
||||||
return feeds
|
|
||||||
|
|
||||||
def preprocess_raw_html(self, raw, *a):
|
def preprocess_raw_html(self, raw, *a):
|
||||||
data = json.loads(raw)
|
data = json.loads(raw)
|
||||||
@ -107,8 +107,11 @@ class toiprint(BasicNewsRecipe):
|
|||||||
elif 'ZoneText' in x:
|
elif 'ZoneText' in x:
|
||||||
body += '<p><i>' + x['ZoneText'] + '</i></p>'
|
body += '<p><i>' + x['ZoneText'] + '</i></p>'
|
||||||
return '<html><body><div>' \
|
return '<html><body><div>' \
|
||||||
+ body.replace('<br>', '<p>').replace('<br/>', '<p>').replace('<br>', '<p>').replace('\n', '<div>') \
|
+ body.replace('<br>', '<p>').replace('<br/>', '<p>').replace('<br>', '<p>').replace('\n', '<br>') \
|
||||||
+ '</div></body></html>'
|
+ '</div></body></html>'
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return index + '/ArticleZoneJson/' + url.split('_')[-3] + '/' + url + '.json'
|
return index + '/ArticleZoneJson/' + url.split('_')[-3] + '/' + url + '.json'
|
||||||
|
|
||||||
|
def populate_article_metadata(self, article, soup, first):
|
||||||
|
article.url = '***'
|
||||||
|
@ -35,6 +35,7 @@ class WSJ(BasicNewsRecipe):
|
|||||||
__author__ = 'Kovid Goyal'
|
__author__ = 'Kovid Goyal'
|
||||||
description = 'News and current affairs'
|
description = 'News and current affairs'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
masthead_url = 'https://s.wsj.net/media/wsj_amp_masthead_lg.png'
|
||||||
|
|
||||||
compress_news_images = True
|
compress_news_images = True
|
||||||
compress_news_images_auto_size = 7
|
compress_news_images_auto_size = 7
|
||||||
|
@ -35,6 +35,7 @@ class WSJ(BasicNewsRecipe):
|
|||||||
__author__ = 'Kovid Goyal'
|
__author__ = 'Kovid Goyal'
|
||||||
description = 'News and current affairs'
|
description = 'News and current affairs'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
masthead_url = 'https://s.wsj.net/media/wsj_amp_masthead_lg.png'
|
||||||
|
|
||||||
compress_news_images = True
|
compress_news_images = True
|
||||||
compress_news_images_auto_size = 7
|
compress_news_images_auto_size = 7
|
||||||
|
Loading…
x
Reference in New Issue
Block a user