mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
59 lines
2.1 KiB
Python
59 lines
2.1 KiB
Python
#!/usr/bin/env python
|
||
from calibre.web.feeds.news import BasicNewsRecipe, classes
|
||
|
||
|
||
class scroll(BasicNewsRecipe):
    """Calibre news recipe for Scroll.in.

    The article list is scraped from anchors on the site's front page
    (see ``parse_index``); there is no RSS feed involved.
    """

    title = 'Scroll.in'
    __author__ = 'unkn0wn'
    description = (
        'The leading destination for original reporting on news, politics, and culture in India. '
        'Our award-winning team of journalists brings readers insightful analysis and opinion on the day’s '
        'headlines alongside a fresh mix of features on music, books, and cinema.'
    )
    language = 'en_IN'
    masthead_url = 'https://scroll.in/static/assets/scroll-logo.0f68c78dd023e2598248ea107feba562.003.svg'

    no_stylesheets = True
    remove_javascript = True

    # The same story can be linked several times from the front page.
    ignore_duplicate_articles = {'title', 'url'}
    remove_attributes = ['style', 'height', 'width']

    extra_css = '''
        .orange-tag, .article-meta-container { font-size:small; }
        .featured-image, .cms-block-image { text-align:center; font-size:small; }
    '''

    # Keep the article header plus the lead image and the body text.
    keep_only_tags = [
        dict(name='header'),
        classes('featured-image article-body'),
    ]

    remove_tags = [classes('comments-entry-point-meta')]

    def parse_index(self):
        """Build the feed list from links found on the Scroll.in front page.

        Every ``<a>`` whose ``href`` starts with ``https://scroll.in/<section>/``
        is treated as an article of that section. Query strings are dropped
        from the URL, the bare section landing page is skipped, and anchors
        that yield no title text are ignored.

        Returns:
            A list of ``(section_title, articles)`` tuples, where ``articles``
            is a list of ``{'title': ..., 'url': ...}`` dicts. Sections with
            no articles are omitted.
        """
        index = 'https://scroll.in/'
        sections = ['article', 'magazine']
        feeds = []
        soup = self.index_to_soup(index)
        for sec in sections:
            section = sec.capitalize()
            self.log(section)
            # Hoisted: the same prefix is used for matching and for the
            # landing-page check below.
            prefix = index + sec + '/'
            articles = []
            # The href filter runs inside findAll, so closing over the loop
            # variable ``prefix`` is safe here.
            for a in soup.findAll('a', attrs={'href': lambda x: x and x.startswith(prefix)}):
                url = a['href'].split('?')[0]
                # Every matched href starts with ``prefix``, so the only
                # non-article URL that can appear is the section page itself.
                if url == prefix:
                    continue
                title = self.tag_to_string(a).strip()
                if not title:
                    # An anchor without any text would produce a blank entry
                    # and could shadow a later, titled link to the same URL
                    # once duplicates are removed.
                    continue
                self.log('\t', title, '\n\t\t', url)
                articles.append({'title': title, 'url': url})
            if articles:
                feeds.append((section, articles))
        return feeds

    def populate_article_metadata(self, article, soup, first):
        """Use the text of the first ``<h2>`` in the page as the article summary.

        Args:
            article: Article object whose ``summary`` and ``text_summary``
                attributes are set.
            soup: Parsed HTML of the downloaded article.
            first: Unused here.
        """
        heading = soup.find('h2')
        if heading is not None:
            article.summary = article.text_summary = self.tag_to_string(heading)
|