calibre/recipes/scroll.recipe
un-pogaz 41cee6f02d various whitespace (auto-fix)
ruff 'E201,E202,E211,E251,E275'
2025-01-24 11:14:24 +01:00

59 lines
2.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
from calibre.web.feeds.news import BasicNewsRecipe, classes
class scroll(BasicNewsRecipe):
    """Recipe for Scroll.in.

    Articles are discovered by scanning the site's front page for links
    that point into each configured section (see ``parse_index``).
    """

    # --- Publication metadata ---
    title = 'Scroll.in'
    __author__ = 'unkn0wn'
    description = (
        'The leading destination for original reporting on news, politics, and culture in India. '
        'Our award-winning team of journalists brings readers insightful analysis and opinion on the days '
        'headlines alongside a fresh mix of features on music, books, and cinema.'
    )
    language = 'en_IN'
    masthead_url = 'https://scroll.in/static/assets/scroll-logo.0f68c78dd023e2598248ea107feba562.003.svg'

    # --- Clean-up / de-duplication settings ---
    no_stylesheets = True
    remove_javascript = True
    ignore_duplicate_articles = {'title', 'url'}
    remove_attributes = ['style', 'height', 'width']

    # Styling for the tag/meta lines and for image blocks.
    extra_css = '''
        .orange-tag, .article-meta-container { font-size:small; }
        .featured-image, .cms-block-image { text-align:center; font-size:small; }
    '''

    # Keep the page header plus the featured image and article body;
    # drop the comments entry point.
    keep_only_tags = [
        dict(name='header'),
        classes('featured-image article-body')
    ]
    remove_tags = [classes('comments-entry-point-meta')]

    def parse_index(self):
        """Build the feed list from links found on the Scroll.in front page.

        For every section slug, each ``<a>`` whose ``href`` starts with
        ``https://scroll.in/<slug>/`` becomes an article entry; the query
        string is stripped from the URL and links to the bare section
        landing page are skipped.

        Returns:
            A list of ``(section_title, articles)`` tuples, where
            ``articles`` is a list of ``{'title': ..., 'url': ...}`` dicts.
            Sections that yield no articles are left out.
        """
        index = 'https://scroll.in/'
        soup = self.index_to_soup(index)
        feeds = []

        for sec in ('article', 'magazine'):
            section = sec.capitalize()
            self.log(section)

            section_root = index + sec
            prefix = section_root + '/'

            # The filter runs while findAll executes, so reading the
            # per-iteration `prefix` from the enclosing scope is safe.
            links = soup.findAll(
                'a', attrs={'href': lambda href: href and href.startswith(prefix)}
            )

            articles = []
            for link in links:
                # Drop any query string from the link target.
                url = link['href'].partition('?')[0]
                # A link to the section landing page is not an article.
                if url == prefix or url == section_root:
                    continue
                title = self.tag_to_string(link)
                self.log('\t', title, '\n\t\t', url)
                articles.append({'title': title, 'url': url})

            if not articles:
                continue
            feeds.append((section, articles))

        return feeds

    def populate_article_metadata(self, article, soup, first):
        """Use the text of the page's first ``<h2>`` as the article summary.

        When the soup contains an ``<h2>``, its text is assigned to both
        ``article.summary`` and ``article.text_summary``; otherwise the
        article is left untouched. ``first`` is not used here.
        """
        subheading = soup.find('h2')
        if subheading:
            summary = self.tag_to_string(subheading)
            article.summary = summary
            article.text_summary = summary