calibre/recipes/revista22.recipe
un-pogaz 41cee6f02d various whitespace (auto-fix)
ruff 'E201,E202,E211,E251,E275'
2025-01-24 11:14:24 +01:00

76 lines
2.4 KiB
Python

#!/usr/bin/env python
from calibre.web.feeds.recipes import BasicNewsRecipe
class Volkskrant(BasicNewsRecipe):
    """Recipe that builds a one-section feed from the current issue of Revista 22.

    NOTE(review): the class name does not match the publication title; it is
    left unchanged so the recipe's public name stays stable.
    """

    # --- Recipe metadata -------------------------------------------------
    title = 'Revista 22'
    __author__ = 'Cristi Ghera'
    description = 'Revista 22'
    language = 'ro'
    country = 'RO'
    category = 'news, politics, Romania'
    needs_subscription = False

    # --- Download behaviour ----------------------------------------------
    max_articles_per_feed = 100
    resolve_internal_links = True
    encoding = 'utf-8'
    no_stylesheets = True
    ignore_duplicate_articles = {'url'}

    # Site root; used both as the index page and as the base for relative links.
    BASE_URL = 'https://revista22.ro'

    # --- Article clean-up ------------------------------------------------
    # Article pages are trimmed around the element with class 'col-span-8',
    # the same container parse_index() reads the issue listing from.
    remove_tags_before = {'class': 'col-span-8'}
    remove_tags_after = {'class': 'col-span-8'}
    remove_tags = [
        dict(
            attrs={
                'class': [
                    'icons',
                    'float-left',
                    'samesection',
                ]
            }
        ),
        dict(
            name=['div'],
            attrs={
                'class': ['mb-2']
            }
        ),
        dict(id=['comments']),
        dict(name=['script', 'noscript', 'style']),
    ]
    remove_attributes = ['class', 'id', 'name', 'style']

    def parse_index(self):
        """Return the articles of the current issue as a single feed section.

        The front page is fetched to discover the link to the current issue;
        the issue page is then scanned for article teasers.

        Returns:
            A one-element list ``[(section_title, articles)]``. Each article is
            a dict with the keys ``title``, ``url``, ``date``, ``description``
            and ``content``.

        Raises:
            ValueError: if the current-issue link or the issue's article
                container cannot be found in the downloaded pages.
        """
        # Function-scope import so this class does not depend on a file-level
        # import being added alongside it.
        from urllib.parse import urljoin

        front_page = self.index_to_soup(self.BASE_URL)
        issue_box = front_page.find('div', attrs={'class': 'uppercase'})
        issue_link = issue_box.find('a') if issue_box is not None else None
        issue_href = issue_link.get('href') if issue_link is not None else None
        if not issue_href:
            raise ValueError(
                'Revista 22: could not find the current issue link on the front page'
            )
        # urljoin copes with root-relative, protocol-relative and absolute hrefs.
        issue_url = urljoin(self.BASE_URL, issue_href)

        issue_page = self.index_to_soup(issue_url)
        main_container = issue_page.find('div', attrs={'class': 'col-span-8'})
        if main_container is None:
            raise ValueError(
                'Revista 22: could not find the article list on the issue page'
            )

        articles = []
        for teaser in main_container.findAll(attrs={'class': 'mb-4'}):
            # Only 'mb-4' elements that also carry 'pb-4' are treated as articles.
            if 'pb-4' not in (teaser.get('class') or ()):
                continue

            link = teaser.find('a')
            href = link.get('href') if link is not None else None
            if not href:
                # Without a link there is nothing to download; skip the teaser
                # instead of aborting the whole issue.
                continue

            author = self.tag_to_string(
                teaser.find('span', attrs={'class': 'text-red'})
            ).strip()
            summary = self.tag_to_string(teaser.find('p')).strip()

            articles.append({
                'title': self.tag_to_string(link.find('h3')).strip(),
                # Resolve against the issue page so relative links work too.
                'url': urljoin(issue_url, href),
                'date': self.tag_to_string(link.find('span')).strip(),
                # Join only the non-empty parts to avoid a dangling ' - '.
                'description': ' - '.join(
                    part for part in (author, summary) if part
                ),
                'content': '',
            })

        return [('Numărul curent', articles)]