Update Аргументы и Факты

This commit is contained in:
Kovid Goyal 2021-09-11 09:47:06 +05:30
parent 9a6d284cf4
commit 4e9bc479c3
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -1,37 +1,45 @@
__license__ = 'GPL v3' #!/usr/bin/env python
__copyright__ = '2010 - 2014, Darko Miletic <darko.miletic at gmail.com>' # vim:fileencoding=utf-8
''' from __future__ import with_statement, unicode_literals
www.aif.ru
'''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AIF_ru(BasicNewsRecipe): class AdvancedUserRecipe1592177429(BasicNewsRecipe):
title = 'Arguments & Facts - Russian' title = 'Аргументы и Факты'
__author__ = 'Darko Miletic'
description = 'News from Russia'
publisher = 'AIF'
category = 'news, politics, Russia'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf8' encoding = 'utf8'
language = 'ru' language = 'ru'
publication_type = 'magazine' oldest_article = 7
masthead_url = 'http://static3.aif.ru/glossy/index/i/logo.png' max_articles_per_feed = 25
extra_css = """ verbose = 3
body{font-family: Verdana,Arial,Helvetica,sans1,sans-serif}
img{display: block}
"""
keep_only_tags = [
dict(name='h1', attrs={'class': 'title'}), dict(name='div', attrs={'class': 'prew_tags'}), dict(
name='article', attrs={'class': lambda x: x and 'articl_body' in x.split()})
]
remove_tags = [
dict(name=['iframe', 'object', 'link', 'base', 'input', 'meta']), dict(name='div', attrs={'class': 'in-topic'}), dict(name='div', attrs={
'class': lambda x: x and 'related_article' in x.split()}), dict(name='div', attrs={'class': lambda x: x and 'articl_tag' in x.split()})
]
feeds = [(u'News', u'http://www.aif.ru/rss/all.php')] feeds = [
('AIF', 'https://www.aif.ru/rss/all.php'),
]
INDEX = 'https://www.aif.ru/rss/all.php'
def parse_index(self):
    """Build the recipe index from the RSS document at ``self.INDEX``.

    Every ``<item>`` element of the parsed index contributes one article
    whose URL is cut out of the serialized item text and whose title is
    taken from the item's ``<description>`` element.

    Returns:
        A list that is empty when no usable article was found, and
        otherwise holds a single ``('aif', articles)`` tuple, where
        ``articles`` is a list of ``{'url': ..., 'title': ...}`` dicts
        holding at most ``self.max_articles_per_feed`` entries.

    Items whose markup lacks the expected ``link``/``description`` markers,
    or whose extracted URL is empty, are logged and skipped; they do not
    count against the article limit.
    """
    section_title = 'aif'
    articles = []
    soup = self.index_to_soup(self.INDEX)

    for item in soup.findAll('item'):
        # Stop scanning as soon as the limit is reached.
        if len(articles) >= self.max_articles_per_feed:
            break

        try:
            raw = str(item)
            link_pos = raw.find('link')
            desc_pos = raw.find('description')
            desc_tag = item.find('description')
            desc_text = str(desc_tag)
        except Exception as err:
            # Best effort: one malformed item must not abort the whole index.
            self.log('Exception handled! %r' % (err,))
            continue

        # str.find() returns -1 on a miss; slicing with that would silently
        # yield a garbage URL, so reject such items explicitly.
        if link_pos < 0 or desc_pos <= link_pos or desc_tag is None:
            self.log('Skipping RSS item without link/description markup')
            continue

        # NOTE(review): the offsets below are inherited unchanged. They
        # presumably strip 'link/>' (6 chars) plus the two characters that
        # precede 'description' from the URL span, and the
        # '<description><!--[CDATA[' (24) / ']]--></description>' (19)
        # wrappers from the title -- confirm against the parser's output.
        url = raw[link_pos:desc_pos][6:-2]
        title = desc_text[24:-19]

        if not url:
            self.log('Skipping RSS item with empty link')
            continue

        articles.append({'url': url, 'title': title})

    feeds = []
    if articles:
        feeds.append((section_title, articles))
    return feeds