diff --git a/recipes/aif_ru.recipe b/recipes/aif_ru.recipe index c742e5f838..086a9e04c9 100644 --- a/recipes/aif_ru.recipe +++ b/recipes/aif_ru.recipe @@ -1,37 +1,45 @@ -__license__ = 'GPL v3' -__copyright__ = '2010 - 2014, Darko Miletic ' -''' -www.aif.ru -''' - +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import with_statement, unicode_literals from calibre.web.feeds.news import BasicNewsRecipe -class AIF_ru(BasicNewsRecipe): - title = 'Arguments & Facts - Russian' - __author__ = 'Darko Miletic' - description = 'News from Russia' - publisher = 'AIF' - category = 'news, politics, Russia' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False +class AdvancedUserRecipe1592177429(BasicNewsRecipe): + title = 'Аргументы и Факты' encoding = 'utf8' language = 'ru' - publication_type = 'magazine' - masthead_url = 'http://static3.aif.ru/glossy/index/i/logo.png' - extra_css = """ - body{font-family: Verdana,Arial,Helvetica,sans1,sans-serif} - img{display: block} - """ - keep_only_tags = [ - dict(name='h1', attrs={'class': 'title'}), dict(name='div', attrs={'class': 'prew_tags'}), dict( - name='article', attrs={'class': lambda x: x and 'articl_body' in x.split()}) - ] - remove_tags = [ - dict(name=['iframe', 'object', 'link', 'base', 'input', 'meta']), dict(name='div', attrs={'class': 'in-topic'}), dict(name='div', attrs={ - 'class': lambda x: x and 'related_article' in x.split()}), dict(name='div', attrs={'class': lambda x: x and 'articl_tag' in x.split()}) - ] + oldest_article = 7 + max_articles_per_feed = 25 + verbose = 3 - feeds = [(u'News', u'http://www.aif.ru/rss/all.php')] + feeds = [ + ('AIF', 'https://www.aif.ru/rss/all.php'), + ] + INDEX = 'https://www.aif.ru/rss/all.php' + + def parse_index(self): + feeds = [] + section_title = 'aif' + articles = [] + soup = self.index_to_soup(self.INDEX) + ii = 0 + for item in soup.findAll('item'): + if ii < self.max_articles_per_feed: + try: + ii = ii + 1 + A = str(item) + i = A.find(u'link') + j = A.find(u'description') + ZZ = item.find('description') + ZZ1 = str(ZZ) # bs4.element.Tag to str + ZZ2 = ZZ1[24:-19] + AU = A[i:j] + try: + articles.append({'url':AU[6:-2], 'title':ZZ2}) + except Exception: + pass + except Exception: + self.log("Exception handled!") + if articles: + feeds.append((section_title, articles)) + return feeds