From 4e9bc479c3b9bbae0071e84a225d64e448e01303 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 11 Sep 2021 09:47:06 +0530 Subject: [PATCH] =?UTF-8?q?Update=20=D0=90=D1=80=D0=B3=D1=83=D0=BC=D0=B5?= =?UTF-8?q?=D0=BD=D1=82=D1=8B=20=D0=B8=20=D0=A4=D0=B0=D0=BA=D1=82=D1=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- recipes/aif_ru.recipe | 70 ++++++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 31 deletions(-) diff --git a/recipes/aif_ru.recipe b/recipes/aif_ru.recipe index c742e5f838..086a9e04c9 100644 --- a/recipes/aif_ru.recipe +++ b/recipes/aif_ru.recipe @@ -1,37 +1,45 @@ -__license__ = 'GPL v3' -__copyright__ = '2010 - 2014, Darko Miletic ' -''' -www.aif.ru -''' - +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import with_statement, unicode_literals from calibre.web.feeds.news import BasicNewsRecipe -class AIF_ru(BasicNewsRecipe): - title = 'Arguments & Facts - Russian' - __author__ = 'Darko Miletic' - description = 'News from Russia' - publisher = 'AIF' - category = 'news, politics, Russia' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False +class AdvancedUserRecipe1592177429(BasicNewsRecipe): + title = 'Аргументы и Факты' encoding = 'utf8' language = 'ru' - publication_type = 'magazine' - masthead_url = 'http://static3.aif.ru/glossy/index/i/logo.png' - extra_css = """ - body{font-family: Verdana,Arial,Helvetica,sans1,sans-serif} - img{display: block} - """ - keep_only_tags = [ - dict(name='h1', attrs={'class': 'title'}), dict(name='div', attrs={'class': 'prew_tags'}), dict( - name='article', attrs={'class': lambda x: x and 'articl_body' in x.split()}) - ] - remove_tags = [ - dict(name=['iframe', 'object', 'link', 'base', 'input', 'meta']), dict(name='div', attrs={'class': 'in-topic'}), dict(name='div', attrs={ - 'class': lambda x: x and 'related_article' in x.split()}), dict(name='div', attrs={'class': lambda x: x and 'articl_tag' in x.split()}) - ] + oldest_article = 7 + max_articles_per_feed = 25 + verbose = 3 - feeds = [(u'News', u'http://www.aif.ru/rss/all.php')] + feeds = [ + ('AIF', 'https://www.aif.ru/rss/all.php'), + ] + INDEX = 'https://www.aif.ru/rss/all.php' + + def parse_index(self): + feeds = [] + section_title = 'aif' + articles = [] + soup = self.index_to_soup(self.INDEX) + ii = 0 + for item in soup.findAll('item'): + if ii < self.max_articles_per_feed: + try: + ii = ii + 1 + A = str(item) + i = A.find(u'link') + j = A.find(u'description') + ZZ = item.find('description') + ZZ1 = str(ZZ) # bs4.element.Tag to str + ZZ2 = ZZ1[24:-19] + AU = A[i:j] + try: + articles.append({'url':AU[6:-2], 'title':ZZ2}) + except Exception: + pass + except Exception: + self.log("Exception handled!") + if articles: + feeds.append((section_title, articles)) + return feeds