From dffb2775f27c12dad84770c219b8970640c77e1c Mon Sep 17 00:00:00 2001
From: Tadej <37044425+tadejsinko@users.noreply.github.com>
Date: Sun, 13 Feb 2022 00:08:29 +0100
Subject: [PATCH] Fixed news recipe for www.rtvslo.si

The old recipe wasn't generating any content at all due to significant changes to the website. I fixed the recipe and added a few feeds that were missing.
---
 recipes/mmc_rtv.recipe | 58 +++++++++++++++++-------------------------
 1 file changed, 23 insertions(+), 35 deletions(-)

diff --git a/recipes/mmc_rtv.recipe b/recipes/mmc_rtv.recipe
index ccd8b97284..969115e1f0 100644
--- a/recipes/mmc_rtv.recipe
+++ b/recipes/mmc_rtv.recipe
@@ -1,58 +1,46 @@
-__license__ = 'GPL v3'
-__copyright__ = '2010, BlonG'
-'''
-www.rtvslo.si
-'''
-from calibre.web.feeds.news import BasicNewsRecipe
+#!/usr/bin/env python
+# News source: https://www.rtvslo.si
+# License: GPLv3
+# Copyright: 2022, TadejS
+from calibre.web.feeds.news import BasicNewsRecipe
+
 
 
 class MMCRTV(BasicNewsRecipe):
     title = u'MMC RTV Slovenija'
-    __author__ = u'BlonG'
+    __author__ = u'TadejS'
     description = u"Prvi interaktivni multimedijski portal, MMC RTV Slovenija"
     oldest_article = 3
-    max_articles_per_feed = 20
+    max_articles_per_feed = 100
     language = 'sl'
     no_stylesheets = True
     use_embedded_content = False
+    encoding = 'utf-8'
 
-    cover_url = 'https://sites.google.com/site/javno2010/home/rtv_slo_cover.jpg'
-
-    extra_css = '''
-        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
-        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
-        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
-        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
-    '''
-
-    def print_version(self, url):
-        split_url = url.split("/")
-        print_url = 'http://www.rtvslo.si/index.php?c_mod=news&op=print&id=' + \
-            split_url[-1]
-        return print_url
+    cover_url = 'https://img.rtvslo.si/_static/novi/logo/tvmmc-light-bg.png'
 
     keep_only_tags = [
-        dict(name='div', attrs={'class': 'title'}),
-        dict(name='div', attrs={'id': 'newsbody'}),
-        dict(name='div', attrs={'id': 'newsblocks'}),
+        dict(name='header', attrs={'class': 'article-header'}),
+        dict(name='div', attrs={'class': 'article-body'}),
+    ]
+    remove_tags=[
+        dict(name='div', attrs={'class':'gallery-grid'}),
+        dict(name='div', attrs={'class':'exposed-article'}),
+        dict(name='div', attrs={'class':'d-lg-none'}),
+        dict(name='div', attrs={'class':'section-heading'}),
     ]
-# remove_tags=[
-# 40 dict(name='div', attrs={'id':'newsblocks'}),
-# ]
 
     feeds = [
         (u'Slovenija', u'http://www.rtvslo.si/feeds/01.xml'),
-        (u'Svet', u'http://www.rtvslo.si/feeds/02.xml'),
         (u'Evropska unija', u'http://www.rtvslo.si/feeds/16.xml'),
+        (u'Svet', u'http://www.rtvslo.si/feeds/02.xml'),
         (u'Gospodarstvo', u'http://www.rtvslo.si/feeds/04.xml'),
-        (u'\u010crna kronika', u'http://www.rtvslo.si/feeds/08.xml'),
         (u'Okolje', u'http://www.rtvslo.si/feeds/12.xml'),
         (u'Znanost in tehnologija', u'http://www.rtvslo.si/feeds/09.xml'),
+        (u'Kultura', u'https://www.rtvslo.si/feeds/05.xml'),
+        (u'Šport', u'https://www.rtvslo.si/feeds/03.xml'),
         (u'Zabava', u'http://www.rtvslo.si/feeds/06.xml'),
         (u'Ture avanture', u'http://www.rtvslo.si/feeds/28.xml'),
+        (u'Črna kronika', u'http://www.rtvslo.si/feeds/08.xml'),
     ]
-
-# def preprocess_html(self, soup):
-# newsblocks = soup.find('div',attrs = ['id':'newsblocks'])
-# soup.find('div', attrs = {'id':'newsbody'}).insert(-1, newsblocks)
-# return soup
+