__license__ = 'GPL v3'
__copyright__ = '2010, BlonG'
'''
www.rtvslo.si
'''

from calibre.web.feeds.news import BasicNewsRecipe


class MMCRTV(BasicNewsRecipe):
    """Calibre news recipe for the MMC RTV Slovenija portal (www.rtvslo.si).

    The recipe reads the section RSS feeds listed in ``feeds``, rewrites every
    article link to the site's print view (see ``print_version``) and keeps
    only the title, body and news-block ``div`` elements of each page.
    """

    title = u'MMC RTV Slovenija'
    __author__ = u'BlonG'
    description = u"Prvi interaktivni multimedijski portal, MMC RTV Slovenija"
    oldest_article = 3
    max_articles_per_feed = 20
    language = 'sl'
    no_stylesheets = True
    use_embedded_content = False

    cover_url = 'https://sites.google.com/site/javno2010/home/rtv_slo_cover.jpg'

    # Adjacent literals are concatenated into one stylesheet string; the
    # resulting text is identical to the original single literal.
    extra_css = (
        ' h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}'
        ' h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}'
        ' p{font-family:Arial,Helvetica,sans-serif;font-size:small;}'
        ' body{font-family:Helvetica,Arial,sans-serif;font-size:small;} '
    )

    def print_version(self, url):
        """Return the print-view URL for the article at *url*.

        The article id is taken from the last path segment of *url*.
        Trailing slashes are stripped first, so a link such as
        ``.../some-title/12345/`` still yields ``12345`` instead of an
        empty id (which would produce a print URL ending in ``id=``).
        """
        article_id = url.rstrip('/').rsplit('/', 1)[-1]
        return (
            'http://www.rtvslo.si/index.php?c_mod=news&op=print&id='
            + article_id
        )

    # Only these elements of the downloaded page are kept.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'title'}),
        dict(name='div', attrs={'id': 'newsbody'}),
        dict(name='div', attrs={'id': 'newsblocks'}),
    ]

    # Disabled alternative, kept for reference: it would strip the
    # 'newsblocks' div that keep_only_tags above retains.
    # remove_tags = [
    #     dict(name='div', attrs={'id': 'newsblocks'}),
    # ]

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'Slovenija', u'http://www.rtvslo.si/feeds/01.xml'),
        (u'Svet', u'http://www.rtvslo.si/feeds/02.xml'),
        (u'Evropska unija', u'http://www.rtvslo.si/feeds/16.xml'),
        (u'Gospodarstvo', u'http://www.rtvslo.si/feeds/04.xml'),
        (u'\u010crna kronika', u'http://www.rtvslo.si/feeds/08.xml'),
        (u'Okolje', u'http://www.rtvslo.si/feeds/12.xml'),
        (u'Znanost in tehnologija', u'http://www.rtvslo.si/feeds/09.xml'),
        (u'Zabava', u'http://www.rtvslo.si/feeds/06.xml'),
        (u'Ture avanture', u'http://www.rtvslo.si/feeds/28.xml'),
    ]

    # Disabled experiment, kept for reference: move the 'newsblocks' div
    # into the 'newsbody' div before conversion. (The attrs literal is
    # written as a dict here; the earlier draft used invalid list syntax.)
    # def preprocess_html(self, soup):
    #     newsblocks = soup.find('div', attrs={'id': 'newsblocks'})
    #     soup.find('div', attrs={'id': 'newsbody'}).insert(-1, newsblocks)
    #     return soup