#!/usr/bin/env python __license__ = 'GPL v3' __copyright__ = u'2010, Tomasz Dlugosz ' ''' rmf24.pl ''' import re from calibre.web.feeds.news import BasicNewsRecipe class RMF24_opinie(BasicNewsRecipe): title = u'Rmf24.pl - Opinie' description = u'Blogi, wywiady i komentarze ze strony rmf24.pl' language = 'pl' oldest_article = 7 max_articles_per_feed = 100 __author__ = u'Tomasz D\u0142ugosz' no_stylesheets = True remove_javascript = True remove_empty_feeds = True feeds = [(u'Blogi', u'http://www.rmf24.pl/opinie/blogi/feed'), (u'Kontrwywiad', u'http://www.rmf24.pl/opinie/wywiady/kontrwywiad/feed'), (u'Przes\u0142uchanie', u'http://www.rmf24.pl/opinie/wywiady/przesluchanie/feed'), (u'Komentarze', u'http://www.rmf24.pl/opinie/komentarze/feed')] keep_only_tags = [ dict(name='header', attrs={'class': 'article-header'}), dict(name='div', attrs={'class': 'article-container'})] remove_tags = [dict(name='div', attrs={'id': 'ReklamaMobile'})] extra_css = ''' h1 { font-size: 1.2em; } ''' # thanks to Kovid Goyal def get_article_url(self, article): link = article.get('link') if '/audio,aId' not in link: return link preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in [ (r'

Zdj.cie

', lambda match: ''), (r'embed embed(Left|Right|Center) articleEmbed(Audio|Wideo articleEmbedVideo|ArticleFull|ArticleTitle|ArticleListTitle|AlbumHorizontal)">', lambda match: 'REMOVE">'), # noqa (r'