From 66d958241060b59ed95088c60c2bfb1bac474a24 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 29 Sep 2010 14:57:26 -0600 Subject: [PATCH 1/2] Rmf24 - Opinie by Tomasz Dlugosz --- resources/recipes/rmf24_opinie.recipe | 55 +++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 resources/recipes/rmf24_opinie.recipe diff --git a/resources/recipes/rmf24_opinie.recipe b/resources/recipes/rmf24_opinie.recipe new file mode 100644 index 0000000000..4d2f447dbe --- /dev/null +++ b/resources/recipes/rmf24_opinie.recipe @@ -0,0 +1,55 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2010, Tomasz Dlugosz ' +''' +rmf24.pl +''' + +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class RMF24_opinie(BasicNewsRecipe): + title = u'Rmf24.pl - Opinie' + description = u'Blogi, wywiady i komentarze ze strony rmf24.pl' + language = 'pl' + oldest_article = 7 + max_articles_per_feed = 100 + __author__ = u'Tomasz D\u0142ugosz' + no_stylesheets = True + remove_javascript = True + + feeds = [(u'Blogi', u'http://www.rmf24.pl/opinie/blogi/feed'), + (u'Kontrwywiad', u'http://www.rmf24.pl/opinie/wywiady/kontrwywiad/feed'), + (u'Przes\u0142uchanie', u'http://www.rmf24.pl/opinie/wywiady/przesluchanie/feed'), + (u'Komentarze', u'http://www.rmf24.pl/opinie/komentarze/feed')] + + keep_only_tags = [ + dict(name='div', attrs={'class':'box articleSingle print'}), + dict(name='div', attrs={'class':'box articleSingle print singleCommentary'}), + dict(name='div', attrs={'class':'box articleSingle print blogSingleEntry'})] + + remove_tags = [ + dict(name='div', attrs={'class':'toTop'}), + dict(name='div', attrs={'class':'category'}), + dict(name='div', attrs={'class':'REMOVE'}), + dict(name='div', attrs={'class':'embed embedAd'})] + + extra_css = ''' + h1 { font-size: 1.2em; } + ''' + + # thanks to Kovid Goyal + def get_article_url(self, article): + link = article.get('link') + if 'audio' not in link: + return link + + preprocess_regexps = [ + (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in + [ + (r'

Zdj.cie

', lambda match: ''), + (r'embed embed(Left|Right|Center) articleEmbed(Audio|Wideo articleEmbedVideo|ArticleFull|ArticleTitle|ArticleListTitle|AlbumHorizontal)">', lambda match: 'REMOVE">'), + (r'