diff --git a/resources/recipes/peterschiff.recipe b/resources/recipes/peterschiff.recipe index 882dabc43b..842da7f733 100644 --- a/resources/recipes/peterschiff.recipe +++ b/resources/recipes/peterschiff.recipe @@ -12,15 +12,18 @@ class PeterSchiff(BasicNewsRecipe): description = 'Economic commentary' publisher = 'Euro Pacific capital' category = 'news, politics, economy, USA' - oldest_article = 15 + oldest_article = 25 max_articles_per_feed = 200 no_stylesheets = True - encoding = 'cp1252' + encoding = 'utf8' use_embedded_content = False language = 'en' - country = 'US' remove_empty_feeds = True - extra_css = ' body{font-family: Verdana,Times,serif } h1{text-align: left} img{margin-bottom: 0.4em} ' + extra_css = """ + body{font-family: Verdana,Times,serif } + .field-field-commentary-writer-name{font-weight: bold} + .field-items{display: inline} + """ conversion_options = { 'comment' : description @@ -30,7 +33,15 @@ class PeterSchiff(BasicNewsRecipe): , 'linearize_tables' : True } - keep_only_tags = [dict(name='tr',attrs={'style':'vertical-align: top;'})] + keep_only_tags = [ + dict(name='h2',attrs={'id':'page-title'}) + ,dict(name='div',attrs={'class':'node'}) + ] + remove_tags = [ + dict(name=['meta','link','base','iframe','embed']) + ,dict(attrs={'id':'text-zoom'}) + ] + remove_attributes=['track','linktype','lang'] feeds = [(u'Articles', u'http://feeds.feedburner.com/PeterSchiffsEconomicCommentary')] diff --git a/resources/recipes/rmf24_opinie.recipe b/resources/recipes/rmf24_opinie.recipe new file mode 100644 index 0000000000..4d2f447dbe --- /dev/null +++ b/resources/recipes/rmf24_opinie.recipe @@ -0,0 +1,55 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2010, Tomasz Dlugosz ' +''' +rmf24.pl +''' + +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class RMF24_opinie(BasicNewsRecipe): + title = u'Rmf24.pl - Opinie' + description = u'Blogi, wywiady i komentarze ze strony rmf24.pl' + language = 'pl' + oldest_article = 7 + max_articles_per_feed = 100 + __author__ = u'Tomasz D\u0142ugosz' + no_stylesheets = True + remove_javascript = True + + feeds = [(u'Blogi', u'http://www.rmf24.pl/opinie/blogi/feed'), + (u'Kontrwywiad', u'http://www.rmf24.pl/opinie/wywiady/kontrwywiad/feed'), + (u'Przes\u0142uchanie', u'http://www.rmf24.pl/opinie/wywiady/przesluchanie/feed'), + (u'Komentarze', u'http://www.rmf24.pl/opinie/komentarze/feed')] + + keep_only_tags = [ + dict(name='div', attrs={'class':'box articleSingle print'}), + dict(name='div', attrs={'class':'box articleSingle print singleCommentary'}), + dict(name='div', attrs={'class':'box articleSingle print blogSingleEntry'})] + + remove_tags = [ + dict(name='div', attrs={'class':'toTop'}), + dict(name='div', attrs={'class':'category'}), + dict(name='div', attrs={'class':'REMOVE'}), + dict(name='div', attrs={'class':'embed embedAd'})] + + extra_css = ''' + h1 { font-size: 1.2em; } + ''' + + # thanks to Kovid Goyal + def get_article_url(self, article): + link = article.get('link') + if 'audio' not in link: + return link + + preprocess_regexps = [ + (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in + [ + (r'

Zdj.cie

', lambda match: ''), + (r'embed embed(Left|Right|Center) articleEmbed(Audio|Wideo articleEmbedVideo|ArticleFull|ArticleTitle|ArticleListTitle|AlbumHorizontal)">', lambda match: 'REMOVE">'), + (r'