From a55d77bd4c0ac7f25be16b0aa5f0d19f75a9ffab Mon Sep 17 00:00:00 2001
From: Adrian Fiergolski
Date: Tue, 29 Dec 2015 12:20:29 +0100
Subject: [PATCH] Fix Rzeczpospolita recipe.

---
Reviewer note (this area is not part of the commit message): the patch was
reflowed to one diff record per line. Hunk line counts match the @@ headers
and the diffstat below. Indentation inside the hunks was reconstructed as
4-space Python indentation and blank lines as empty records; verify against
the target tree (or apply with --ignore-whitespace) before relying on it.

 recipes/rzeczpospolita.recipe | 56 +++++++++++------------------------
 1 file changed, 18 insertions(+), 38 deletions(-)

diff --git a/recipes/rzeczpospolita.recipe b/recipes/rzeczpospolita.recipe
index d1453ab57e..9bdbc67b57 100644
--- a/recipes/rzeczpospolita.recipe
+++ b/recipes/rzeczpospolita.recipe
@@ -2,9 +2,9 @@ from calibre.web.feeds.news import BasicNewsRecipe
 
 class RzeczpospolitaRecipe(BasicNewsRecipe):
     __license__ = 'GPL v3'
-    __author__ = u'kwetal and Tomasz Dlugosz'
+    __author__ = u'kwetal, Tomasz Dlugosz, adrianf0'
     language = 'pl'
-    version = 1
+    version = 2
 
     title = u'Rzeczpospolita OnLine'
     publisher = u'Presspublica Sp.'
@@ -17,38 +17,21 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
     remove_javascript = True
     encoding = 'utf-8'
     # Seems to work best, but YMMV
-    simultaneous_downloads = 1
+    simultaneous_downloads = 5
 
     feeds = []
-    feeds.append(u'http://www.rp.pl/rss/2.html')
-    feeds.append(u'http://www.rp.pl/rss/10.html')
-    feeds.append(u'http://www.rp.pl/rss/11.html')
-    feeds.append(u'http://www.rp.pl/rss/12.html')
-    feeds.append(u'http://www.rp.pl/rss/4.html')
-    feeds.append(u'http://www.rp.pl/rss/5.html')
-    feeds.append(u'http://www.rp.pl/rss/6.html')
-    feeds.append(u'http://www.rp.pl/rss/7.html')
-    feeds.append(u'http://www.rp.pl/rss/8.html')
+    feeds.append(u'http://www.rp.pl/rss/1056') #Wydarzenia
+    feeds.append(u'http://www.rp.pl/rss/1004') #Ekonomia
+    feeds.append(u'http://www.rp.pl/rss/1037') #Prawo
 
     keep_only_tags =[]
-    keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'story'}))
+    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'article-content'}))
 
     remove_tags =[]
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'articleLeftBox'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'socialNewTools'}))
-    remove_tags.append(dict(name = 'div', attrs = {'id' : 'socialTools'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'articleToolBoxTop'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'clr'}))
-    remove_tags.append(dict(name = 'div', attrs = {'id' : 'recommendations'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'editorPicks'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'editorPicks editorPicksFirst'}))
-    remove_tags.append(dict(name = 'div', attrs = {'id' : 'articleCopyrightText'}))
-    remove_tags.append(dict(name = 'div', attrs = {'id' : 'articleCopyrightButton'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'articleToolBoxBottom'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'more'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'addRecommendation'}))
-    remove_tags.append(dict(name = 'h3', attrs = {'id' : 'tags'}))
-
+    remove_tags.append(dict(name = 'div', attrs = {'id' : 'article-copyright-box'}))
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'article-footer'}))
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'article-tags'}))
+
     extra_css = '''
     body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
     h1{text-align: left;}
@@ -58,15 +41,12 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
     .fot{font-size: x-small; color: #666666;}
     '''
 
-    def skip_ad_pages(self, soup):
-        if ('advertisement' in soup.find('title').string.lower()):
-            href = soup.find('a').get('href')
-            return self.index_to_soup(href, raw=True)
-        else:
-            return None
+    # def skip_ad_pages(self, soup):
+    #     if ('advertisement' in soup.find('title').string.lower()):
+    #         href = soup.find('a').get('href')
+    #         return self.index_to_soup(href, raw=True)
+    #     else:
+    #         return None
 
     def print_version(self, url):
-        start, sep, rest = url.rpartition('/')
-        forget, sep, index = rest.rpartition(',')
-
-        return start + '/' + index + '?print=tak'
+        return url + '?template=printart'