Update Rzeczpospolita recipe

Merge branch 'master' of https://github.com/adrianf0/calibre
This commit is contained in:
Kovid Goyal 2015-12-31 10:00:44 +05:30
commit 0b83786cc6

View File

@ -2,9 +2,9 @@ from calibre.web.feeds.news import BasicNewsRecipe
class RzeczpospolitaRecipe(BasicNewsRecipe): class RzeczpospolitaRecipe(BasicNewsRecipe):
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = u'kwetal and Tomasz Dlugosz' __author__ = u'kwetal, Tomasz Dlugosz, adrianf0'
language = 'pl' language = 'pl'
version = 1 version = 2
title = u'Rzeczpospolita OnLine' title = u'Rzeczpospolita OnLine'
publisher = u'Presspublica Sp.' publisher = u'Presspublica Sp.'
@ -17,37 +17,20 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
remove_javascript = True remove_javascript = True
encoding = 'utf-8' encoding = 'utf-8'
# Seems to work best, but YMMV # Seems to work best, but YMMV
simultaneous_downloads = 1 simultaneous_downloads = 5
feeds = [] feeds = []
feeds.append(u'http://www.rp.pl/rss/2.html') feeds.append(u'http://www.rp.pl/rss/1056') #Wydarzenia
feeds.append(u'http://www.rp.pl/rss/10.html') feeds.append(u'http://www.rp.pl/rss/1004') #Ekonomia
feeds.append(u'http://www.rp.pl/rss/11.html') feeds.append(u'http://www.rp.pl/rss/1037') #Prawo
feeds.append(u'http://www.rp.pl/rss/12.html')
feeds.append(u'http://www.rp.pl/rss/4.html')
feeds.append(u'http://www.rp.pl/rss/5.html')
feeds.append(u'http://www.rp.pl/rss/6.html')
feeds.append(u'http://www.rp.pl/rss/7.html')
feeds.append(u'http://www.rp.pl/rss/8.html')
keep_only_tags =[] keep_only_tags =[]
keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'story'})) keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'article-content'}))
remove_tags =[] remove_tags =[]
remove_tags.append(dict(name = 'div', attrs = {'class' : 'articleLeftBox'})) remove_tags.append(dict(name = 'div', attrs = {'id' : 'article-copyright-box'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'socialNewTools'})) remove_tags.append(dict(name = 'div', attrs = {'class' : 'article-footer'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'socialTools'})) remove_tags.append(dict(name = 'div', attrs = {'class' : 'article-tags'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'articleToolBoxTop'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'clr'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'recommendations'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'editorPicks'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'editorPicks editorPicksFirst'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'articleCopyrightText'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'articleCopyrightButton'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'articleToolBoxBottom'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'more'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'addRecommendation'}))
remove_tags.append(dict(name = 'h3', attrs = {'id' : 'tags'}))
extra_css = ''' extra_css = '''
body {font-family: verdana, arial, helvetica, geneva, sans-serif ;} body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
@ -58,15 +41,12 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
.fot{font-size: x-small; color: #666666;} .fot{font-size: x-small; color: #666666;}
''' '''
def skip_ad_pages(self, soup): # def skip_ad_pages(self, soup):
if ('advertisement' in soup.find('title').string.lower()): # if ('advertisement' in soup.find('title').string.lower()):
href = soup.find('a').get('href') # href = soup.find('a').get('href')
return self.index_to_soup(href, raw=True) # return self.index_to_soup(href, raw=True)
else: # else:
return None # return None
def print_version(self, url): def print_version(self, url):
start, sep, rest = url.rpartition('/') return url + '?template=printart'
forget, sep, index = rest.rpartition(',')
return start + '/' + index + '?print=tak'