from calibre.web.feeds.news import BasicNewsRecipe


class RzeczpospolitaRecipe(BasicNewsRecipe):
    """Calibre recipe that builds an e-book from the rp.pl RSS feeds.

    Articles are reduced to the ``div#story`` element, stripped of the
    toolbars/recommendation boxes listed in ``remove_tags``, and fetched
    through their ``?print=tak`` variant (see ``print_version``).
    """

    __license__ = 'GPL v3'
    __author__ = u'kwetal and Tomasz Dlugosz'
    language = 'pl'
    version = 1

    title = u'Rzeczpospolita OnLine'
    publisher = u'Presspublica Sp.'
    category = u'News'
    description = u'Newspaper'

    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'

    # Seems to work best, but YMMV
    simultaneous_downloads = 1

    # RSS channels to download; order is preserved in the generated book.
    feeds = [
        u'http://www.rp.pl/rss/2.html',
        u'http://www.rp.pl/rss/10.html',
        u'http://www.rp.pl/rss/11.html',
        u'http://www.rp.pl/rss/12.html',
        u'http://www.rp.pl/rss/4.html',
        u'http://www.rp.pl/rss/5.html',
        u'http://www.rp.pl/rss/6.html',
        u'http://www.rp.pl/rss/7.html',
        u'http://www.rp.pl/rss/8.html',
    ]

    # Only the article container is kept from each downloaded page.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'story'}),
    ]

    # Elements removed from the kept article container.
    remove_tags = [
        dict(name='div', attrs={'class': 'articleLeftBox'}),
        dict(name='div', attrs={'class': 'socialNewTools'}),
        dict(name='div', attrs={'id': 'socialTools'}),
        dict(name='div', attrs={'class': 'articleToolBoxTop'}),
        dict(name='div', attrs={'class': 'clr'}),
        dict(name='div', attrs={'id': 'recommendations'}),
        dict(name='div', attrs={'class': 'editorPicks'}),
        dict(name='div', attrs={'class': 'editorPicks editorPicksFirst'}),
        dict(name='div', attrs={'id': 'articleCopyrightText'}),
        dict(name='div', attrs={'id': 'articleCopyrightButton'}),
        dict(name='div', attrs={'class': 'articleToolBoxBottom'}),
        dict(name='div', attrs={'class': 'more'}),
        dict(name='div', attrs={'class': 'addRecommendation'}),
        dict(name='h3', attrs={'id': 'tags'}),
    ]

    # Adjacent literals are concatenated into the exact original stylesheet
    # string (including its leading and trailing space).
    extra_css = (
        ' body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}'
        ' h1{text-align: left;}'
        ' h2{font-size: medium; font-weight: bold;}'
        ' p.lead {font-weight: bold; text-align: left;}'
        ' .authordate {font-size: small; color: #696969;}'
        ' .fot{font-size: x-small; color: #666666;}'
        ' '
    )

    def skip_ad_pages(self, soup):
        """Follow the first link of an interstitial advertisement page.

        A page is treated as an advertisement when its ``<title>`` text
        contains ``advertisement`` (case-insensitive).

        :param soup: parsed document of the downloaded page.
        :return: the raw content fetched from the first ``<a href>`` on an
            advertisement page, or ``None`` when the page is not an
            advertisement or offers no usable link.
        """
        title_tag = soup.find('title')
        # ``.string`` is None for an empty title or one with nested markup.
        title_text = title_tag.string if title_tag is not None else None
        if not title_text or 'advertisement' not in title_text.lower():
            return None

        link = soup.find('a')
        href = link.get('href') if link is not None else None
        if not href:
            # Nothing to follow; let the caller process the page as-is
            # instead of failing the whole article download.
            return None
        return self.index_to_soup(href, raw=True)

    def print_version(self, url):
        """Return the printer-friendly URL for an article URL.

        The last path segment is reduced to the part after its final comma
        and ``?print=tak`` is appended, e.g. ``.../a/b,c,123.html`` becomes
        ``.../a/123.html?print=tak``. A segment without a comma is kept
        unchanged.

        :param url: article URL as found in the feed.
        :return: URL of the print view.
        """
        base, _, last_segment = url.rpartition('/')
        _, _, article_id = last_segment.rpartition(',')
        return base + '/' + article_id + '?print=tak'