from calibre.web.feeds.news import BasicNewsRecipe


class RzeczpospolitaRecipe(BasicNewsRecipe):
    """Calibre recipe for the online edition of Rzeczpospolita (rp.pl).

    Pulls three RSS feeds from www.rp.pl, keeps only the article-content
    container of each page, strips the copyright box, footer and tag list,
    and requests every article through its ``?template=printart`` URL.
    """

    __license__ = 'GPL v3'
    __author__ = u'kwetal, Tomasz Dlugosz, adrianf0'
    language = 'pl'
    version = 2

    title = u'Rzeczpospolita OnLine'
    publisher = u'Presspublica Sp.'
    category = u'News'
    description = u'Newspaper'

    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    # Seems to work best, but YMMV
    simultaneous_downloads = 5

    # (section title, RSS URL) pairs. The first title is the Polish word
    # for "News"; its s-acute is written as an escape so the literal stays
    # correct regardless of how the file itself is encoded (it was
    # previously stored as mis-decoded bytes).
    feeds = [
        (u'Wiadomo\u015bci', u'http://www.rp.pl/rss/1056'),  # Events
        (u'Ekonomia', u'http://www.rp.pl/rss/1004'),  # Economy
        (u'Prawo', u'http://www.rp.pl/rss/1037'),  # Law
    ]

    # Only the main article container is kept from each page.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'article-content'}),
    ]

    # Elements dropped from the downloaded article.
    remove_tags = [
        dict(name='div', attrs={'id': 'article-copyright-box'}),
        dict(name='div', attrs={'class': 'article-footer'}),
        dict(name='div', attrs={'class': 'article-tags'}),
    ]

    # Adjacent literals are concatenated into a single stylesheet string,
    # one CSS rule per source line.
    extra_css = (
        ' body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}'
        ' h1{text-align: left;}'
        ' h2{font-size: medium; font-weight: bold;}'
        ' p.lead {font-weight: bold; text-align: left;}'
        ' .authordate {font-size: small; color: #696969;}'
        ' .fot{font-size: x-small; color: #666666;}'
        ' '
    )

    # Disabled ad-page handling, left commented out as in the original:
    #
    # def skip_ad_pages(self, soup):
    #     if ('advertisement' in soup.find('title').string.lower()):
    #         href = soup.find('a').get('href')
    #         return self.index_to_soup(href, raw=True)
    #     else:
    #         return None

    def print_version(self, url):
        """Return ``url`` with the print-template query string appended.

        :param url: article URL as a string.
        :return: ``url`` followed by ``'?template=printart'``. The suffix
            is appended unconditionally, so ``url`` is presumably expected
            to carry no query string of its own -- TODO confirm against
            the feed contents.
        """
        return url + '?template=printart'