from calibre.web.feeds.news import BasicNewsRecipe


class RzeczpospolitaRecipe(BasicNewsRecipe):
    """Calibre news recipe for the online edition of Rzeczpospolita (rp.pl).

    The recipe is purely declarative: it lists the RSS feeds to download,
    the HTML elements to keep from or strip out of each article page, and a
    little CSS to style image captions. All fetching and conversion is done
    by ``BasicNewsRecipe``.
    """

    __license__ = 'GPL v3'
    __author__ = u'kwetal, Tomasz Dlugosz, adrianf0'

    # Publication metadata.
    language = 'pl'
    title = u'Rzeczpospolita OnLine'
    publisher = u'Presspublica Sp.'
    category = u'News'
    description = u'Newspaper'

    # Download limits and page clean-up switches.
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    # Seems to work best, but YMMV
    simultaneous_downloads = 5

    # (section title, RSS URL) pairs.
    feeds = [
        (u"Wiadomości", u'http://www.rp.pl/rss/1056'),  # Events
        (u"Ekonomia", u'http://www.rp.pl/rss/1004'),  # Economy
        (u"Prawo", u'http://www.rp.pl/rss/1037'),  # Law
    ]

    # Only these elements are kept from each article page: the headline,
    # the lead image, and the content/lead/caption containers.
    keep_only_tags = [
        dict(name='h1', attrs={'id': 'article-title'}),
        dict(name='img', attrs={'class': 'img-responsive article__image'}),
        dict(
            name='div',
            attrs={
                'class': [
                    'article-content',
                    'article__lead js-voice-read',
                    'article__content js-voice-read',
                    'article__image-desc',
                    'article__image-author',
                ]
            },
        ),
    ]

    # Elements stripped out of the kept content: the related-articles box,
    # the premium audio player marker and advertisement labels.
    remove_tags = [
        dict(name='div', attrs={'class': 'related-articles__wrapper'}),
        dict(name='span', attrs={'class': ['article__premium-player', 'ad-label']}),
    ]

    # Small, right-aligned styling for image captions and photo credits.
    extra_css = '''
        div.article__image-desc {font-style:italic; font-size:70%;text-align:right}
        div.article__image-author {font-size:60%;text-align:right}
    '''