from calibre.web.feeds.news import BasicNewsRecipe

try:
    from urllib.parse import urljoin
except ImportError:  # Python 2 interpreter
    from urlparse import urljoin


class INFRA(BasicNewsRecipe):
    """Recipe that downloads the latest news feed of infra.org.pl."""

    title = u'INFRA'
    __author__ = 'fenuks'
    description = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.'
    cover_url = 'http://i.imgur.com/j7hJT.jpg'
    category = 'UFO'
    language = 'pl'

    # Base URL against which relative article links are resolved.
    index = 'http://infra.org.pl'

    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True

    # The documented BasicNewsRecipe option is ``remove_attributes``; the
    # previous spelling ``remove_attrs`` is not a documented option.
    remove_attributes = ['style']
    keep_only_tags = [dict(id='ja-current-content')]

    feeds = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/rss')]

    def preprocess_html(self, soup):
        """Strip inline styles and make every anchor's href absolute.

        :param soup: parsed article document; must support ``findAll`` and
            item access/deletion on the returned tags.
        :return: the same ``soup`` object, modified in place.
        """
        for item in soup.findAll(style=True):
            del item['style']
        for a in soup.findAll('a', href=True):
            # urljoin leaves URLs that already carry a scheme (http:, https:,
            # mailto:, ...) untouched and resolves relative ones against the
            # site root, inserting the '/' separator that plain string
            # concatenation would omit for paths such as 'foo/bar'.
            a['href'] = urljoin(self.index, a['href'])
        return soup