__license__ = 'GPL v3'
__copyright__ = '2009 Neil Grogan'
#
# Irish Independent Recipe
#
from calibre.web.feeds.news import BasicNewsRecipe


class IrishIndependent(BasicNewsRecipe):
    """calibre news recipe that downloads the Irish Independent RSS feeds.

    Articles are fetched from the section feeds listed in ``feeds``; each
    downloaded page is reduced to its content wrapper and stripped of
    icons, buttons, share widgets, embedded videos and related-article
    boxes before conversion.
    """

    title = u'Irish Independent'
    description = 'Irish and World news from Irelands Bestselling Daily Broadsheet'
    __author__ = 'Neil Grogan'
    language = 'en_IE'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    # The same story can be listed in several section feeds; keep only the
    # first occurrence of each article URL.
    ignore_duplicate_articles = {'url'}

    # Keep only <div> elements whose class attribute contains
    # '_contentwrapper'. The ``x and ...`` guard skips tags without a class.
    keep_only_tags = [
        dict(name='div', attrs={'class': lambda x: x and '_contentwrapper' in x}),
    ]

    # Drop inline icons/buttons and the share, video and related-article
    # containers identified by their data-testid attribute.
    remove_tags = [
        dict(name=['svg', 'button']),
        dict(
            name='div',
            attrs={
                'data-testid': [
                    'article-share',
                    'embed-video',
                    'inline-related-wrapper',
                ]
            },
        ),
    ]

    feeds = [
        ('Frontpage News', 'http://www.independent.ie/rss'),
        ('World News', 'http://www.independent.ie/world-news/rss'),
        ('Opinion', 'http://www.independent.ie/opinion/rss'),
        ('Business', 'http://www.independent.ie/business/rss'),
        ('Sport', 'http://www.independent.ie/sport/rss'),
        ('Life', 'http://www.independent.ie/life/rss'),
        ('Style', 'http://www.independent.ie/style/rss'),
        # Previously pointed at the Business feed (copy-paste error), which,
        # combined with URL de-duplication, left this section without any
        # articles of its own.
        # NOTE(review): URL follows the /<section>/rss pattern used above;
        # confirm the feed is live.
        ('Entertainment', 'http://www.independent.ie/entertainment/rss'),
    ]

    def preprocess_html(self, soup):
        """Copy each ``data-src`` attribute into ``src`` and return the soup.

        Every element carrying a ``data-src`` attribute gets its ``src``
        overwritten with that value (presumably lazily loaded images whose
        real address is held in ``data-src``).

        :param soup: parsed article HTML, modified in place.
        :return: the same ``soup`` object.
        """
        for img in soup.findAll(attrs={'data-src': True}):
            img['src'] = img['data-src']
        return soup