__license__ = 'GPL v3'
__copyright__ = '2009 Neil Grogan'
#
# Irish Independent Recipe
#
from calibre.web.feeds.news import BasicNewsRecipe, classes


class IrishIndependent(BasicNewsRecipe):
    """Calibre recipe that builds an e-book from the Irish Independent RSS feeds."""

    title = u'Irish Independent'
    description = 'Irish and World news from Irelands Bestselling Daily Broadsheet'
    __author__ = 'Neil Grogan'
    language = 'en_IE'

    # Feed limits: articles up to 7 days old, at most 100 per feed.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True

    # NOTE(review): this selector sits alongside keep_only_tags below and may
    # be a leftover from an older page layout -- confirm before removing.
    remove_tags_before = dict(id='article')

    # Article content containers to retain.
    keep_only_tags = [
        classes('n-content1 n-content2 n-content3'),
    ]

    # A second, earlier assignment of remove_tags_after (matching
    # div.toolsBottom) used to precede this one; it was always overwritten by
    # this value, so only the effective definition is kept.
    remove_tags_after = classes('quick-subscribe')

    # Page furniture stripped from each article: icons, lightboxes, social
    # widgets, ad slots, forms and embedded video elements.
    remove_tags = [
        classes(
            'icon1 icon-close c-lightbox1-side c-socials1'
            ' social-embed-consent-wall n-split1-side c-footer1'
        ),
        dict(attrs={'data-ad-slot': True}),
        dict(attrs={'data-lightbox': True}),
        dict(name='form'),
        # The `x and` guard keeps the predicate safe when the attribute
        # value is missing or empty.
        dict(attrs={'data-urn': lambda x: x and ':video:' in x}),
    ]

    feeds = [
        (u'Frontpage News', u'http://www.independent.ie/rss'),
        (u'World News', u'http://www.independent.ie/world-news/rss'),
        (u'Technology', u'http://www.independent.ie/business/technology/rss'),
        (u'Sport', u'http://www.independent.ie/sport/rss'),
        (u'Entertainment', u'http://www.independent.ie/entertainment/rss'),
        (u'Independent Woman', u'http://www.independent.ie/lifestyle/independent-woman/rss'),
        (u'Education', u'http://www.independent.ie/education/rss'),
        (u'Lifestyle', u'http://www.independent.ie/lifestyle/rss'),
        (u'Travel', u'http://www.independent.ie/travel/rss'),
        (u'Letters', u'http://www.independent.ie/opinion/letters/rss'),
        (u'Weather', u'http://www.independent.ie/weather/rss'),
    ]

    def preprocess_html(self, soup):
        """Copy each element's ``data-src`` value into its ``src`` attribute.

        Every element in ``soup`` carrying a ``data-src`` attribute gets
        ``src`` set to that value (presumably lazy-loaded images -- the
        selector itself is not restricted to ``<img>`` tags).

        Returns the same ``soup`` object, modified in place.
        """
        for img in soup.findAll(attrs={'data-src': True}):
            img['src'] = img['data-src']
        return soup