Update the Irish Independent recipe.

- oldest_article is lowered from 7 to 2.
- The old remove_tags_before/remove_tags_after/classes() filters are replaced
  by a keep_only_tags match on divs whose class contains '_contentwrapper'
  and a remove_tags match on the 'article-share' and 'embed-video'
  data-testid values.
- The feed list is replaced. The 'Entertainment' entry uses the
  /entertainment/rss path (the same path the removed feed list used for that
  section) instead of repeating the Business feed URL.

diff --git a/recipes/irish_independent.recipe b/recipes/irish_independent.recipe
index 562016afee..a7425e6a29 100644
--- a/recipes/irish_independent.recipe
+++ b/recipes/irish_independent.recipe
@@ -12,35 +12,26 @@ class IrishIndependent(BasicNewsRecipe):
     description = 'Irish and World news from Irelands Bestselling Daily Broadsheet'
     __author__ = 'Neil Grogan'
     language = 'en_IE'
-    oldest_article = 7
+    oldest_article = 2
     max_articles_per_feed = 100
-    remove_tags_before = dict(id='article')
-    remove_tags_after = [dict(name='div', attrs={'class': 'toolsBottom'})]
     no_stylesheets = True
+
     keep_only_tags = [
-        classes('n-content1 n-content2 n-content3'),
+        dict(name='div', attrs={'class':lambda x: x and '_contentwrapper' in x})
     ]
-    remove_tags_after = classes('quick-subscribe')
+
     remove_tags = [
-        classes('icon1 icon-close c-lightbox1-side c-socials1 social-embed-consent-wall n-split1-side c-footer1'),
-        dict(attrs={'data-ad-slot': True}),
-        dict(attrs={'data-lightbox': True}),
-        dict(name='form'),
-        dict(attrs={'data-urn': lambda x: x and ':video:' in x}),
+        dict(name='div', attrs={'data-testid':['article-share', 'embed-video']})
     ]
 
     feeds = [
-        (u'Frontpage News', u'http://www.independent.ie/rss'),
-        (u'World News', u'http://www.independent.ie/world-news/rss'),
-        (u'Technology', u'http://www.independent.ie/business/technology/rss'),
-        (u'Sport', u'http://www.independent.ie/sport/rss'),
-        (u'Entertainment', u'http://www.independent.ie/entertainment/rss'),
-        (u'Independent Woman', u'http://www.independent.ie/lifestyle/independent-woman/rss'),
-        (u'Education', u'http://www.independent.ie/education/rss'),
-        (u'Lifestyle', u'http://www.independent.ie/lifestyle/rss'),
-        (u'Travel', u'http://www.independent.ie/travel/rss'),
-        (u'Letters', u'http://www.independent.ie/opinion/letters/rss'),
-        (u'Weather', u'http://www.independent.ie/weather/rss')
+        ('News', 'http://www.independent.ie/rss'),
+        ('Opinion', 'http://www.independent.ie/opinion/rss'),
+        ('Business', 'http://www.independent.ie/business/rss'),
+        ('Sport', 'http://www.independent.ie/sport/rss'),
+        ('Life', 'http://www.independent.ie/life/rss'),
+        ('Style', 'http://www.independent.ie/style/rss'),
+        ('Entertainment', 'http://www.independent.ie/entertainment/rss'),
     ]
 
     def preprocess_html(self, soup):