Update irish_independent.recipe

This commit is contained in:
unkn0w7n 2023-04-27 16:57:44 +05:30 committed by GitHub
parent c547b188d7
commit 8d66c6eab3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -12,35 +12,26 @@ class IrishIndependent(BasicNewsRecipe):
description = 'Irish and World news from Irelands Bestselling Daily Broadsheet'
__author__ = 'Neil Grogan'
language = 'en_IE'
oldest_article = 7
oldest_article = 2
max_articles_per_feed = 100
remove_tags_before = dict(id='article')
remove_tags_after = [dict(name='div', attrs={'class': 'toolsBottom'})]
no_stylesheets = True
keep_only_tags = [
classes('n-content1 n-content2 n-content3'),
dict(name='div', attrs={'class':lambda x: x and '_contentwrapper' in x})
]
remove_tags_after = classes('quick-subscribe')
remove_tags = [
classes('icon1 icon-close c-lightbox1-side c-socials1 social-embed-consent-wall n-split1-side c-footer1'),
dict(attrs={'data-ad-slot': True}),
dict(attrs={'data-lightbox': True}),
dict(name='form'),
dict(attrs={'data-urn': lambda x: x and ':video:' in x}),
dict(name='div', attrs={'data-testid':['article-share', 'embed-video']})
]
feeds = [
(u'Frontpage News', u'http://www.independent.ie/rss'),
(u'World News', u'http://www.independent.ie/world-news/rss'),
(u'Technology', u'http://www.independent.ie/business/technology/rss'),
(u'Sport', u'http://www.independent.ie/sport/rss'),
(u'Entertainment', u'http://www.independent.ie/entertainment/rss'),
(u'Independent Woman', u'http://www.independent.ie/lifestyle/independent-woman/rss'),
(u'Education', u'http://www.independent.ie/education/rss'),
(u'Lifestyle', u'http://www.independent.ie/lifestyle/rss'),
(u'Travel', u'http://www.independent.ie/travel/rss'),
(u'Letters', u'http://www.independent.ie/opinion/letters/rss'),
(u'Weather', u'http://www.independent.ie/weather/rss')
('News', 'http://www.independent.ie/rss'),
('Opinion', 'http://www.independent.ie/opinion/rss'),
('Business', 'http://www.independent.ie/business/rss'),
('Sport', 'http://www.independent.ie/sport/rss'),
('Life', 'http://www.independent.ie/life/rss'),
('Style', 'http://www.independent.ie/style/rss'),
('Entertainment', 'http://www.independent.ie/entertainment/rss'),
]
def preprocess_html(self, soup):