calibre/recipes/irish_independent.recipe
2023-01-07 13:17:54 +05:30

50 lines
1.9 KiB
Plaintext

__license__ = 'GPL v3'
__copyright__ = '2009 Neil Grogan'
#
# Irish Independent Recipe
#
from calibre.web.feeds.news import BasicNewsRecipe, classes
class IrishIndependent(BasicNewsRecipe):
    """Recipe that builds an e-book from the Irish Independent RSS feeds.

    The class only declares configuration consumed by ``BasicNewsRecipe``
    (feed list, tag filters) plus one HTML pre-processing hook that fixes
    up image sources.
    """

    title = u'Irish Independent'
    description = 'Irish and World news from Irelands Bestselling Daily Broadsheet'
    __author__ = 'Neil Grogan'
    language = 'en_IE'

    # Feed limits: article age cut-off (days, per calibre's recipe API) and
    # the maximum number of articles taken from each feed.
    oldest_article = 7
    max_articles_per_feed = 100

    no_stylesheets = True

    # --- Article clean-up rules -------------------------------------------
    remove_tags_before = dict(id='article')
    # Keep only the containers that carry one of the listed CSS classes.
    keep_only_tags = [
        classes('n-content1 n-content2 n-content3'),
    ]
    # NOTE: an earlier ``remove_tags_after`` value targeting
    # ``div.toolsBottom`` used to be assigned above and was immediately
    # overwritten by this one, so it never had any effect. Only the effective
    # rule is kept.
    remove_tags_after = classes('quick-subscribe')
    remove_tags = [
        classes('icon1 icon-close c-lightbox1-side c-socials1 social-embed-consent-wall n-split1-side c-footer1'),
        dict(attrs={'data-ad-slot': True}),
        dict(attrs={'data-lightbox': True}),
        dict(name='form'),
        # Matches any tag whose data-urn attribute contains ':video:'; the
        # ``x and`` guard skips tags that have no data-urn value at all.
        dict(attrs={'data-urn': lambda x: x and ':video:' in x}),
    ]

    # (section title, RSS URL) pairs.
    feeds = [
        (u'Frontpage News', u'http://www.independent.ie/rss'),
        (u'World News', u'http://www.independent.ie/world-news/rss'),
        (u'Technology', u'http://www.independent.ie/business/technology/rss'),
        (u'Sport', u'http://www.independent.ie/sport/rss'),
        (u'Entertainment', u'http://www.independent.ie/entertainment/rss'),
        (u'Independent Woman', u'http://www.independent.ie/lifestyle/independent-woman/rss'),
        (u'Education', u'http://www.independent.ie/education/rss'),
        (u'Lifestyle', u'http://www.independent.ie/lifestyle/rss'),
        (u'Travel', u'http://www.independent.ie/travel/rss'),
        (u'Letters', u'http://www.independent.ie/opinion/letters/rss'),
        (u'Weather', u'http://www.independent.ie/weather/rss')
    ]

    def preprocess_html(self, soup):
        """Copy ``data-src`` into ``src`` on every tag that has ``data-src``.

        :param soup: parsed article document; modified in place.
        :return: the same ``soup`` object.
        """
        # Presumably the site lazy-loads images and keeps the real URL in
        # data-src -- TODO confirm against a live article page.
        for img in soup.findAll(attrs={'data-src': True}):
            img['src'] = img['data-src']
        return soup