Update Irish Independent and Irish Times

This commit is contained in:
Kovid Goyal 2023-01-07 13:17:54 +05:30
parent c0ff9b11e8
commit 37fd1d521a
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 16 additions and 11 deletions

View File

@@ -4,7 +4,7 @@ __copyright__ = '2009 Neil Grogan'
# Irish Independent Recipe # Irish Independent Recipe
# #
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe, classes
class IrishIndependent(BasicNewsRecipe): class IrishIndependent(BasicNewsRecipe):
@@ -17,13 +17,16 @@ class IrishIndependent(BasicNewsRecipe):
remove_tags_before = dict(id='article') remove_tags_before = dict(id='article')
remove_tags_after = [dict(name='div', attrs={'class': 'toolsBottom'})] remove_tags_after = [dict(name='div', attrs={'class': 'toolsBottom'})]
no_stylesheets = True no_stylesheets = True
keep_only_tags = [
classes('n-content1 n-content2 n-content3'),
]
remove_tags_after = classes('quick-subscribe')
remove_tags = [ remove_tags = [
dict(name='div', attrs={'class': 'toolsBottom'}), classes('icon1 icon-close c-lightbox1-side c-socials1 social-embed-consent-wall n-split1-side c-footer1'),
dict(name='div', attrs={'class': 'toolsTop'}), dict(attrs={'data-ad-slot': True}),
dict(name='div', attrs={'class': 'boxRelated'}), dict(attrs={'data-lightbox': True}),
dict(name='div', attrs={'class': 'section first'}), dict(name='form'),
dict(name='div', attrs={'class': 'tabIt'}), dict(attrs={'data-urn': lambda x: x and ':video:' in x}),
dict(name='div', attrs={'class': 'inner'})
] ]
feeds = [ feeds = [
@@ -40,6 +43,7 @@ class IrishIndependent(BasicNewsRecipe):
(u'Weather', u'http://www.independent.ie/weather/rss') (u'Weather', u'http://www.independent.ie/weather/rss')
] ]
# If text only articles are desired def preprocess_html(self, soup):
# def print_version(self, url): for img in soup.findAll(attrs={'data-src': True}):
# return '%s?service=Print' % url img['src'] = img['data-src']
return soup

View File

@@ -36,7 +36,8 @@ class IrishTimes(BasicNewsRecipe):
classes('lead-art-wrapper article-body-wrapper'), classes('lead-art-wrapper article-body-wrapper'),
] ]
remove_tags = [ remove_tags = [
dict(name='button') dict(name='button'),
classes('sm-promo-headline'),
] ]
remove_attributes = ['width', 'height'] remove_attributes = ['width', 'height']