fix rmf24 recipes

This commit is contained in:
Tomasz Długosz 2018-10-13 22:40:36 +02:00
parent 356db86058
commit e1c18fdc35
3 changed files with 12 additions and 24 deletions

View File

@@ -26,13 +26,11 @@ class RMF24_ESKN(BasicNewsRecipe):
(u'Nauka', u'http://www.rmf24.pl/nauka/feed')]
keep_only_tags = [
dict(name='div', attrs={'class': 'box articleSingle print'})]
dict(name='header', attrs={'class': 'article-header'}),
dict(name='div', attrs={'class': 'article-container'})]
remove_tags = [
dict(name='div', attrs={'class': 'toTop'}),
dict(name='div', attrs={'class': 'category'}),
dict(name='div', attrs={'class': 'REMOVE'}),
dict(name='div', attrs={'class': 'embed embedAd'})]
remove_tags = [dict(name='div', attrs={'id': 'ReklamaMobile'}),
dict(name='img', attrs={'class': 'img-responsive hidden-lg hidden-md hidden-sm'})]
extra_css = '''
h1 { font-size: 1.2em; }
@@ -42,7 +40,6 @@ class RMF24_ESKN(BasicNewsRecipe):
(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[
(r'<h2>Zdj.cie</h2>', lambda match: ''),
(r'embed embed(Left|Right|Center) articleEmbed(Audio|Wideo articleEmbedVideo|ArticleFull|ArticleTitle|ArticleListTitle|AlbumHorizontal)">', lambda match: 'REMOVE">'), # noqa
(r'<a href="http://www.facebook.com/pages/RMF24pl/.*?>RMF24.pl</a> on Facebook</div>',
lambda match: '</div>')
]

View File

@@ -24,14 +24,11 @@ class RMF24(BasicNewsRecipe):
(u'\u015awiat', u'http://www.rmf24.pl/fakty/swiat/feed')]
keep_only_tags = [
dict(name='div', attrs={'class': 'box articleSingle print'})]
dict(name='header', attrs={'class': 'article-header'}),
dict(name='div', attrs={'class': 'article-container'})]
remove_tags = [
dict(name='div', attrs={'id': 'adBox625'}),
dict(name='div', attrs={'class': 'toTop'}),
dict(name='div', attrs={'class': 'category'}),
dict(name='div', attrs={'class': 'REMOVE'}),
dict(name='div', attrs={'class': 'embed embedAd'})]
remove_tags = [dict(name='div', attrs={'id': 'ReklamaMobile'}),
dict(name='img', attrs={'class': 'img-responsive hidden-lg hidden-md hidden-sm'})]
extra_css = '''
h1 { font-size: 1.2em; }
@@ -40,7 +37,6 @@ class RMF24(BasicNewsRecipe):
(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[
(r'<h2>Zdj.cie</h2>', lambda match: ''),
(r'embed embed(Left|Right|Center) articleEmbed(Audio|Wideo articleEmbedVideo|ArticleFull|ArticleTitle|ArticleListTitle|AlbumHorizontal)">', lambda match: 'REMOVE">'), # noqa
(r'<a href="http://www.facebook.com/pages/RMF24pl/.*?>RMF24.pl</a> on Facebook</div>',
lambda match: '</div>')
]

View File

@@ -19,6 +19,7 @@ class RMF24_opinie(BasicNewsRecipe):
__author__ = u'Tomasz D\u0142ugosz'
no_stylesheets = True
remove_javascript = True
remove_empty_feeds = True
feeds = [(u'Blogi', u'http://www.rmf24.pl/opinie/blogi/feed'),
(u'Kontrwywiad',
@@ -28,16 +29,10 @@ class RMF24_opinie(BasicNewsRecipe):
(u'Komentarze', u'http://www.rmf24.pl/opinie/komentarze/feed')]
keep_only_tags = [
dict(name='div', attrs={'class': 'box articleSingle print'}),
dict(name='div', attrs={
'class': 'box articleSingle print singleCommentary'}),
dict(name='div', attrs={'class': 'box articleSingle print blogSingleEntry'})]
dict(name='header', attrs={'class': 'article-header'}),
dict(name='div', attrs={'class': 'article-container'})]
remove_tags = [
dict(name='div', attrs={'class': 'toTop'}),
dict(name='div', attrs={'class': 'category'}),
dict(name='div', attrs={'class': 'REMOVE'}),
dict(name='div', attrs={'class': 'embed embedAd'})]
remove_tags = [dict(name='div', attrs={'id': 'ReklamaMobile'})]
extra_css = '''
h1 { font-size: 1.2em; }