update filmweb

This commit is contained in:
Tomasz Długosz 2019-10-14 23:10:05 +02:00
parent ed7476adb4
commit 2db371f7de
2 changed files with 15 additions and 17 deletions

View File

@@ -6,10 +6,10 @@ class FilmWebPl(BasicNewsRecipe):
title = 'FilmWeb' title = 'FilmWeb'
__author__ = 'fenuks' __author__ = 'fenuks'
description = u'Filmweb.pl - Filmy takie jak Ty Filmweb to największy i najczęściej odwiedzany polski serwis filmowy.' description = u'Filmweb.pl - Filmy takie jak Ty Filmweb to największy i najczęściej odwiedzany polski serwis filmowy.'
cover_url = 'http://1.fwcdn.pl/an/867323/63321_1.11.jpg' cover_url = 'https://1.fwcdn.pl/an/np/49468/2018/15037.2.jpg'
category = 'movies' category = 'movies'
language = 'pl' language = 'pl'
index = 'http://www.filmweb.pl' index = 'https://www.filmweb.pl'
oldest_article = 8 oldest_article = 8
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
@@ -28,25 +28,23 @@ class FilmWebPl(BasicNewsRecipe):
remove_tags = [dict(attrs={'class':['infoParent', 'likeBar', remove_tags = [dict(attrs={'class':['infoParent', 'likeBar',
'droptions-box pull-right', 'photoDesc', 'imageLicense', 'play big', 'shadow embed__icon--svg']})] 'droptions-box pull-right', 'photoDesc', 'imageLicense', 'play big', 'shadow embed__icon--svg']})]
remove_attributes = ['style',] remove_attributes = ['style',]
keep_only_tags = [dict(attrs={'class': ['newsHdr hdrWithAuthor ', 'reviewHdr', 'newsContent newsPage', 'newsContent']})] keep_only_tags = [dict(attrs={'class': ['newsHdr hdrWithAuthor ', 'reviewHdr', 'newsContent newsPage', 'newsContent', 'hdr hdr-mega']})]
# remove_tags_before = dict(attrs={'class': 'hdr hdr-mega'}) feeds = [(u'Filmy', u'https://www.filmweb.pl/feed/news/category/film'),
# remove_tags_after = dict(attrs={'class': 'newsContent'}) (u'Seriale', u'https://www.filmweb.pl/feed/news/category/serial'),
feeds = [(u'Filmy', u'http://www.filmweb.pl/feed/news/category/film'), (u'Box office', u'https://www.filmweb.pl/feed/news/category/boxoffice'),
(u'Seriale', u'http://www.filmweb.pl/feed/news/category/serial'), (u'Telewizja', u'https://www.filmweb.pl/feed/news/category/tv'),
(u'Box office', u'http://www.filmweb.pl/feed/news/category/boxoffice'), (u'Festiwale, nagrody i przeglądy', u'https://www.filmweb.pl/feed/news/category/festival'),
(u'Telewizja', u'http://www.filmweb.pl/feed/news/category/tv'), (u'Multimedia', u'https://www.filmweb.pl/feed/news/category/multimedia'),
(u'Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'), (u'Dystrybucja dvd/blu-ray', u'https://www.filmweb.pl/feed/news/category/dvd'),
(u'Multimedia', u'http://www.filmweb.pl/feed/news/category/multimedia'), (u'Gry wideo', u'https://www.filmweb.pl/feed/news/category/game'),
(u'Dystrybucja dvd/blu-ray', u'http://www.filmweb.pl/feed/news/category/dvd'), (u'Różne', u'https://www.filmweb.pl/feed/news/category/other'),
(u'Gry wideo', u'http://www.filmweb.pl/feed/news/category/game'), (u'Recenzje redakcji', u'https://www.filmweb.pl/feed/reviews/latest'),
(u'Różne', u'http://www.filmweb.pl/feed/news/category/other'), (u'Recenzje użytkowników', u'https://www.filmweb.pl/feed/user-reviews/latest')
(u'Recenzje redakcji', u'http://www.filmweb.pl/feed/reviews/latest'),
(u'Recenzje użytkowników', u'http://www.filmweb.pl/feed/user-reviews/latest')
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):
for a in soup('a', href=True): for a in soup('a', href=True):
if 'http://' not in a['href'] and 'https://' not in a['href']: if 'https://' not in a['href']:
a['href'] = self.index + a['href'] a['href'] = self.index + a['href']
return soup return soup

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.6 KiB

After

Width:  |  Height:  |  Size: 454 B