diff --git a/recipes/adventure_zone_pl.recipe b/recipes/adventure_zone_pl.recipe deleted file mode 100644 index afb8e080d9..0000000000 --- a/recipes/adventure_zone_pl.recipe +++ /dev/null @@ -1,45 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class Adventure_zone(BasicNewsRecipe): - title = u'Adventure Zone' - __author__ = 'fenuks' - description = u'Czytaj więcej o przygodzie - codzienne nowinki. Szukaj u nas solucji i poradników, czytaj recenzje i zapowiedzi. Także galeria, pliki oraz forum dla wszystkich fanów gier przygodowych.' # noqa - category = 'games' - language = 'pl' - BASEURL = 'http://www.adventure-zone.info/fusion/' - no_stylesheets = True - extra_css = '.image {float: left; margin-right: 5px;}' - oldest_article = 20 - max_articles_per_feed = 100 - cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png' - remove_attributes = ['style'] - use_embedded_content = False - keep_only_tags = [dict(attrs={'class': 'content'})] - remove_tags = [dict(attrs={'class': 'footer'})] - feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/rss/index.php')] - - _trigger_words = ('zapowied', 'recenzj', 'solucj', 'poradnik') - - @staticmethod - def _is_linked_text(title): - return 'zapowied' in title or 'recenz' in title or 'solucj' in title or 'poradnik' in title - - def skip_ad_pages(self, soup): - skip_tag = soup.body.find(attrs={'class':'subject'}) - skip_tag = skip_tag.findAll(name='a', href=True) - title = soup.title.renderContents().decode('utf-8').lower() - if self._is_linked_text(title): - for r in skip_tag: - word = r.renderContents().decode('utf-8') - if not word: - continue - word = word.lower() - if self._is_linked_text(word): - return self.index_to_soup(self.BASEURL+r['href'], raw=True) - - def preprocess_html(self, soup): - for link in soup.findAll('a', href=True): - if not link['href'].startswith('http'): - link['href'] = self.BASEURL + link['href'] - return soup diff --git a/recipes/fdb_pl.recipe b/recipes/fdb_pl.recipe index e281682f1c..3dbd074f1a 100644 --- a/recipes/fdb_pl.recipe +++ b/recipes/fdb_pl.recipe @@ -9,7 +9,7 @@ class FDBPl(BasicNewsRecipe): category = 'film' language = 'pl' extra_css = '.options-left > li {display: inline;} em {display: block;}' - cover_url = 'http://fdb.pl/assets/fdb2/logo.png' + cover_url = 'https://i1.fdbimg.pl/hygg2xp1/480x300_magq39.jpg' use_embedded_content = False oldest_article = 7 max_articles_per_feed = 100 @@ -19,25 +19,25 @@ class FDBPl(BasicNewsRecipe): remove_attributes = ['style', 'font'] ignore_duplicate_articles = {'title', 'url'} - keep_only_tags = [dict(attrs={'class': 'news-item news-first'})] + keep_only_tags = [dict(attrs={'class': ['row justify-content-center', 'figure']})] remove_tags = [ - dict(attrs={'class': ['dig dig-first', 'ads clearfix', 'comments']})] + dict(attrs={'class': ['news-footer infinite-scroll-breakepoit', 'list-inline text-muted m-0']})] feeds = [] def parse_index(self): feeds = [] feeds.append((u'Wiadomości', self.get_articles( - 'http://fdb.pl/wiadomosci?page={0}', 2))) + 'https://fdb.pl/wiadomosci?page={0}', 2))) return feeds def get_articles(self, url, pages=1): articles = [] for nr in range(1, pages + 1): soup = self.index_to_soup(url.format(nr)) - for tag in soup.findAll(attrs={'class': 'news-item clearfix'}): - node = tag.find('h2') + for tag in soup.findAll(attrs={'class': 'col-xs-6 col-sm-4 col-md-4 col-lg-3'}): + node = tag.find('h5') title = node.a.string - url = 'http://fdb.pl' + node.a['href'] + url = node.a['href'] date = '' articles.append({'title': title, 'url': url, diff --git a/recipes/film_web.recipe b/recipes/film_web.recipe index ef71e548d2..7ed105bc2b 100644 --- a/recipes/film_web.recipe +++ b/recipes/film_web.recipe @@ -6,10 +6,10 @@ class FilmWebPl(BasicNewsRecipe): title = 'FilmWeb' __author__ = 'fenuks' description = u'Filmweb.pl - Filmy takie jak Ty Filmweb to największy i najczęściej odwiedzany polski serwis filmowy.' - cover_url = 'http://1.fwcdn.pl/an/867323/63321_1.11.jpg' + cover_url = 'https://1.fwcdn.pl/an/np/49468/2018/15037.2.jpg' category = 'movies' language = 'pl' - index = 'http://www.filmweb.pl' + index = 'https://www.filmweb.pl' oldest_article = 8 max_articles_per_feed = 100 no_stylesheets = True @@ -28,25 +28,23 @@ class FilmWebPl(BasicNewsRecipe): remove_tags = [dict(attrs={'class':['infoParent', 'likeBar', 'droptions-box pull-right', 'photoDesc', 'imageLicense', 'play big', 'shadow embed__icon--svg']})] remove_attributes = ['style',] - keep_only_tags = [dict(attrs={'class': ['newsHdr hdrWithAuthor ', 'reviewHdr', 'newsContent newsPage', 'newsContent']})] - # remove_tags_before = dict(attrs={'class': 'hdr hdr-mega'}) - # remove_tags_after = dict(attrs={'class': 'newsContent'}) - feeds = [(u'Filmy', u'http://www.filmweb.pl/feed/news/category/film'), - (u'Seriale', u'http://www.filmweb.pl/feed/news/category/serial'), - (u'Box office', u'http://www.filmweb.pl/feed/news/category/boxoffice'), - (u'Telewizja', u'http://www.filmweb.pl/feed/news/category/tv'), - (u'Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'), - (u'Multimedia', u'http://www.filmweb.pl/feed/news/category/multimedia'), - (u'Dystrybucja dvd/blu-ray', u'http://www.filmweb.pl/feed/news/category/dvd'), - (u'Gry wideo', u'http://www.filmweb.pl/feed/news/category/game'), - (u'Różne', u'http://www.filmweb.pl/feed/news/category/other'), - (u'Recenzje redakcji', u'http://www.filmweb.pl/feed/reviews/latest'), - (u'Recenzje użytkowników', u'http://www.filmweb.pl/feed/user-reviews/latest') + keep_only_tags = [dict(attrs={'class': ['newsHdr hdrWithAuthor ', 'reviewHdr', 'newsContent newsPage', 'newsContent', 'hdr hdr-mega']})] + feeds = [(u'Filmy', u'https://www.filmweb.pl/feed/news/category/film'), + (u'Seriale', u'https://www.filmweb.pl/feed/news/category/serial'), + (u'Box office', u'https://www.filmweb.pl/feed/news/category/boxoffice'), + (u'Telewizja', u'https://www.filmweb.pl/feed/news/category/tv'), + (u'Festiwale, nagrody i przeglądy', u'https://www.filmweb.pl/feed/news/category/festival'), + (u'Multimedia', u'https://www.filmweb.pl/feed/news/category/multimedia'), + (u'Dystrybucja dvd/blu-ray', u'https://www.filmweb.pl/feed/news/category/dvd'), + (u'Gry wideo', u'https://www.filmweb.pl/feed/news/category/game'), + (u'Różne', u'https://www.filmweb.pl/feed/news/category/other'), + (u'Recenzje redakcji', u'https://www.filmweb.pl/feed/reviews/latest'), + (u'Recenzje użytkowników', u'https://www.filmweb.pl/feed/user-reviews/latest') ] def preprocess_html(self, soup): for a in soup('a', href=True): - if 'http://' not in a['href'] and 'https://' not in a['href']: + if 'https://' not in a['href']: a['href'] = self.index + a['href'] return soup diff --git a/recipes/frazpc.recipe b/recipes/frazpc.recipe deleted file mode 100644 index 7158f101ac..0000000000 --- a/recipes/frazpc.recipe +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python2 - -__license__ = 'GPL v3' -__copyright__ = u'2010-2011, Tomasz Dlugosz ' -''' -frazpc.pl -''' - -from calibre.web.feeds.news import BasicNewsRecipe -import re - - -class FrazPC(BasicNewsRecipe): - title = u'frazpc.pl' - publisher = u'frazpc.pl' - description = u'Tw\xf3j Vortal Technologiczny' - language = 'pl' - __author__ = u'Tomasz D\u0142ugosz' - oldest_article = 7 - max_articles_per_feed = 100 - use_embedded_content = False - no_stylesheets = True - remove_empty_feeds = True - cover_url = 'http://www.frazpc.pl/images/logo.png' - feeds = [ - (u'Aktualno\u015bci', u'http://www.frazpc.pl/feed/aktualnosci'), - (u'Artyku\u0142y', u'http://www.frazpc.pl/feed/artykuly') - ] - - keep_only_tags = [dict(name='div', attrs={'class': 'article'})] - - remove_tags = [ - dict(name='div', attrs={'class': 'title-wrapper'}), - dict(name='p', attrs={'class': 'tags'}), - dict(name='p', attrs={'class': 'article-links'}), - dict(name='div', attrs={'class': 'comments_box'}) - ] - - remove_tags_after = dict(name='div', attrs={'class': 'content'}) - preprocess_regexps = [(re.compile( - r'\| Komentarze \([0-9]*\)'), lambda match: '')] - - remove_attributes = ['width', 'height'] diff --git a/recipes/icons/adventure_zone_pl.png b/recipes/icons/adventure_zone_pl.png deleted file mode 100644 index f00db43dab..0000000000 Binary files a/recipes/icons/adventure_zone_pl.png and /dev/null differ diff --git a/recipes/icons/film_web.png b/recipes/icons/film_web.png index 9e3dc04426..1791a862ba 100644 Binary files a/recipes/icons/film_web.png and b/recipes/icons/film_web.png differ diff --git a/recipes/icons/frazpc.png b/recipes/icons/frazpc.png deleted file mode 100644 index aed3d9e856..0000000000 Binary files a/recipes/icons/frazpc.png and /dev/null differ