Update FilmWeb and Fdb.pl

Merge branch 'master' of https://github.com/t3d/calibre
2025-07-09 03:04:10 -04:00 · 2019-10-15 04:28:26 +05:30 · 2019-10-15 04:28:26 +05:30 · 8f8a5f7561
commit 8f8a5f7561
parent c0c6872249 2db371f7de
7 changed files with 22 additions and 112 deletions
--- a/recipes/adventure_zone_pl.recipe
+++ b/recipes/adventure_zone_pl.recipe
@ -1,45 +0,0 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class Adventure_zone(BasicNewsRecipe):
    title = u'Adventure Zone'
    __author__ = 'fenuks'
    description = u'Czytaj więcej o przygodzie - codzienne nowinki. Szukaj u nas solucji i poradników, czytaj recenzje i zapowiedzi. Także galeria, pliki oraz forum dla wszystkich fanów gier przygodowych.'  # noqa
    category = 'games'
    language = 'pl'
    BASEURL = 'http://www.adventure-zone.info/fusion/'
    no_stylesheets = True
    extra_css = '.image {float: left; margin-right: 5px;}'
    oldest_article = 20
    max_articles_per_feed = 100
    cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png'
    remove_attributes = ['style']
    use_embedded_content = False
    keep_only_tags = [dict(attrs={'class': 'content'})]
    remove_tags = [dict(attrs={'class': 'footer'})]
    feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/rss/index.php')]
    _trigger_words = ('zapowied', 'recenzj', 'solucj', 'poradnik')
    @staticmethod
    def _is_linked_text(title):
        return 'zapowied' in title or 'recenz' in title or 'solucj' in title or 'poradnik' in title
    def skip_ad_pages(self, soup):
        skip_tag = soup.body.find(attrs={'class':'subject'})
        skip_tag = skip_tag.findAll(name='a', href=True)
        title = soup.title.renderContents().decode('utf-8').lower()
        if self._is_linked_text(title):
            for r in skip_tag:
                word = r.renderContents().decode('utf-8')
                if not word:
                    continue
                word = word.lower()
                if self._is_linked_text(word):
                    return self.index_to_soup(self.BASEURL+r['href'], raw=True)
    def preprocess_html(self, soup):
        for link in soup.findAll('a', href=True):
            if not link['href'].startswith('http'):
                link['href'] = self.BASEURL + link['href']
        return soup
--- a/recipes/fdb_pl.recipe
+++ b/recipes/fdb_pl.recipe
@ -9,7 +9,7 @@ class FDBPl(BasicNewsRecipe):
    category = 'film'
    language = 'pl'
    extra_css = '.options-left > li {display: inline;} em {display: block;}'
-    cover_url = 'http://fdb.pl/assets/fdb2/logo.png'
+    cover_url = 'https://i1.fdbimg.pl/hygg2xp1/480x300_magq39.jpg'
    use_embedded_content = False
    oldest_article = 7
    max_articles_per_feed = 100
@ -19,25 +19,25 @@ class FDBPl(BasicNewsRecipe):
    remove_attributes = ['style', 'font']
    ignore_duplicate_articles = {'title', 'url'}
-    keep_only_tags = [dict(attrs={'class': 'news-item news-first'})]
+    keep_only_tags = [dict(attrs={'class': ['row justify-content-center', 'figure']})]
    remove_tags = [
-        dict(attrs={'class': ['dig dig-first', 'ads clearfix', 'comments']})]
+        dict(attrs={'class': ['news-footer infinite-scroll-breakepoit', 'list-inline text-muted m-0']})]
    feeds = []
    def parse_index(self):
        feeds = []
        feeds.append((u'Wiadomości', self.get_articles(
-            'http://fdb.pl/wiadomosci?page={0}', 2)))
+            'https://fdb.pl/wiadomosci?page={0}', 2)))
        return feeds
    def get_articles(self, url, pages=1):
        articles = []
        for nr in range(1, pages + 1):
            soup = self.index_to_soup(url.format(nr))
-            for tag in soup.findAll(attrs={'class': 'news-item clearfix'}):
+            for tag in soup.findAll(attrs={'class': 'col-xs-6 col-sm-4 col-md-4 col-lg-3'}):
-                node = tag.find('h2')
+                node = tag.find('h5')
                title = node.a.string
-                url = 'http://fdb.pl' + node.a['href']
+                url = node.a['href']
                date = ''
                articles.append({'title': title,
                                 'url': url,
--- a/recipes/film_web.recipe
+++ b/recipes/film_web.recipe
@ -6,10 +6,10 @@ class FilmWebPl(BasicNewsRecipe):
    title          = 'FilmWeb'
    __author__        = 'fenuks'
    description   = u'Filmweb.pl - Filmy takie jak Ty Filmweb to największy i najczęściej odwiedzany polski serwis filmowy.'
-    cover_url      = 'http://1.fwcdn.pl/an/867323/63321_1.11.jpg'
+    cover_url      = 'https://1.fwcdn.pl/an/np/49468/2018/15037.2.jpg'
    category       = 'movies'
    language       = 'pl'
-    index = 'http://www.filmweb.pl'
+    index = 'https://www.filmweb.pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
@ -28,25 +28,23 @@ class FilmWebPl(BasicNewsRecipe):
    remove_tags = [dict(attrs={'class':['infoParent', 'likeBar',
                    'droptions-box pull-right', 'photoDesc', 'imageLicense', 'play big', 'shadow embed__icon--svg']})]
    remove_attributes = ['style',]
-    keep_only_tags = [dict(attrs={'class': ['newsHdr hdrWithAuthor ', 'reviewHdr', 'newsContent newsPage', 'newsContent']})]
+    keep_only_tags = [dict(attrs={'class': ['newsHdr hdrWithAuthor ', 'reviewHdr', 'newsContent newsPage', 'newsContent', 'hdr hdr-mega']})]
-    # remove_tags_before = dict(attrs={'class': 'hdr hdr-mega'})
+    feeds          = [(u'Filmy', u'https://www.filmweb.pl/feed/news/category/film'),
-    # remove_tags_after = dict(attrs={'class': 'newsContent'})
+                         (u'Seriale', u'https://www.filmweb.pl/feed/news/category/serial'),
-    feeds          = [(u'Filmy', u'http://www.filmweb.pl/feed/news/category/film'),
+                         (u'Box office', u'https://www.filmweb.pl/feed/news/category/boxoffice'),
-                         (u'Seriale', u'http://www.filmweb.pl/feed/news/category/serial'),
+                         (u'Telewizja', u'https://www.filmweb.pl/feed/news/category/tv'),
-                         (u'Box office', u'http://www.filmweb.pl/feed/news/category/boxoffice'),
+                         (u'Festiwale, nagrody i przeglądy', u'https://www.filmweb.pl/feed/news/category/festival'),
-                         (u'Telewizja', u'http://www.filmweb.pl/feed/news/category/tv'),
+                         (u'Multimedia', u'https://www.filmweb.pl/feed/news/category/multimedia'),
-                         (u'Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
+                         (u'Dystrybucja dvd/blu-ray', u'https://www.filmweb.pl/feed/news/category/dvd'),
-                         (u'Multimedia', u'http://www.filmweb.pl/feed/news/category/multimedia'),
+                         (u'Gry wideo', u'https://www.filmweb.pl/feed/news/category/game'),
-                         (u'Dystrybucja dvd/blu-ray', u'http://www.filmweb.pl/feed/news/category/dvd'),
+                         (u'Różne', u'https://www.filmweb.pl/feed/news/category/other'),
-                         (u'Gry wideo', u'http://www.filmweb.pl/feed/news/category/game'),
+                         (u'Recenzje redakcji', u'https://www.filmweb.pl/feed/reviews/latest'),
-                         (u'Różne', u'http://www.filmweb.pl/feed/news/category/other'),
+                         (u'Recenzje użytkowników', u'https://www.filmweb.pl/feed/user-reviews/latest')
-                         (u'Recenzje redakcji', u'http://www.filmweb.pl/feed/reviews/latest'),
-                         (u'Recenzje użytkowników', u'http://www.filmweb.pl/feed/user-reviews/latest')
                          ]
    def preprocess_html(self, soup):
        for a in soup('a', href=True):
-            if 'http://' not in a['href'] and 'https://' not in a['href']:
+            if 'https://' not in a['href']:
                a['href'] = self.index + a['href']
        return soup
--- a/recipes/frazpc.recipe
+++ b/recipes/frazpc.recipe
@ -1,43 +0,0 @@
 #!/usr/bin/env  python2
 __license__ = 'GPL v3'
 __copyright__ = u'2010-2011, Tomasz Dlugosz <tomek3d@gmail.com>'
 '''
 frazpc.pl
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
 class FrazPC(BasicNewsRecipe):
    title = u'frazpc.pl'
    publisher = u'frazpc.pl'
    description = u'Tw\xf3j Vortal Technologiczny'
    language = 'pl'
    __author__ = u'Tomasz D\u0142ugosz'
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_empty_feeds = True
    cover_url = 'http://www.frazpc.pl/images/logo.png'
    feeds = [
        (u'Aktualno\u015bci', u'http://www.frazpc.pl/feed/aktualnosci'),
        (u'Artyku\u0142y', u'http://www.frazpc.pl/feed/artykuly')
    ]
    keep_only_tags = [dict(name='div', attrs={'class': 'article'})]
    remove_tags = [
        dict(name='div', attrs={'class': 'title-wrapper'}),
        dict(name='p', attrs={'class': 'tags'}),
        dict(name='p', attrs={'class': 'article-links'}),
        dict(name='div', attrs={'class': 'comments_box'})
    ]
    remove_tags_after = dict(name='div', attrs={'class': 'content'})
    preprocess_regexps = [(re.compile(
        r'\| <a href="#comments">Komentarze \([0-9]*\)</a>'), lambda match: '')]
    remove_attributes = ['width', 'height']
--- a/recipes/icons/adventure_zone_pl.png
+++ b/recipes/icons/adventure_zone_pl.png
--- a/recipes/icons/film_web.png
+++ b/recipes/icons/film_web.png
--- a/recipes/icons/frazpc.png
+++ b/recipes/icons/frazpc.png