Update FilmWeb and Fdb.pl

Merge branch 'master' of https://github.com/t3d/calibre
This commit is contained in:
Kovid Goyal 2019-10-15 04:28:26 +05:30
commit 8f8a5f7561
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
7 changed files with 22 additions and 112 deletions

View File

@ -1,45 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Adventure_zone(BasicNewsRecipe):
    """News recipe for Adventure Zone (adventure-zone.info).

    Articles come from the single 'Nowinki' RSS feed. Only elements with
    class ``content`` are kept, and relative links are rewritten against
    ``BASEURL``.
    """

    title = u'Adventure Zone'
    __author__ = 'fenuks'
    description = u'Czytaj więcej o przygodzie - codzienne nowinki. Szukaj u nas solucji i poradników, czytaj recenzje i zapowiedzi. Także galeria, pliki oraz forum dla wszystkich fanów gier przygodowych.' # noqa
    category = 'games'
    language = 'pl'
    # Prefix used to turn site-relative hrefs into absolute URLs.
    BASEURL = 'http://www.adventure-zone.info/fusion/'
    no_stylesheets = True
    extra_css = '.image {float: left; margin-right: 5px;}'
    oldest_article = 20
    max_articles_per_feed = 100
    cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png'
    remove_attributes = ['style']
    use_embedded_content = False
    keep_only_tags = [dict(attrs={'class': 'content'})]
    remove_tags = [dict(attrs={'class': 'footer'})]
    feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/rss/index.php')]
    # Lower-case substrings looked for in page titles and link texts.
    # This tuple is the single source of truth for _is_linked_text().
    _trigger_words = ('zapowied', 'recenzj', 'solucj', 'poradnik')

    @classmethod
    def _is_linked_text(cls, title):
        """Return True if *title* contains any entry of ``_trigger_words``.

        *title* is expected to be lower-cased already; the comparison is a
        plain substring test.
        """
        return any(word in title for word in cls._trigger_words)

    def skip_ad_pages(self, soup):
        """Follow a teaser page to the page it links to, when one is found.

        If the page title contains a trigger word, the links inside the
        element with class ``subject`` are scanned; for the first link whose
        text also contains a trigger word, the raw content of
        ``BASEURL + href`` is fetched and returned.

        Returns None when the page has no body, no title, no ``subject``
        element, or no matching link.
        """
        body = soup.body
        title_tag = soup.title
        if body is None or title_tag is None:
            return None
        subject = body.find(attrs={'class': 'subject'})
        if subject is None:
            # Nothing to scan; leave the page untouched instead of raising.
            return None

        title = title_tag.renderContents().decode('utf-8').lower()
        if not self._is_linked_text(title):
            return None

        for anchor in subject.findAll(name='a', href=True):
            # An empty link text simply fails the substring test below.
            word = anchor.renderContents().decode('utf-8').lower()
            if self._is_linked_text(word):
                return self.index_to_soup(self.BASEURL + anchor['href'], raw=True)
        return None

    def preprocess_html(self, soup):
        """Prefix every href that does not start with 'http' with ``BASEURL``.

        The soup is modified in place and returned.
        """
        for link in soup.findAll('a', href=True):
            if not link['href'].startswith('http'):
                link['href'] = self.BASEURL + link['href']
        return soup

View File

@ -9,7 +9,7 @@ class FDBPl(BasicNewsRecipe):
category = 'film'
language = 'pl'
extra_css = '.options-left > li {display: inline;} em {display: block;}'
cover_url = 'http://fdb.pl/assets/fdb2/logo.png'
cover_url = 'https://i1.fdbimg.pl/hygg2xp1/480x300_magq39.jpg'
use_embedded_content = False
oldest_article = 7
max_articles_per_feed = 100
@ -19,25 +19,25 @@ class FDBPl(BasicNewsRecipe):
remove_attributes = ['style', 'font']
ignore_duplicate_articles = {'title', 'url'}
keep_only_tags = [dict(attrs={'class': 'news-item news-first'})]
keep_only_tags = [dict(attrs={'class': ['row justify-content-center', 'figure']})]
remove_tags = [
dict(attrs={'class': ['dig dig-first', 'ads clearfix', 'comments']})]
dict(attrs={'class': ['news-footer infinite-scroll-breakepoit', 'list-inline text-muted m-0']})]
feeds = []
def parse_index(self):
feeds = []
feeds.append((u'Wiadomości', self.get_articles(
'http://fdb.pl/wiadomosci?page={0}', 2)))
'https://fdb.pl/wiadomosci?page={0}', 2)))
return feeds
def get_articles(self, url, pages=1):
articles = []
for nr in range(1, pages + 1):
soup = self.index_to_soup(url.format(nr))
for tag in soup.findAll(attrs={'class': 'news-item clearfix'}):
node = tag.find('h2')
for tag in soup.findAll(attrs={'class': 'col-xs-6 col-sm-4 col-md-4 col-lg-3'}):
node = tag.find('h5')
title = node.a.string
url = 'http://fdb.pl' + node.a['href']
url = node.a['href']
date = ''
articles.append({'title': title,
'url': url,

View File

@ -6,10 +6,10 @@ class FilmWebPl(BasicNewsRecipe):
title = 'FilmWeb'
__author__ = 'fenuks'
description = u'Filmweb.pl - Filmy takie jak Ty Filmweb to największy i najczęściej odwiedzany polski serwis filmowy.'
cover_url = 'http://1.fwcdn.pl/an/867323/63321_1.11.jpg'
cover_url = 'https://1.fwcdn.pl/an/np/49468/2018/15037.2.jpg'
category = 'movies'
language = 'pl'
index = 'http://www.filmweb.pl'
index = 'https://www.filmweb.pl'
oldest_article = 8
max_articles_per_feed = 100
no_stylesheets = True
@ -28,25 +28,23 @@ class FilmWebPl(BasicNewsRecipe):
remove_tags = [dict(attrs={'class':['infoParent', 'likeBar',
'droptions-box pull-right', 'photoDesc', 'imageLicense', 'play big', 'shadow embed__icon--svg']})]
remove_attributes = ['style',]
keep_only_tags = [dict(attrs={'class': ['newsHdr hdrWithAuthor ', 'reviewHdr', 'newsContent newsPage', 'newsContent']})]
# remove_tags_before = dict(attrs={'class': 'hdr hdr-mega'})
# remove_tags_after = dict(attrs={'class': 'newsContent'})
feeds = [(u'Filmy', u'http://www.filmweb.pl/feed/news/category/film'),
(u'Seriale', u'http://www.filmweb.pl/feed/news/category/serial'),
(u'Box office', u'http://www.filmweb.pl/feed/news/category/boxoffice'),
(u'Telewizja', u'http://www.filmweb.pl/feed/news/category/tv'),
(u'Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
(u'Multimedia', u'http://www.filmweb.pl/feed/news/category/multimedia'),
(u'Dystrybucja dvd/blu-ray', u'http://www.filmweb.pl/feed/news/category/dvd'),
(u'Gry wideo', u'http://www.filmweb.pl/feed/news/category/game'),
(u'Różne', u'http://www.filmweb.pl/feed/news/category/other'),
(u'Recenzje redakcji', u'http://www.filmweb.pl/feed/reviews/latest'),
(u'Recenzje użytkowników', u'http://www.filmweb.pl/feed/user-reviews/latest')
keep_only_tags = [dict(attrs={'class': ['newsHdr hdrWithAuthor ', 'reviewHdr', 'newsContent newsPage', 'newsContent', 'hdr hdr-mega']})]
feeds = [(u'Filmy', u'https://www.filmweb.pl/feed/news/category/film'),
(u'Seriale', u'https://www.filmweb.pl/feed/news/category/serial'),
(u'Box office', u'https://www.filmweb.pl/feed/news/category/boxoffice'),
(u'Telewizja', u'https://www.filmweb.pl/feed/news/category/tv'),
(u'Festiwale, nagrody i przeglądy', u'https://www.filmweb.pl/feed/news/category/festival'),
(u'Multimedia', u'https://www.filmweb.pl/feed/news/category/multimedia'),
(u'Dystrybucja dvd/blu-ray', u'https://www.filmweb.pl/feed/news/category/dvd'),
(u'Gry wideo', u'https://www.filmweb.pl/feed/news/category/game'),
(u'Różne', u'https://www.filmweb.pl/feed/news/category/other'),
(u'Recenzje redakcji', u'https://www.filmweb.pl/feed/reviews/latest'),
(u'Recenzje użytkowników', u'https://www.filmweb.pl/feed/user-reviews/latest')
]
def preprocess_html(self, soup):
for a in soup('a', href=True):
if 'http://' not in a['href'] and 'https://' not in a['href']:
if 'https://' not in a['href']:
a['href'] = self.index + a['href']
return soup

View File

@ -1,43 +0,0 @@
#!/usr/bin/env python2
__license__ = 'GPL v3'
__copyright__ = u'2010-2011, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
frazpc.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
import re
class FrazPC(BasicNewsRecipe):
    """News recipe for frazpc.pl, built from its two RSS feeds."""

    # --- identification -------------------------------------------------
    __author__ = u'Tomasz D\u0142ugosz'
    title = u'frazpc.pl'
    publisher = u'frazpc.pl'
    description = u'Tw\xf3j Vortal Technologiczny'
    language = 'pl'
    cover_url = 'http://www.frazpc.pl/images/logo.png'

    # --- download settings ----------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_empty_feeds = True

    feeds = [
        (u'Aktualno\u015bci', u'http://www.frazpc.pl/feed/aktualnosci'),
        (u'Artyku\u0142y', u'http://www.frazpc.pl/feed/artykuly'),
    ]

    # --- page clean-up --------------------------------------------------
    # Keep only the <div class="article"> element ...
    keep_only_tags = [{'name': 'div', 'attrs': {'class': 'article'}}]
    # ... and drop everything after <div class="content">.
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'content'}}
    # Elements removed from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'title-wrapper'}},
        {'name': 'p', 'attrs': {'class': 'tags'}},
        {'name': 'p', 'attrs': {'class': 'article-links'}},
        {'name': 'div', 'attrs': {'class': 'comments_box'}},
    ]
    remove_attributes = ['width', 'height']

    # Replace the "| Komentarze (N)" comments link with an empty string.
    preprocess_regexps = [
        (
            re.compile(r'\| <a href="#comments">Komentarze \([0-9]*\)</a>'),
            lambda _match: '',
        ),
    ]

Binary file not shown.

Before

Width:  |  Height:  |  Size: 791 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.6 KiB

After

Width:  |  Height:  |  Size: 454 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 271 B