Update Tablety and Tanuki recipes

Merge branch 'master' of https://github.com/t3d/calibre
This commit is contained in:
Kovid Goyal 2018-10-07 08:50:29 +05:30
commit a78682093a
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
9 changed files with 3 additions and 167 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 266 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 770 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 171 B

File diff suppressed because one or more lines are too long

View File

@ -16,7 +16,7 @@ class Tablety_pl(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
preprocess_regexps = [(re.compile(u'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''), preprocess_regexps = [(re.compile(u'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''),
(re.compile(u'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')] (re.compile(u'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
keep_only_tags = [dict(id='news_block')] keep_only_tags = [dict(attrs={'class': ['featured-image', 'article-content clearfix']})]
remove_tags = [dict(attrs={'class': ['comments_icon', 'wp-polls', 'entry-comments', remove_tags = [dict(attrs={'class': ['comments_icon', 'wp-polls', 'entry-comments',
'wp-polls-loading', 'ts-fab-wrapper', 'entry-footer', 'social-custom']})] 'wp-polls-loading', 'ts-fab-wrapper', 'entry-footer', 'social-custom']})]
feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')] feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]

View File

@ -11,6 +11,7 @@ class tanuki(BasicNewsRecipe):
language = 'pl' language = 'pl'
max_articles_per_feed = 100 max_articles_per_feed = 100
encoding = 'utf-8' encoding = 'utf-8'
autocleanup = True
extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .kadr{float: left;} .dwazdania {float: right;}' extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .kadr{float: left;} .dwazdania {float: right;}'
preprocess_regexps = [(re.compile(u'<h3><a class="screen".*?</h3>', re.DOTALL), lambda match: ''), (re.compile( preprocess_regexps = [(re.compile(u'<h3><a class="screen".*?</h3>', re.DOTALL), lambda match: ''), (re.compile(
unicode(r'<div><a href="/strony/((manga)|(anime))/[0-9]+?/oceny(\-redakcji){0,1}">Zobacz jak ocenili</a></div>'), re.DOTALL), lambda match: '')] unicode(r'<div><a href="/strony/((manga)|(anime))/[0-9]+?/oceny(\-redakcji){0,1}">Zobacz jak ocenili</a></div>'), re.DOTALL), lambda match: '')]
@ -42,15 +43,3 @@ class tanuki(BasicNewsRecipe):
appendtag.insert(pos, pagetext) appendtag.insert(pos, pagetext)
for r in appendtag.findAll(attrs={'class': 'nextarrow'}): for r in appendtag.findAll(attrs={'class': 'nextarrow'}):
r.extract() r.extract()
def preprocess_html(self, soup):
    """Append follow-up pages, then make relative links absolute.

    Calls ``self.append_page(soup, soup.body)`` first. Afterwards every
    ``<a>`` whose ``href`` contains neither ``http://`` nor ``https://`` is
    prefixed with the host of the tanuki.pl section named in the page
    title (anime, manga or czytelnia, checked in that order).

    If the page has no usable ``<title>`` or the title names none of the
    known sections, links are left untouched instead of raising.

    :param soup: parsed article page (BeautifulSoup-style object).
    :return: the same ``soup`` object, modified in place.
    """
    self.append_page(soup, soup.body)

    # The title is the same for every link, so resolve the section prefix
    # once up front instead of re-reading it inside the loop.
    title_tag = soup.title
    title_text = title_tag.string if title_tag is not None else None
    title = (title_text or u'').lower()

    # Order matters: it mirrors the precedence of the original if/elif chain.
    sections = (
        ('tanuki-anime', 'http://anime.tanuki.pl'),
        ('tanuki-manga', 'http://manga.tanuki.pl'),
        ('tanuki-czytelnia', 'http://czytelnia.tanuki.pl'),
    )
    prefix = None
    for marker, host in sections:
        if marker in title:
            prefix = host
            break
    if prefix is None:
        # Unknown section: there is nothing sensible to prepend.
        return soup

    for a in soup('a'):
        if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:  # noqa
            a['href'] = prefix + a['href']
    return soup

View File

@ -1,36 +0,0 @@
__license__ = 'GPL v3'
import re
from calibre.web.feeds.news import BasicNewsRecipe
class TawernaRPG(BasicNewsRecipe):
    """Calibre news recipe for the Polish-language site Tawerna RPG."""

    # Identification and presentation.
    title = u'Tawerna RPG'
    __author__ = 'fenuks'
    language = 'pl'
    category = 'fantasy, rpg, board games'
    description = u'Tawerna RPG to ogólnopolski serwis zajmujący się fantastyką i grami fantastycznymi. Znajdziesz u nas zarówno gry fabularne, karciane, planszowe i komputerowe, a także recenzje, opowiadania i sporą dawkę humoru.' # noqa
    cover_url = 'http://www.tawerna.rpg.pl/img/logo.png'

    # Feed source and article selection.
    feeds = [(u'Artykuły', 'http://www.tawerna.rpg.pl/css/rss.rss')]
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True
    ignore_duplicate_articles = {'title', 'url'}

    # Styling and markup clean-up.
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['style', 'font']
    extra_css = '.slajd {list-style-type: none; padding-left: 0px; margin-left: 0px;} .lewanc {float: left; margin-right: 5px;} .srodek {display: block; margin-left: auto; margin-right: auto;}' # noqa
    keep_only_tags = [{'id': 'site'}]
    remove_tags = [{'id': ['player', 'komentarz']}]
    remove_tags_after = {'id': 'komentarz'}

    # Cut everything from the "Dodaj komentarz" heading up to the closing
    # body tag, leaving just the closing tag behind.
    preprocess_regexps = [
        (
            re.compile(
                u'<h2>Dodaj komentarz</h2>.*</body>',
                re.IGNORECASE | re.DOTALL,
            ),
            lambda match: '</body>',
        ),
    ]

    def preprocess_html(self, soup):
        """Strip 'powi' containers and flatten list markup.

        The parent of every element with class ``powi`` is removed from the
        tree, and every ``li``/``ol``/``ul`` tag is renamed to ``div``.
        Returns the same ``soup`` object.
        """
        for marked in soup.findAll(attrs={'class': 'powi'}):
            marked.parent.extract()
        for list_tag in soup.findAll(name=['li', 'ol', 'ul']):
            list_tag.name = 'div'
        return soup

View File

@ -1,46 +0,0 @@
#!/usr/bin/env python2
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
class telepolis(BasicNewsRecipe):
    """Calibre news recipe for Telepolis.pl (language set to 'pl')."""

    title = u'Telepolis.pl'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>, Tomasz Długosz <tomek3d@gmail.com>'
    language = 'pl'
    description = u'Twój telekomunikacyjny serwis informacyjny.'
    masthead_url = 'http://telepolis.pl/i/telepolis-logo2.gif'
    no_stylesheets = True
    use_embedded_content = False
    # Single feed: the site's news ("Wiadomości") RSS.
    feeds = [
        (u'Wiadomości', u'http://www.telepolis.pl/rss,2,5,0.html')
    ]
    # Only these <div> containers are kept from each downloaded page.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'flol w510'}),
        dict(name='div', attrs={'class': 'main_tresc'}),
        dict(name='div', attrs={'class': 'main_tresc_news'})
    ]

    def append_page(self, soup, appendtag):
        """Pull linked pages into ``appendtag`` and drop the 'str' elements.

        Looks inside ``appendtag`` for an element with class ``str``
        (presumably the pagination bar -- confirm against the site). For its
        ``<a>`` links, up to the one whose rendered contents equal
        'Następna &rsaquo;', the link target is fetched with
        ``self.index_to_soup`` and its ``main_tresc`` element is inserted at
        the end of ``appendtag``. Finally every ``str`` element is removed.

        NOTE(review): the statement nesting below was reconstructed from a
        flattened listing; confirm that the fetch/insert lines belong inside
        the ``for`` loop before relying on this description.
        """
        chpage = appendtag.find(attrs={'class': 'str'})
        if chpage:
            for page in chpage.findAll('a'):
                # Stop at the "next page" link; only the links before it are fetched.
                if page.renderContents() == 'Następna &rsaquo;':
                    break
                soup2 = self.index_to_soup(page['href'])
                pagetext = soup2.find(attrs={'class': 'main_tresc'})
                # Insert at the current end of appendtag, i.e. append.
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)
            for r in appendtag.findAll(attrs={'class': 'str'}):
                r.extract()

    def preprocess_html(self, soup):
        """Merge follow-up pages into the body and rewrite image URLs.

        After ``append_page`` runs on ``soup.body``, every ``<img>`` whose
        ``src`` contains 'm.jpg' has that substring replaced by '.jpg'
        (presumably switching a thumbnail for the full-size file -- confirm).
        Returns the same ``soup`` object.
        """
        self.append_page(soup, soup.body)
        for image in soup.findAll('img'):
            # NOTE(review): 'src' is indexed without checking that the
            # attribute exists on the tag.
            if 'm.jpg' in image['src']:
                image['src'] = image['src'].replace('m.jpg', '.jpg')
        return soup

View File

@ -8,7 +8,7 @@ class Trojmiasto(BasicNewsRecipe):
description = u'Wiadomości, imprezy, wydarzenia, spektakle.Gdańsk, Gdynia, Sopot - NOCLEGI, Katalog firm, repertuar kin, wydarzenia, przewodnik, mapa, kwatery, hotele. Portal regionalny trojmiasto.pl' # noqa description = u'Wiadomości, imprezy, wydarzenia, spektakle.Gdańsk, Gdynia, Sopot - NOCLEGI, Katalog firm, repertuar kin, wydarzenia, przewodnik, mapa, kwatery, hotele. Portal regionalny trojmiasto.pl' # noqa
category = '' category = ''
language = 'pl' language = 'pl'
encoding = 'iso-8859-2' encoding = 'utf-8'
extra_css = 'ul {list-style: none; padding:0; margin:0;}' extra_css = 'ul {list-style: none; padding:0; margin:0;}'
cover_url = 'http://www.trojmiasto.pl/_img/toplong2/logo_trojmiasto.gif' cover_url = 'http://www.trojmiasto.pl/_img/toplong2/logo_trojmiasto.gif'
use_embedded_content = False use_embedded_content = False