Update Tablety and Tanuki recipes

Merge branch 'master' of https://github.com/t3d/calibre
2025-07-09 03:04:10 -04:00 · 2018-10-07 08:50:29 +05:30 · 2018-10-07 08:50:29 +05:30 · a78682093a
commit a78682093a
parent de1b0d1cb6 3c1e81f868
9 changed files with 3 additions and 167 deletions
--- a/recipes/icons/stopklatka.png
+++ b/recipes/icons/stopklatka.png
--- a/recipes/icons/tawernarpg_pl.png
+++ b/recipes/icons/tawernarpg_pl.png
--- a/recipes/icons/telepolis_pl.png
+++ b/recipes/icons/telepolis_pl.png
--- a/recipes/stopklatka.recipe
+++ b/recipes/stopklatka.recipe
--- a/recipes/tablety_pl.recipe
+++ b/recipes/tablety_pl.recipe
@ -16,7 +16,7 @@ class Tablety_pl(BasicNewsRecipe):
    max_articles_per_feed = 100
    preprocess_regexps = [(re.compile(u'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''),
                          (re.compile(u'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
-    keep_only_tags = [dict(id='news_block')]
+    keep_only_tags = [dict(attrs={'class': ['featured-image', 'article-content clearfix']})]
    remove_tags = [dict(attrs={'class': ['comments_icon', 'wp-polls', 'entry-comments',
                                         'wp-polls-loading', 'ts-fab-wrapper', 'entry-footer', 'social-custom']})]
    feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]
--- a/recipes/tanuki.recipe
+++ b/recipes/tanuki.recipe
@ -11,6 +11,7 @@ class tanuki(BasicNewsRecipe):
    language = 'pl'
    max_articles_per_feed = 100
    encoding = 'utf-8'
    autocleanup = True
    extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .kadr{float: left;} .dwazdania {float: right;}'
    preprocess_regexps = [(re.compile(u'<h3><a class="screen".*?</h3>', re.DOTALL), lambda match: ''), (re.compile(
        unicode(r'<div><a href="/strony/((manga)|(anime))/[0-9]+?/oceny(\-redakcji){0,1}">Zobacz jak ocenili</a></div>'), re.DOTALL), lambda match: '')]
@ -42,15 +43,3 @@ class tanuki(BasicNewsRecipe):
                appendtag.insert(pos, pagetext)
            for r in appendtag.findAll(attrs={'class': 'nextarrow'}):
                r.extract()
    def preprocess_html(self, soup):
        """Append follow-up pages, then make site-relative links absolute.

        Calls ``self.append_page(soup, soup.body)`` first. Afterwards every
        ``<a>`` whose ``href`` contains neither ``http://`` nor ``https://``
        gets a host prefix chosen from the lower-cased page title
        (anime / manga / czytelnia). Links are left untouched when the title
        matches none of the known sections.

        :param soup: parsed article page; modified in place.
        :return: the same ``soup`` object.
        """
        self.append_page(soup, soup.body)

        # The title is identical for every link, so inspect it once. A page
        # without a <title>, or with a title that has no single string,
        # used to raise AttributeError here; treat it as "no known section".
        title_tag = soup.title
        title_text = title_tag.string if title_tag is not None else None
        title = (title_text or '').lower()

        base = None
        for marker, host in (
            ('tanuki-anime', 'http://anime.tanuki.pl'),
            ('tanuki-manga', 'http://manga.tanuki.pl'),
            ('tanuki-czytelnia', 'http://czytelnia.tanuki.pl'),
        ):
            if marker in title:
                base = host
                break
        if base is None:
            # Unknown section: there is no host to prefix links with.
            return soup

        for a in soup('a'):
            href = a.get('href')
            # Skip anchors without an href and links that already carry a scheme.
            if href is None or 'http://' in href or 'https://' in href:
                continue
            a['href'] = base + href
        return soup
--- a/recipes/tawernarpg_pl.recipe
+++ b/recipes/tawernarpg_pl.recipe
@ -1,36 +0,0 @@
 __license__ = 'GPL v3'
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class TawernaRPG(BasicNewsRecipe):
    """News recipe for the Tawerna RPG site (feed: www.tawerna.rpg.pl)."""

    # --- Publication metadata ---
    title = u'Tawerna RPG'
    __author__ = 'fenuks'
    description = u'Tawerna RPG to ogólnopolski serwis zajmujący się fantastyką i grami fantastycznymi. Znajdziesz u nas zarówno gry fabularne, karciane, planszowe i komputerowe, a także recenzje, opowiadania i sporą dawkę humoru.'  # noqa
    category = 'fantasy, rpg, board games'
    language = 'pl'
    cover_url = 'http://www.tawerna.rpg.pl/img/logo.png'

    # --- Feed selection ---
    feeds = [(u'Artykuły', 'http://www.tawerna.rpg.pl/css/rss.rss')]
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True
    ignore_duplicate_articles = {'title', 'url'}

    # --- Markup clean-up ---
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['style', 'font']
    extra_css = '.slajd {list-style-type: none; padding-left: 0px; margin-left: 0px;} .lewanc {float: left; margin-right: 5px;} .srodek {display: block; margin-left: auto; margin-right: auto;}'  # noqa
    # Cut everything from the "Dodaj komentarz" heading up to </body>.
    preprocess_regexps = [
        (
            re.compile(u'<h2>Dodaj komentarz</h2>.*</body>',
                       re.DOTALL | re.IGNORECASE),
            lambda match: '</body>',
        ),
    ]
    keep_only_tags = [{'id': 'site'}]
    remove_tags = [{'id': ['player', 'komentarz']}]
    remove_tags_after = {'id': 'komentarz'}

    def preprocess_html(self, soup):
        """Strip 'powi' containers and flatten list markup.

        The parent of every element with class ``powi`` is detached from the
        tree, and each ``li``/``ol``/``ul`` tag is renamed to ``div``.

        :param soup: parsed article page; modified in place.
        :return: the same ``soup`` object.
        """
        for marked in soup.findAll(attrs={'class': 'powi'}):
            marked.parent.extract()
        for list_tag in soup.findAll(name=['li', 'ol', 'ul']):
            list_tag.name = 'div'
        return soup
--- a/recipes/telepolis_pl.recipe
+++ b/recipes/telepolis_pl.recipe
@ -1,46 +0,0 @@
 #!/usr/bin/env  python2
 __license__ = 'GPL v3'
 from calibre.web.feeds.news import BasicNewsRecipe
 class telepolis(BasicNewsRecipe):
    """News recipe for Telepolis.pl (feed: www.telepolis.pl)."""

    title = u'Telepolis.pl'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>, Tomasz Długosz <tomek3d@gmail.com>'
    language = 'pl'
    description = u'Twój telekomunikacyjny serwis informacyjny.'
    masthead_url = 'http://telepolis.pl/i/telepolis-logo2.gif'
    no_stylesheets = True
    use_embedded_content = False
    feeds = [
        (u'Wiadomości', u'http://www.telepolis.pl/rss,2,5,0.html')
    ]
    keep_only_tags = [
        dict(name='div', attrs={'class': 'flol w510'}),
        dict(name='div', attrs={'class': 'main_tresc'}),
        dict(name='div', attrs={'class': 'main_tresc_news'})
    ]

    def append_page(self, soup, appendtag):
        """Append the content of follow-up pages to ``appendtag``.

        Looks for a pager element (class ``str``) inside ``appendtag``. Each
        link in it, up to the "next" link, is fetched with
        ``self.index_to_soup`` and the ``main_tresc`` element of that page is
        appended to ``appendtag``. All pager elements are removed afterwards.

        :param soup: the article soup (not used directly here).
        :param appendtag: tag that receives the extra page content.
        """
        chpage = appendtag.find(attrs={'class': 'str'})
        if not chpage:
            return
        for page in chpage.findAll('a'):
            # The "next" link marks the end of the numbered page links.
            if page.renderContents() == 'Następna &rsaquo;':
                break
            href = page.get('href')
            if not href:
                # An anchor without a target cannot be fetched.
                continue
            soup2 = self.index_to_soup(href)
            pagetext = soup2.find(attrs={'class': 'main_tresc'})
            if pagetext is None:
                # Follow-up page has no article body; inserting None would fail.
                continue
            appendtag.insert(len(appendtag.contents), pagetext)
        for r in appendtag.findAll(attrs={'class': 'str'}):
            r.extract()

    def preprocess_html(self, soup):
        """Merge multi-page articles and rewrite 'm.jpg' image URLs.

        After ``append_page`` runs, every ``<img>`` whose ``src`` contains
        ``m.jpg`` has that fragment replaced by ``.jpg``. Images without a
        ``src`` attribute are skipped instead of raising ``KeyError``.

        :param soup: parsed article page; modified in place.
        :return: the same ``soup`` object.
        """
        self.append_page(soup, soup.body)
        for image in soup.findAll('img'):
            src = image.get('src')
            if src and 'm.jpg' in src:
                image['src'] = src.replace('m.jpg', '.jpg')
        return soup
--- a/recipes/trojmiasto_pl.recipe
+++ b/recipes/trojmiasto_pl.recipe
@ -8,7 +8,7 @@ class Trojmiasto(BasicNewsRecipe):
    description = u'Wiadomości, imprezy, wydarzenia, spektakle.Gdańsk, Gdynia, Sopot - NOCLEGI, Katalog firm, repertuar kin, wydarzenia, przewodnik, mapa, kwatery, hotele. Portal regionalny trojmiasto.pl'  # noqa
    category = ''
    language = 'pl'
-    encoding = 'iso-8859-2'
+    encoding = 'utf-8'
    extra_css = 'ul {list-style: none; padding:0; margin:0;}'
    cover_url = 'http://www.trojmiasto.pl/_img/toplong2/logo_trojmiasto.gif'
    use_embedded_content = False