From 1b842be5c64892ead9e13ab592c33f9c1ad94d09 Mon Sep 17 00:00:00 2001 From: Jony <23194385+jony0008@users.noreply.github.com> Date: Sun, 15 Mar 2020 14:20:06 +0100 Subject: [PATCH] Delete kurier_galicyjski.recipe --- recipes/kurier_galicyjski.recipe | 74 -------------------------------- 1 file changed, 74 deletions(-) delete mode 100644 recipes/kurier_galicyjski.recipe diff --git a/recipes/kurier_galicyjski.recipe b/recipes/kurier_galicyjski.recipe deleted file mode 100644 index e51bae27ec..0000000000 --- a/recipes/kurier_galicyjski.recipe +++ /dev/null @@ -1,74 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup as bs, Comment - - -class KurierGalicyjski(BasicNewsRecipe): - title = u'Kurier Galicyjski' - __author__ = 'fenuks' - description = u'Kurier Galicyjski - największa gazeta dla Polaków na Ukrainie. Bieżące wydarzenia z życia polskiej mniejszości, historia, kultura, polityka, reportaże.' # noqa - category = 'news' - language = 'pl' - cover_url = 'http://www.duszki.pl/Kurier_galicyjski_bis2_small.gif' - oldest_article = 7 - max_articles_per_feed = 100 - remove_empty_feeds = True - no_stylesheets = True - keep_only_tags = [dict(attrs={'class': 'item-page'})] - remove_tags = [dict(attrs={'class': 'pagenav'}), dict(attrs={ - 'style': 'border-top-width: thin; border-top-style: dashed; border-top-color: #CCC; border-bottom-width: thin; border-bottom-style: dashed; border-bottom-color: #CCC; padding-top:5px; padding-bottom:5px; text-align:right; margin-top:10px; height:20px;'})] # noqa - feeds = [ - (u'Wydarzenia', u'http://kuriergalicyjski.com/index.php/wydarzenia?format=feed&type=atom'), - (u'Publicystyka', u'http://kuriergalicyjski.com/index.php/niezwykle-historie?format=feed&type=atom'), - (u'Reporta\u017ce', u'http://kuriergalicyjski.com/index.php/report?format=feed&type=atom'), - (u'Rozmowy Kuriera', u'http://kuriergalicyjski.com/index.php/kuriera?format=feed&type=atom'), - (u'Przegl\u0105d prasy', u'http://kuriergalicyjski.com/index.php/2012-01-05-14-08-55?format=feed&type=atom'), - (u'Kultura', u'http://kuriergalicyjski.com/index.php/2011-12-02-14-26-39?format=feed&type=atom'), - (u'Zabytki', u'http://kuriergalicyjski.com/index.php/2011-12-02-14-27-32?format=feed&type=atom'), - (u'Polska-Ukraina', u'http://kuriergalicyjski.com/index.php/pol-ua?format=feed&type=atom'), - (u'Polacy i Ukrai\u0144cy', u'http://kuriergalicyjski.com/index.php/polacy-i-ukr?format=feed&type=atom'), - (u'Niezwyk\u0142e historie', u'http://kuriergalicyjski.com/index.php/niezwykle-historie?format=feed&type=atom'), - (u'Polemiki', u'http://kuriergalicyjski.com/index.php/polemiki?format=feed&type=atom')] - - def append_page(self, soup, appendtag): - pager = soup.find(id='article-index') - if pager: - pager = pager.findAll('a')[1:] - if pager: - for a in pager: - nexturl = 'http://www.kuriergalicyjski.com' + a['href'] - soup2 = self.index_to_soup(nexturl) - pagetext = soup2.find(attrs={'class': 'item-page'}) - if pagetext.h2: - pagetext.h2.extract() - r = pagetext.find(attrs={'class': 'article-info'}) - if r: - r.extract() - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - pos = len(appendtag.contents) - for r in appendtag.findAll(id='article-index'): - r.extract() - for r in appendtag.findAll(attrs={'class': 'pagenavcounter'}): - r.extract() - for r in appendtag.findAll(attrs={'class': 'pagination'}): - r.extract() - for r in appendtag.findAll(attrs={'class': 'pagenav'}): - r.extract() - for r in appendtag.findAll(attrs={'style': 'border-top-width: thin; border-top-style: dashed; border-top-color: #CCC; border-bottom-width: thin; border-bottom-style: dashed; border-bottom-color: #CCC; padding-top:5px; padding-bottom:5px; text-align:right; margin-top:10px; height:20px;'}): # noqa - r.extract() - comments = appendtag.findAll( - text=lambda text: isinstance(text, Comment)) - for comment in comments: - comment.extract() - - def preprocess_html(self, soup): - self.append_page(soup, soup.body) - for r in soup.findAll(style=True): - del r['style'] - for img in soup.findAll(attrs={'class': 'easy_img_caption smartresize'}): - img.insert(len(img.contents) - 1, bs('
')) - img.insert(len(img.contents), bs('

')) - for a in soup.findAll('a', href=True): - if a['href'].startswith('/'): - a['href'] = 'http://kuriergalicyjski.com' + a['href'] - return soup