diff --git a/recipes/adventure_zone_pl.recipe b/recipes/adventure_zone_pl.recipe index 3f0c71fdcb..40be1482fc 100644 --- a/recipes/adventure_zone_pl.recipe +++ b/recipes/adventure_zone_pl.recipe @@ -1,4 +1,5 @@ from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup as bs class Adventure_zone(BasicNewsRecipe): @@ -19,16 +20,24 @@ class Adventure_zone(BasicNewsRecipe): remove_tags = [dict(attrs={'class': 'footer'})] feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/rss/index.php')] + _trigger_words = ('zapowied', 'recenzj', 'solucj', 'poradnik') + + @staticmethod + def _is_linked_text(title): + return 'zapowied' in title or 'recenz' in title or 'solucj' in title or 'poradnik' in title + def skip_ad_pages(self, soup): - skip_tag = soup.body.find(attrs={'class': 'content'}) - skip_tag = skip_tag.findAll(name='a') - title = soup.title.string.lower() - if (('zapowied' in title) or ('recenzj' in title) or ('solucj' in title) or ('poradnik' in title)): + skip_tag = soup.body.find(attrs={'class':'subject'}) + skip_tag = skip_tag.findAll(name='a', href=True) + title = soup.title.renderContents().lower() + if self._is_linked_text(title): for r in skip_tag: - if r.strong and r.strong.string: - word = r.strong.string.lower() - if (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word) or ('poradnik' in word)): - return self.index_to_soup(self.BASEURL + r['href'], raw=True) + word = r.renderContents() + if not word: + continue + word = word.lower() + if self._is_linked_text(word): + return self.index_to_soup(self.BASEURL+r['href'], raw=True) def preprocess_html(self, soup): for link in soup.findAll('a', href=True): diff --git a/recipes/eclicto.recipe b/recipes/eclicto.recipe deleted file mode 100644 index 230b1d77fb..0000000000 --- a/recipes/eclicto.recipe +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python2 - -__license__ = 'GPL v3' -''' -blog.eclicto.pl -''' - -from calibre.web.feeds.news import BasicNewsRecipe -import re - - -class BlogeClictoRecipe(BasicNewsRecipe): - __author__ = 'Mori, Tomasz Długosz' - language = 'pl' - - title = u'Blog eClicto' - publisher = u'Blog eClicto' - description = u'Blog o e-papierze i e-bookach' - - max_articles_per_feed = 100 - cover_url = 'http://blog.eclicto.pl/wordpress/wp-content/themes/blog_eclicto/g/logo.gif' - - no_stylesheets = True - remove_javascript = True - encoding = 'utf-8' - - extra_css = ''' - img{float: left; padding-right: 10px; padding-bottom: 5px;} - ''' - - feeds = [ - (u'Blog eClicto', u'http://blog.eclicto.pl/feed/') - ] - - remove_tags = [ - dict(name='div', attrs={'class': 'social_bookmark'}), - ] - - keep_only_tags = [ - dict(name='div', attrs={'class': 'post'}) - ] - - preprocess_regexps = [ - (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in - [ - (r'\s*', lambda match: ''), - ] - ] diff --git a/recipes/film_org_pl.recipe b/recipes/film_org_pl.recipe index 12883ce292..87928df019 100644 --- a/recipes/film_org_pl.recipe +++ b/recipes/film_org_pl.recipe @@ -1,72 +1,32 @@ # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Comment -import re class FilmOrgPl(BasicNewsRecipe): title = u'Film.org.pl' __author__ = 'fenuks' - description = u"Recenzje, analizy, artykuły, rankingi - wszystko o filmie dla miłośników kina. Opisy efektów specjalnych, wersji reżyserskich, remake'ów, sequeli. No i forum filmowe. Jedne z największych w Polsce." # noqa + description = u"Recenzje, analizy, artykuły, rankingi - wszystko o filmie dla miłośników kina. Opisy efektów specjalnych, wersji reżyserskich, remake'ów, sequeli. No i forum filmowe. Jedne z największych w Polsce." category = 'film' language = 'pl' - extra_css = '.alignright {float:right; margin-left:5px;} .alignleft {float:left; margin-right:5px;} .recenzja-title {font-size: 150%; margin-top: 5px; margin-bottom: 5px;}' # noqa - cover_url = 'http://film.org.pl/wp-content/themes/KMF/images/logo_kmf10.png' + cover_url = 'http://film.org.pl/wp-content/uploads/2015/02/film.org.pl_film.org_.pl_kmfviolet4.png' ignore_duplicate_articles = {'title', 'url'} oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True remove_javascript = True remove_empty_feeds = True - use_embedded_content = False - remove_attributes = ['style'] - preprocess_regexps = [ - (re.compile(ur'