diff --git a/recipes/kopalniawiedzy.recipe b/recipes/kopalniawiedzy.recipe index 79aa913498..628dc1b2d2 100644 --- a/recipes/kopalniawiedzy.recipe +++ b/recipes/kopalniawiedzy.recipe @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- __license__ = 'GPL v3' __copyright__ = '2011, Attis ' __version__ = 'v. 0.1' @@ -16,21 +15,21 @@ class KopalniaWiedzy(BasicNewsRecipe): oldest_article = 7 max_articles_per_feed = 100 INDEX = u'http://kopalniawiedzy.pl/' - remove_javascript = True + remove_javascript = True no_stylesheets = True - - remove_tags = [{'name':'p', 'attrs': {'class': 'keywords'} }] + + remove_tags = [{'name':'p', 'attrs': {'class': 'keywords'} }, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}] remove_tags_after = dict(attrs={'class':'ad-square'}) keep_only_tags = [dict(name="div", attrs={'id':'articleContent'})] extra_css = '.topimage {margin-top: 30px}' - + preprocess_regexps = [ (re.compile(u''), lambda match: '' ), (re.compile(u'

'), lambda match: '') ] - + feeds = [ (u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'), (u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'), @@ -39,10 +38,10 @@ class KopalniaWiedzy(BasicNewsRecipe): (u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'), (u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss') ] - + def is_link_wanted(self, url, tag): return tag['class'] == 'next' - + def remove_beyond(self, tag, next): while tag is not None and getattr(tag, 'name', None) != 'body': after = getattr(tag, next) @@ -51,30 +50,30 @@ class KopalniaWiedzy(BasicNewsRecipe): after.extract() after = ns tag = tag.parent - + def append_page(self, soup, appendtag, position): pager = soup.find('a',attrs={'class':'next'}) if pager: nexturl = self.INDEX + pager['href'] soup2 = self.index_to_soup(nexturl) texttag = soup2.find('div', attrs={'id':'articleContent'}) - + tag = texttag.find(attrs={'class':'pages'}) self.remove_beyond(tag, 'nextSibling') - + newpos = len(texttag.contents) self.append_page(soup2,texttag,newpos) appendtag.insert(position,texttag) - def preprocess_html(self, soup): + def preprocess_html(self, soup): self.append_page(soup, soup.body, 3) - + for item in soup.findAll('div',attrs={'class':'pages'}): item.extract() - + for item in soup.findAll('p', attrs={'class':'wykop'}): item.extract() - + return soup