From cbef5c2c9a8070ba0f290a35050683f8d1a13343 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sat, 13 Oct 2018 23:11:36 +0200 Subject: [PATCH] fix purepc.pl recipe --- recipes/pure_pc.recipe | 31 +++---------------------------- 1 file changed, 3 insertions(+), 28 deletions(-) diff --git a/recipes/pure_pc.recipe b/recipes/pure_pc.recipe index 952285c017..ae599c4759 100644 --- a/recipes/pure_pc.recipe +++ b/recipes/pure_pc.recipe @@ -11,35 +11,10 @@ class PurePC(BasicNewsRecipe): description = u'Artykuły, aktualności, sprzęt, forum, chłodzenie, modding, urządzenia mobilne - wszystko w jednym miejscu.' category = 'IT' language = 'pl' - masthead_url = 'http://www.purepc.pl/themes/new/images/purepc.jpg' cover_url = 'http://www.purepc.pl/themes/new/images/purepc.jpg' extra_css = '.wykres_logo {float: left; margin-right: 5px;}' no_stylesheets = True - keep_only_tags = [dict(id='content')] - remove_tags_after = dict(attrs={'class': 'fivestar-widget'}) - remove_tags = [dict(id='navigator'), dict( - attrs={'class': ['box-tools', 'fivestar-widget', 'PageMenuList']})] + + keep_only_tags = [dict(name='div', attrs={'class':'node page0'})] + remove_tags = [dict(name='div', attrs={'class':'article-options'})] feeds = [(u'Wiadomo\u015bci', u'http://www.purepc.pl/node/feed')] - - def append_page(self, soup, appendtag): - lasturl = appendtag.find(attrs={'class': 'pager-last'}) - if lasturl: - regex = re.search('(.+?2C)(\d+)', lasturl.a['href']) - baseurl = regex.group(1).replace('?page=0%2C', '?page=1%2C') - baseurl = 'http://www.purepc.pl' + baseurl - nr = int(regex.group(2)) - for page_nr in range(1, nr + 1): - soup2 = self.index_to_soup(baseurl + str(page_nr)) - pagetext = soup2.find(attrs={'class': 'article'}) - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - for r in appendtag.findAll(attrs={'class': ['PageMenuList', 'pager', 'fivestar-widget']}): - r.extract() - comments = appendtag.findAll( - text=lambda text: isinstance(text, Comment)) - for comment in comments: - comment.extract() - - def preprocess_html(self, soup): - self.append_page(soup, soup.body) - return soup