From 73ed9f1268e0c2ff147f85cda5d4bf9d675e2606 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Mon, 28 Apr 2014 00:03:03 +0200 Subject: [PATCH] quick fix for fronda - just populate article list and get the first page of article content --- recipes/fronda.recipe | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/recipes/fronda.recipe b/recipes/fronda.recipe index 8372bb4d81..f677e38499 100644 --- a/recipes/fronda.recipe +++ b/recipes/fronda.recipe @@ -62,34 +62,25 @@ class Fronda(BasicNewsRecipe): except: continue articles[genName] = [] - for item in soup.findAll('li'): - article_h = item.find('h2') - if not article_h: - continue - article_date = self.tag_to_string(item.find('b')) - if self.date_cut(article_date): - continue - article_a = article_h.find('a') + for item in soup.findAll('article',attrs={'class':'article article-wide'}): + article_a = item.find('a') article_url = 'http://www.fronda.pl' + article_a['href'] article_title = self.tag_to_string(article_a) - articles[genName].append( { 'title' : article_title, 'url' : article_url, 'date' : article_date }) + articles[genName].append( { 'title' : article_title, 'url' : article_url }) if articles[genName]: feeds.append((genName, articles[genName])) return feeds keep_only_tags = [ - dict(name='div', attrs={'class':'yui-g'}) + dict(name='div', attrs={'class':'content content-70 phone-100'}) ] remove_tags = [ - dict(name='div', attrs={'class':['related-articles','button right','pagination','related-articles content']}), - dict(name='h3', attrs={'class':'block-header article comments'}), - dict(name='ul', attrs={'class':['comment-list','category','tag-list']}), - dict(name='p', attrs={'id':'comments-disclaimer'}), - dict(name='div', attrs={'style':'text-align: left; margin-bottom: 15px;'}), - dict(name='div', attrs={'style':'text-align: left; margin-top: 15px; margin-bottom: 30px;'}), - dict(name='div', attrs={'id':'comment-form'}), - dict(name='span', attrs={'class':'separator'}) + dict(name='div', attrs={'class':['clearfix','last-articles clearfix','comments clearfix','related-articles','social-buttons clearfix']}), + dict(name='span', attrs={'class':'small-info'}), + dict(name='ul', attrs={'class':'nav nav-tags clearfix'}), + dict(name='h3', attrs={'class':'section-header'}), + dict(name='article', attrs={'class':['slided-article hidden-phone', 'article article-wide hidden-phone']}) ] preprocess_regexps = [