Quick fix for the Fronda recipe: just populate the article list and get only the first page of each article's content

This commit is contained in:
Tomasz Długosz 2014-04-28 00:03:03 +02:00
parent 3ab8d26ac2
commit 73ed9f1268

View File

@@ -62,34 +62,25 @@ class Fronda(BasicNewsRecipe):
except: except:
continue continue
articles[genName] = [] articles[genName] = []
for item in soup.findAll('li'): for item in soup.findAll('article',attrs={'class':'article article-wide'}):
article_h = item.find('h2') article_a = item.find('a')
if not article_h:
continue
article_date = self.tag_to_string(item.find('b'))
if self.date_cut(article_date):
continue
article_a = article_h.find('a')
article_url = 'http://www.fronda.pl' + article_a['href'] article_url = 'http://www.fronda.pl' + article_a['href']
article_title = self.tag_to_string(article_a) article_title = self.tag_to_string(article_a)
articles[genName].append( { 'title' : article_title, 'url' : article_url, 'date' : article_date }) articles[genName].append( { 'title' : article_title, 'url' : article_url })
if articles[genName]: if articles[genName]:
feeds.append((genName, articles[genName])) feeds.append((genName, articles[genName]))
return feeds return feeds
keep_only_tags = [ keep_only_tags = [
dict(name='div', attrs={'class':'yui-g'}) dict(name='div', attrs={'class':'content content-70 phone-100'})
] ]
remove_tags = [ remove_tags = [
dict(name='div', attrs={'class':['related-articles','button right','pagination','related-articles content']}), dict(name='div', attrs={'class':['clearfix','last-articles clearfix','comments clearfix','related-articles','social-buttons clearfix']}),
dict(name='h3', attrs={'class':'block-header article comments'}), dict(name='span', attrs={'class':'small-info'}),
dict(name='ul', attrs={'class':['comment-list','category','tag-list']}), dict(name='ul', attrs={'class':'nav nav-tags clearfix'}),
dict(name='p', attrs={'id':'comments-disclaimer'}), dict(name='h3', attrs={'class':'section-header'}),
dict(name='div', attrs={'style':'text-align: left; margin-bottom: 15px;'}), dict(name='article', attrs={'class':['slided-article hidden-phone', 'article article-wide hidden-phone']})
dict(name='div', attrs={'style':'text-align: left; margin-top: 15px; margin-bottom: 30px;'}),
dict(name='div', attrs={'id':'comment-form'}),
dict(name='span', attrs={'class':'separator'})
] ]
preprocess_regexps = [ preprocess_regexps = [