From 189159b67f341377fc648806e435b4cdcb9371e8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Mar 2013 09:09:16 +0530 Subject: [PATCH] Update Business Week Magazine --- recipes/bwmagazine2.recipe | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/recipes/bwmagazine2.recipe b/recipes/bwmagazine2.recipe index cba255afa8..608c046d07 100644 --- a/recipes/bwmagazine2.recipe +++ b/recipes/bwmagazine2.recipe @@ -11,8 +11,8 @@ class BusinessWeekMagazine(BasicNewsRecipe): category = 'news' encoding = 'UTF-8' keep_only_tags = [ - dict(name='div', attrs={'id':'article_body_container'}), - ] + dict(name='div', attrs={'id':'article_body_container'}), + ] remove_tags = [dict(name='ui'),dict(name='li'),dict(name='div', attrs={'id':['share-email']})] no_javascript = True no_stylesheets = True @@ -25,6 +25,7 @@ class BusinessWeekMagazine(BasicNewsRecipe): #Find date mag=soup.find('h2',text='Magazine') + self.log(mag) dates=self.tag_to_string(mag.findNext('h3')) self.timefmt = u' [%s]'%dates @@ -32,7 +33,7 @@ class BusinessWeekMagazine(BasicNewsRecipe): div0 = soup.find ('div', attrs={'class':'column left'}) section_title = '' feeds = OrderedDict() - for div in div0.findAll('h4'): + for div in div0.findAll(['h4','h5']): articles = [] section_title = self.tag_to_string(div.findPrevious('h3')).strip() title=self.tag_to_string(div.a).strip() @@ -48,7 +49,7 @@ class BusinessWeekMagazine(BasicNewsRecipe): feeds[section_title] += articles div1 = soup.find ('div', attrs={'class':'column center'}) section_title = '' - for div in div1.findAll('h5'): + for div in div1.findAll(['h4','h5']): articles = [] desc=self.tag_to_string(div.findNext('p')).strip() section_title = self.tag_to_string(div.findPrevious('h3')).strip()