diff --git a/recipes/bwmagazine2.recipe b/recipes/bwmagazine2.recipe index cba255afa8..608c046d07 100644 --- a/recipes/bwmagazine2.recipe +++ b/recipes/bwmagazine2.recipe @@ -11,8 +11,8 @@ class BusinessWeekMagazine(BasicNewsRecipe): category = 'news' encoding = 'UTF-8' keep_only_tags = [ - dict(name='div', attrs={'id':'article_body_container'}), - ] + dict(name='div', attrs={'id':'article_body_container'}), + ] remove_tags = [dict(name='ui'),dict(name='li'),dict(name='div', attrs={'id':['share-email']})] no_javascript = True no_stylesheets = True @@ -25,6 +25,7 @@ class BusinessWeekMagazine(BasicNewsRecipe): #Find date mag=soup.find('h2',text='Magazine') + self.log(mag) dates=self.tag_to_string(mag.findNext('h3')) self.timefmt = u' [%s]'%dates @@ -32,7 +33,7 @@ class BusinessWeekMagazine(BasicNewsRecipe): div0 = soup.find ('div', attrs={'class':'column left'}) section_title = '' feeds = OrderedDict() - for div in div0.findAll('h4'): + for div in div0.findAll(['h4','h5']): articles = [] section_title = self.tag_to_string(div.findPrevious('h3')).strip() title=self.tag_to_string(div.a).strip() @@ -48,7 +49,7 @@ class BusinessWeekMagazine(BasicNewsRecipe): feeds[section_title] += articles div1 = soup.find ('div', attrs={'class':'column center'}) section_title = '' - for div in div1.findAll('h5'): + for div in div1.findAll(['h4','h5']): articles = [] desc=self.tag_to_string(div.findNext('p')).strip() section_title = self.tag_to_string(div.findPrevious('h3')).strip()