Update Business Week Magazine

This commit is contained in:
Kovid Goyal 2013-03-26 09:09:16 +05:30
parent 4f6709d754
commit 189159b67f

View File

@@ -11,8 +11,8 @@ class BusinessWeekMagazine(BasicNewsRecipe):
category = 'news'
encoding = 'UTF-8'
keep_only_tags = [
-dict(name='div', attrs={'id':'article_body_container'}),
-]
+dict(name='div', attrs={'id':'article_body_container'}),
+]
remove_tags = [dict(name='ui'),dict(name='li'),dict(name='div', attrs={'id':['share-email']})]
no_javascript = True
no_stylesheets = True
@@ -25,6 +25,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):
#Find date
mag=soup.find('h2',text='Magazine')
self.log(mag)
dates=self.tag_to_string(mag.findNext('h3'))
self.timefmt = u' [%s]'%dates
@@ -32,7 +33,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):
div0 = soup.find ('div', attrs={'class':'column left'})
section_title = ''
feeds = OrderedDict()
-for div in div0.findAll('h4'):
+for div in div0.findAll(['h4','h5']):
articles = []
section_title = self.tag_to_string(div.findPrevious('h3')).strip()
title=self.tag_to_string(div.a).strip()
@@ -48,7 +49,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):
feeds[section_title] += articles
div1 = soup.find ('div', attrs={'class':'column center'})
section_title = ''
-for div in div1.findAll('h5'):
+for div in div1.findAll(['h4','h5']):
articles = []
desc=self.tag_to_string(div.findNext('p')).strip()
section_title = self.tag_to_string(div.findPrevious('h3')).strip()