Update Harvard Business Review

Kovid Goyal 2013-09-12 15:28:40 +05:30
parent 3bbfb055e7
commit 54501ee576


@@ -39,10 +39,10 @@ class HBR(BasicNewsRecipe):
         try:
             br.visit('https://hbr.org/login?request_url=/', timeout=20)
         except Timeout:
             pass
-        br.click('#accordion div[tabindex="0"]', wait_for_load=False)
-        f = br.select_form('#signin-form')
-        f['signin-form:username'] = username
-        f['signin-form:password'] = password
+        br.click('#form-wrapper h3[tabindex="0"]', wait_for_load=False)
+        f = br.select_form('#login-form')
+        f['username'] = username
+        f['password'] = password
         br.submit(wait_for_load=False)
         br.run_for_a_time(30)
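For context, a sketch of how the whole login method reads after this hunk. The method name javascript_login and the Timeout import follow the pattern of calibre's jsbrowser-based recipes but are assumptions here; only the body lines appear in the diff itself. The change tracks hbr.org's redesigned sign-in page: a new toggle selector, a renamed form, and field names without the old form-id prefix.

    from calibre.web.jsbrowser.browser import Timeout  # assumed import path

    def javascript_login(self, br, username, password):  # assumed signature
        try:
            # The login page can be slow to load; a Timeout here is non-fatal
            br.visit('https://hbr.org/login?request_url=/', timeout=20)
        except Timeout:
            pass
        # Open the sign-in panel, fill the renamed form, then give the
        # site's JavaScript 30 seconds to finish the login round-trip
        br.click('#form-wrapper h3[tabindex="0"]', wait_for_load=False)
        f = br.select_form('#login-form')
        f['username'] = username
        f['password'] = password
        br.submit(wait_for_load=False)
        br.run_for_a_time(30)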
@@ -56,7 +56,8 @@ class HBR(BasicNewsRecipe):
         articles = []
         for x in soup.find(id='issueFeaturesContent').findAll(['li', 'h4']):
             if x.name == 'h4':
-                if x.get('class', None) == 'basic':continue
+                if x.get('class', None) == 'basic':
+                    continue
                 if current_section is not None and articles:
                     feeds.append((current_section, articles))
                 current_section = self.tag_to_string(x).capitalize()
@@ -64,7 +65,8 @@ class HBR(BasicNewsRecipe):
                 self.log('\tFound section:', current_section)
             else:
                 a = x.find('a', href=True)
-                if a is None: continue
+                if a is None:
+                    continue
                 title = self.tag_to_string(a)
                 url = a['href']
                 if '/ar/' not in url:
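The two hunks above change no behaviour: calibre's code follows PEP 8, which puts one statement per line, so a compound line such as

    if a is None: continue

is split into

    if a is None:
        continue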
@@ -90,11 +92,11 @@ class HBR(BasicNewsRecipe):
     def parse_index(self):
         soup0 = self.index_to_soup('http://hbr.org/magazine')
         datencover = soup0.find('ul', attrs={'id':'magazineArchiveCarousel'}).findAll('li')[-1]
-        #find date & cover
+        # find date & cover
         self.cover_url=datencover.img['src']
         dates=self.tag_to_string(datencover.img['alt'])
         self.timefmt = u' [%s]'%dates
-        soup = self.index_to_soup(self.INDEX + soup0.find('div', attrs = {'class':'magazine_page'}).a['href'])
+        soup = self.index_to_soup(self.INDEX + soup0.find('div', attrs={'class':'magazine_page'}).a['href'])
         feeds = self.hbr_parse_toc(soup)
         return feeds
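The last hunk is also cosmetic: a space after the comment marker and no spaces around '=' in the attrs keyword argument, again per PEP 8. A recipe edited like this can be smoke-tested from the command line with calibre's ebook-convert tool; the recipe filename below is a local copy and the credentials are placeholders:

    ebook-convert hbr.recipe hbr.epub --test --username you@example.com --password secret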