Fix #802100 (Error upon pulling HBR feed (Change in HBR format))

This commit is contained in:
Kovid Goyal 2011-06-28 13:00:53 -06:00
parent d3a93c500b
commit 1a1b75411c

View File

@ -1,5 +1,6 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re import re
from datetime import date, timedelta
class HBR(BasicNewsRecipe): class HBR(BasicNewsRecipe):
@ -12,13 +13,14 @@ class HBR(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
LOGIN_URL = 'http://hbr.org/login?request_url=/' LOGIN_URL = 'http://hbr.org/login?request_url=/'
INDEX = 'http://hbr.org/current' INDEX = 'http://hbr.org/archive-toc/BR'
keep_only_tags = [dict(name='div', id='pageContainer')] keep_only_tags = [dict(name='div', id='pageContainer')]
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline', remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn', 'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR', 'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
'mailingListTout', 'partnerCenter', 'pageFooter', 'mailingListTout', 'partnerCenter', 'pageFooter',
'superNavHeadContainer', 'hbrDisqus',
'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']), 'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
dict(name='iframe')] dict(name='iframe')]
extra_css = ''' extra_css = '''
@ -55,9 +57,14 @@ class HBR(BasicNewsRecipe):
def hbr_get_toc(self): def hbr_get_toc(self):
soup = self.index_to_soup(self.INDEX) today = date.today()
url = soup.find('a', text=lambda t:'Full Table of Contents' in t).parent.get('href') future = today + timedelta(days=30)
return self.index_to_soup('http://hbr.org'+url) for x in [x.strftime('%y%m') for x in (future, today)]:
url = self.INDEX + x
soup = self.index_to_soup(url)
if not soup.find(text='Issue Not Found'):
return soup
raise Exception('Could not find current issue')
def hbr_parse_section(self, container, feeds): def hbr_parse_section(self, container, feeds):
current_section = None current_section = None