From 1a1b75411c93845ff1296bc19ea1d7db6fe26524 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 28 Jun 2011 13:00:53 -0600
Subject: [PATCH] Fix #802100 (Error upon pulling HBR feed (Change in HBR format))

---
Notes:
    hbr_get_toc no longer follows the 'Full Table of Contents' link from
    the /current page. It now builds the table-of-contents URL as
    INDEX + YYMM, trying the date 30 days ahead first and then today's
    date, and returns the first page that does not contain the text
    'Issue Not Found'. If both lookups fail it raises an Exception.

 recipes/hbr.recipe | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/recipes/hbr.recipe b/recipes/hbr.recipe
index cd7dcd2061..1152a48784 100644
--- a/recipes/hbr.recipe
+++ b/recipes/hbr.recipe
@@ -1,5 +1,6 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
+from datetime import date, timedelta
 
 class HBR(BasicNewsRecipe):
 
@@ -12,13 +13,14 @@ class HBR(BasicNewsRecipe):
     no_stylesheets = True
 
     LOGIN_URL = 'http://hbr.org/login?request_url=/'
-    INDEX = 'http://hbr.org/current'
+    INDEX = 'http://hbr.org/archive-toc/BR'
 
     keep_only_tags = [dict(name='div', id='pageContainer')]
     remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
         'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
         'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
         'mailingListTout', 'partnerCenter', 'pageFooter',
+        'superNavHeadContainer', 'hbrDisqus',
         'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
         dict(name='iframe')]
     extra_css = '''
@@ -55,9 +57,14 @@ class HBR(BasicNewsRecipe):
 
 
     def hbr_get_toc(self):
-        soup = self.index_to_soup(self.INDEX)
-        url = soup.find('a', text=lambda t:'Full Table of Contents' in t).parent.get('href')
-        return self.index_to_soup('http://hbr.org'+url)
+        today = date.today()
+        future = today + timedelta(days=30)
+        for x in [x.strftime('%y%m') for x in (future, today)]:
+            url = self.INDEX + x
+            soup = self.index_to_soup(url)
+            if not soup.find(text='Issue Not Found'):
+                return soup
+        raise Exception('Could not find current issue')
 
     def hbr_parse_section(self, container, feeds):
         current_section = None