mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #802100 (Error upon pulling HBR feed (Change in HBR format))
This commit is contained in:
parent
d3a93c500b
commit
1a1b75411c
@ -1,5 +1,6 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
import re
|
import re
|
||||||
|
from datetime import date, timedelta
|
||||||
|
|
||||||
class HBR(BasicNewsRecipe):
|
class HBR(BasicNewsRecipe):
|
||||||
|
|
||||||
@ -12,13 +13,14 @@ class HBR(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
LOGIN_URL = 'http://hbr.org/login?request_url=/'
|
LOGIN_URL = 'http://hbr.org/login?request_url=/'
|
||||||
INDEX = 'http://hbr.org/current'
|
INDEX = 'http://hbr.org/archive-toc/BR'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', id='pageContainer')]
|
keep_only_tags = [dict(name='div', id='pageContainer')]
|
||||||
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
|
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
|
||||||
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
|
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
|
||||||
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
|
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
|
||||||
'mailingListTout', 'partnerCenter', 'pageFooter',
|
'mailingListTout', 'partnerCenter', 'pageFooter',
|
||||||
|
'superNavHeadContainer', 'hbrDisqus',
|
||||||
'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
|
'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
|
||||||
dict(name='iframe')]
|
dict(name='iframe')]
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
@ -55,9 +57,14 @@ class HBR(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
def hbr_get_toc(self):
|
def hbr_get_toc(self):
|
||||||
soup = self.index_to_soup(self.INDEX)
|
today = date.today()
|
||||||
url = soup.find('a', text=lambda t:'Full Table of Contents' in t).parent.get('href')
|
future = today + timedelta(days=30)
|
||||||
return self.index_to_soup('http://hbr.org'+url)
|
for x in [x.strftime('%y%m') for x in (future, today)]:
|
||||||
|
url = self.INDEX + x
|
||||||
|
soup = self.index_to_soup(url)
|
||||||
|
if not soup.find(text='Issue Not Found'):
|
||||||
|
return soup
|
||||||
|
raise Exception('Could not find current issue')
|
||||||
|
|
||||||
def hbr_parse_section(self, container, feeds):
|
def hbr_parse_section(self, container, feeds):
|
||||||
current_section = None
|
current_section = None
|
||||||
|
Loading…
x
Reference in New Issue
Block a user