From 1c315cb2638bebc74af70431c7cf3537b2421be6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 30 Mar 2013 17:07:01 +0530 Subject: [PATCH] Fix #1162167 (Updated recipe for Harpers printed edition) --- recipes/harpers_full.recipe | 44 +++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/recipes/harpers_full.recipe b/recipes/harpers_full.recipe index 862dba0bc8..24778925d3 100644 --- a/recipes/harpers_full.recipe +++ b/recipes/harpers_full.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = '2008-2012, Darko Miletic ' +__copyright__ = '2008-2013, Darko Miletic ' ''' harpers.org - paid subscription/ printed issue articles This recipe only get's article's published in text format @@ -14,7 +14,7 @@ from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe class Harpers_full(BasicNewsRecipe): - title = "Harper's Magazine - Printed Edition" + title = "Harper's Magazine - articles from printed edition" __author__ = 'Darko Miletic' description = "Harper's Magazine, the oldest general-interest monthly in America, explores the issues that drive our national conversation, through long-form narrative journalism and essays, and such celebrated features as the iconic Harper's Index." publisher = "Harpers's" @@ -29,7 +29,6 @@ class Harpers_full(BasicNewsRecipe): needs_subscription = 'optional' masthead_url = 'http://harpers.org/wp-content/themes/harpers/images/pheader.gif' publication_type = 'magazine' - INDEX = '' LOGIN = 'http://harpers.org/wp-content/themes/harpers/ajax_login.php' extra_css = """ body{font-family: adobe-caslon-pro,serif} @@ -66,37 +65,40 @@ class Harpers_full(BasicNewsRecipe): def parse_index(self): #find current issue - soup = self.index_to_soup('http://harpers.org/') currentIssue=soup.find('div',attrs={'class':'mainNavi'}).find('li',attrs={'class':'curentIssue'}) currentIssue_url=self.tag_to_string(currentIssue.a['href']) + self.log(currentIssue_url) #go to the current issue soup1 = self.index_to_soup(currentIssue_url) - date = re.split('\s\|\s',self.tag_to_string(soup1.head.title.string))[0] + currentIssue_title = self.tag_to_string(soup1.head.title.string) + date = re.split('\s\|\s',currentIssue_title)[0] self.timefmt = u' [%s]'%date #get cover - self.cover_url = soup1.find('div', attrs = {'class':'picture_hp'}).find('img', src=True)['src'] - + self.cover_url = soup1.find('div', attrs = {'class':'picture_hp'}).find('img', src=True)['src'] + self.log(self.cover_url) + articles = [] count = 0 for item in soup1.findAll('div', attrs={'class':'articleData'}): text_links = item.findAll('h2') - for text_link in text_links: - if count == 0: - count = 1 - else: - url = text_link.a['href'] - title = text_link.a.contents[0] - date = strftime(' %B %Y') - articles.append({ - 'title' :title - ,'date' :date - ,'url' :url - ,'description':'' - }) - return [(soup1.head.title.string, articles)] + if text_links: + for text_link in text_links: + if count == 0: + count = 1 + else: + url = text_link.a['href'] + title = self.tag_to_string(text_link.a) + date = strftime(' %B %Y') + articles.append({ + 'title' :title + ,'date' :date + ,'url' :url + ,'description':'' + }) + return [(currentIssue_title, articles)] def print_version(self, url): return url + '?single=1'