mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-10-25 07:48:55 -04:00 
			
		
		
		
	Fix #1162167 (Updated recipe for Harpers printed edition)
This commit is contained in:
		
							parent
							
								
									7a90d00dc8
								
							
						
					
					
						commit
						1c315cb263
					
				| @ -1,5 +1,5 @@ | |||||||
| __license__   = 'GPL v3' | __license__   = 'GPL v3' | ||||||
| __copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>' | __copyright__ = '2008-2013, Darko Miletic <darko.miletic at gmail.com>' | ||||||
| ''' | ''' | ||||||
| harpers.org - paid subscription/ printed issue articles | harpers.org - paid subscription/ printed issue articles | ||||||
| This recipe only get's article's published in text format | This recipe only get's article's published in text format | ||||||
| @ -14,7 +14,7 @@ from calibre import strftime | |||||||
| from calibre.web.feeds.news import BasicNewsRecipe | from calibre.web.feeds.news import BasicNewsRecipe | ||||||
| 
 | 
 | ||||||
| class Harpers_full(BasicNewsRecipe): | class Harpers_full(BasicNewsRecipe): | ||||||
|     title                 = "Harper's Magazine - Printed Edition" |     title                 = "Harper's Magazine - articles from printed edition" | ||||||
|     __author__            = 'Darko Miletic' |     __author__            = 'Darko Miletic' | ||||||
|     description           = "Harper's Magazine, the oldest general-interest monthly in America, explores the issues that drive our national conversation, through long-form narrative journalism and essays, and such celebrated features as the iconic Harper's Index." |     description           = "Harper's Magazine, the oldest general-interest monthly in America, explores the issues that drive our national conversation, through long-form narrative journalism and essays, and such celebrated features as the iconic Harper's Index." | ||||||
|     publisher             = "Harpers's" |     publisher             = "Harpers's" | ||||||
| @ -29,7 +29,6 @@ class Harpers_full(BasicNewsRecipe): | |||||||
|     needs_subscription    = 'optional' |     needs_subscription    = 'optional' | ||||||
|     masthead_url          = 'http://harpers.org/wp-content/themes/harpers/images/pheader.gif' |     masthead_url          = 'http://harpers.org/wp-content/themes/harpers/images/pheader.gif' | ||||||
|     publication_type      = 'magazine' |     publication_type      = 'magazine' | ||||||
|     INDEX                 = '' |  | ||||||
|     LOGIN                 = 'http://harpers.org/wp-content/themes/harpers/ajax_login.php' |     LOGIN                 = 'http://harpers.org/wp-content/themes/harpers/ajax_login.php' | ||||||
|     extra_css             = """ |     extra_css             = """ | ||||||
|                                 body{font-family: adobe-caslon-pro,serif} |                                 body{font-family: adobe-caslon-pro,serif} | ||||||
| @ -66,29 +65,32 @@ class Harpers_full(BasicNewsRecipe): | |||||||
| 
 | 
 | ||||||
|     def parse_index(self): |     def parse_index(self): | ||||||
|         #find current issue |         #find current issue | ||||||
| 
 |  | ||||||
|         soup = self.index_to_soup('http://harpers.org/') |         soup = self.index_to_soup('http://harpers.org/') | ||||||
|         currentIssue=soup.find('div',attrs={'class':'mainNavi'}).find('li',attrs={'class':'curentIssue'}) |         currentIssue=soup.find('div',attrs={'class':'mainNavi'}).find('li',attrs={'class':'curentIssue'}) | ||||||
|         currentIssue_url=self.tag_to_string(currentIssue.a['href']) |         currentIssue_url=self.tag_to_string(currentIssue.a['href']) | ||||||
|  |         self.log(currentIssue_url) | ||||||
| 
 | 
 | ||||||
|         #go to the current issue |         #go to the current issue | ||||||
|         soup1 = self.index_to_soup(currentIssue_url) |         soup1 = self.index_to_soup(currentIssue_url) | ||||||
|         date = re.split('\s\|\s',self.tag_to_string(soup1.head.title.string))[0] |         currentIssue_title = self.tag_to_string(soup1.head.title.string) | ||||||
|  |         date = re.split('\s\|\s',currentIssue_title)[0] | ||||||
|         self.timefmt =  u' [%s]'%date |         self.timefmt =  u' [%s]'%date | ||||||
| 
 | 
 | ||||||
|         #get cover |         #get cover | ||||||
|         self.cover_url = soup1.find('div', attrs = {'class':'picture_hp'}).find('img', src=True)['src']          |         self.cover_url = soup1.find('div', attrs = {'class':'picture_hp'}).find('img', src=True)['src']          | ||||||
|  |         self.log(self.cover_url) | ||||||
|          |          | ||||||
|         articles = [] |         articles = [] | ||||||
|         count = 0 |         count = 0 | ||||||
|         for item in soup1.findAll('div', attrs={'class':'articleData'}): |         for item in soup1.findAll('div', attrs={'class':'articleData'}): | ||||||
|             text_links = item.findAll('h2') |             text_links = item.findAll('h2') | ||||||
|  |             if text_links: | ||||||
|                 for text_link in text_links: |                 for text_link in text_links: | ||||||
|                     if count == 0: |                     if count == 0: | ||||||
|                        count = 1 |                        count = 1 | ||||||
|                     else: |                     else: | ||||||
|                        url   = text_link.a['href'] |                        url   = text_link.a['href'] | ||||||
|                    title = text_link.a.contents[0] |                        title = self.tag_to_string(text_link.a) | ||||||
|                        date  = strftime(' %B %Y') |                        date  = strftime(' %B %Y') | ||||||
|                        articles.append({ |                        articles.append({ | ||||||
|                                           'title'      :title |                                           'title'      :title | ||||||
| @ -96,7 +98,7 @@ class Harpers_full(BasicNewsRecipe): | |||||||
|                                          ,'url'        :url |                                          ,'url'        :url | ||||||
|                                          ,'description':'' |                                          ,'description':'' | ||||||
|                                         }) |                                         }) | ||||||
|         return [(soup1.head.title.string, articles)] |         return [(currentIssue_title, articles)] | ||||||
| 
 | 
 | ||||||
|     def print_version(self, url): |     def print_version(self, url): | ||||||
|         return url + '?single=1' |         return url + '?single=1' | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user