mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-10-25 15:52:25 -04:00 
			
		
		
		
	
		
			
				
	
	
		
			66 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			66 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env  python
 | |
| 
 | |
| __license__   = 'GPL v3'
 | |
| __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 | |
| '''
 | |
| msdn.microsoft.com/en-us/magazine
 | |
| '''
 | |
| from calibre.web.feeds.news import BasicNewsRecipe
 | |
| from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
 | |
| 
 | |
class MSDNMagazine_en(BasicNewsRecipe):
    '''
    Recipe for the English edition of MSDN Magazine.

    The issue name and the cover image are read from the magazine landing
    page (base_url); the list of articles is read from the RSS feed (rss_url).
    '''
    title                 = 'MSDN Magazine'
    __author__            = 'Darko Miletic'
    description           = 'The Microsoft Journal for Developers'
    masthead_url          = 'http://i3.msdn.microsoft.com/Platform/MasterPages/MsdnMagazine/smalllogo.png'
    publisher             = 'Microsoft Press'
    category              = 'news, IT, Microsoft, programming, windows'
    oldest_article        = 31
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
    language              = 'en'

    # Landing page used by parse_index() for the issue name and cover image.
    base_url              = 'http://msdn.microsoft.com/en-us/magazine/default.aspx'
    # RSS feed used by find_articles() for the article list.
    rss_url               = 'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1'

    keep_only_tags = [dict(name='div', attrs={'id':'MainContent'})]

    remove_tags = [
        dict(name='div', attrs={'class':'DivRatingsOnly'}),
        dict(name='div', attrs={'class':'ShareThisButton4'}),
    ]

    def find_articles(self):
        '''
        Yield one article dict for every <item> element of the feed at rss_url.

        Each dict carries the keys 'title', 'url', 'description' and 'date',
        each holding the text that tag_to_string extracts from the matching
        child element of the item.
        '''
        idx_contents = self.browser.open(self.rss_url).read()
        idx = BeautifulStoneSoup(idx_contents, convertEntities=BeautifulStoneSoup.XML_ENTITIES)

        for article in idx.findAll('item'):
            # The description text is parsed a second time as HTML so that
            # only its plain text ends up in the article entry.
            desc_html = self.tag_to_string(article.find('description'))
            description = self.tag_to_string(BeautifulSoup(desc_html))

            yield {
                'title': self.tag_to_string(article.find('title')),
                'url': self.tag_to_string(article.find('link')),
                'description': description,
                'date': self.tag_to_string(article.find('pubdate')),
            }

    def parse_index(self):
        '''
        Build the index for the current issue.

        Returns a one-element list holding the tuple
        (issue name, list of article dicts from find_articles()).
        As a side effect, cover_url is set when a cover image is found on
        the landing page.
        '''
        soup = self.index_to_soup(self.base_url)

        # Find the issue name, e.g. "August 2011".
        issue_name = self.tag_to_string(soup.find('h1'))

        # Find the cover picture: the <img> whose alt text is the issue name.
        # Without an issue name the lookup would match on an empty alt text
        # and could select an unrelated image, so it is skipped in that case.
        if issue_name:
            img = soup.find('img', attrs={'alt': issue_name})
            # .get() instead of ['src']: an <img> without a src attribute
            # must not abort the whole download with a KeyError.
            src = img.get('src') if img is not None else None
            if src:
                self.cover_url = src

        return [(issue_name, list(self.find_articles()))]
 | |
| 
 |