mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-03 19:17:02 -05:00 
			
		
		
		
	
		
			
				
	
	
		
			66 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			66 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
#!/usr/bin/env  python
 | 
						|
 | 
						|
__license__   = 'GPL v3'
 | 
						|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 | 
						|
'''
 | 
						|
msdn.microsoft.com/en-us/magazine
 | 
						|
'''
 | 
						|
from calibre.web.feeds.news import BasicNewsRecipe
 | 
						|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
 | 
						|
 | 
						|
class MSDNMagazine_en(BasicNewsRecipe):
    """Recipe that builds a single-issue download of MSDN Magazine.

    The issue name and cover image are read from the page at ``base_url``;
    the list of articles is read from the RSS feed at ``rss_url``.
    """

    title                 = 'MSDN Magazine'
    __author__            = 'Darko Miletic'
    description           = 'The Microsoft Journal for Developers'
    masthead_url          = 'http://i3.msdn.microsoft.com/Platform/MasterPages/MsdnMagazine/smalllogo.png'
    publisher             = 'Microsoft Press'
    category              = 'news, IT, Microsoft, programming, windows'
    oldest_article        = 31
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
    language              = 'en'

    # Page used to discover the current issue name and its cover image.
    base_url              = 'http://msdn.microsoft.com/en-us/magazine/default.aspx'
    # RSS feed used to enumerate the articles.
    rss_url               = 'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1'

    # Keep only the main content container of each article page.
    keep_only_tags = [dict(name='div', attrs={'id':'MainContent'})]

    # Divs stripped from the kept content (judging by their class names,
    # the ratings widget and the share button).
    remove_tags = [
        dict(name='div', attrs={'class':'DivRatingsOnly'}),
        dict(name='div', attrs={'class':'ShareThisButton4'}),
    ]

    def find_articles(self):
        """Yield one article dict for every ``<item>`` in the RSS feed.

        Each dict carries the keys ``title``, ``url``, ``description`` and
        ``date``, with values extracted through ``tag_to_string``.
        """
        idx_contents = self.browser.open(self.rss_url).read()
        idx = BeautifulStoneSoup(idx_contents, convertEntities=BeautifulStoneSoup.XML_ENTITIES)

        for article in idx.findAll('item'):
            # The description text is parsed a second time as HTML so that
            # any markup embedded in it is reduced to plain text.
            desc_html = self.tag_to_string(article.find('description'))
            description = self.tag_to_string(BeautifulSoup(desc_html))

            # RSS spells this element ``pubDate``. A parser that preserves
            # tag case will only match that spelling, while one that
            # lower-cases tag names will only match ``pubdate``, so try both.
            date_tag = article.find('pubDate')
            if date_tag is None:
                date_tag = article.find('pubdate')

            yield {
                'title': self.tag_to_string(article.find('title')),
                'url': self.tag_to_string(article.find('link')),
                'description': description,
                'date': self.tag_to_string(date_tag),
            }

    def parse_index(self):
        """Return the index as a one-element list: ``[(issue_name, articles)]``.

        As a side effect, sets ``self.cover_url`` when an image whose ``alt``
        text equals the issue name is found on the page at ``base_url``.
        """
        soup = self.index_to_soup(self.base_url)

        # find issue name, eg "August 2011"
        issue_name = self.tag_to_string(soup.find('h1'))

        # find cover pic: the cover image is identified by alt == issue name.
        # Skip the lookup when the issue name is empty, otherwise any image
        # with an empty alt attribute would be taken as the cover.
        img = None
        if issue_name:
            img = soup.find('img', attrs={'alt': issue_name})
        if img is not None:
            self.cover_url = img['src']

        return [(issue_name, list(self.find_articles()))]
 | 
						|
 |