mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-03 11:07:02 -05:00 
			
		
		
		
	
		
			
				
	
	
		
			65 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			65 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
#!/usr/bin/env  python2
 | 
						|
 | 
						|
__license__ = 'GPL v3'
 | 
						|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 | 
						|
'''
 | 
						|
msdn.microsoft.com/en-us/magazine
 | 
						|
'''
 | 
						|
from calibre.web.feeds.news import BasicNewsRecipe
 | 
						|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
 | 
						|
 | 
						|
 | 
						|
class MSDNMagazine_en(BasicNewsRecipe):
    '''
    Recipe for MSDN Magazine (msdn.microsoft.com/en-us/magazine).

    The issue name and cover image are read from the magazine landing page
    (``base_url``); the article list is read from the RSS feed (``rss_url``).
    '''

    # Descriptive metadata for the publication.
    title = 'MSDN Magazine'
    __author__ = 'Darko Miletic'
    description = 'The Microsoft Journal for Developers'
    masthead_url = 'http://i3.msdn.microsoft.com/Platform/MasterPages/MsdnMagazine/smalllogo.png'
    publisher = 'Microsoft Press'
    category = 'news, IT, Microsoft, programming, windows'

    # Download/processing settings; their exact semantics are defined by
    # BasicNewsRecipe, not by this class.
    oldest_article = 31
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'en'

    # Landing page (issue name + cover) and RSS feed (article list).
    base_url = 'http://msdn.microsoft.com/en-us/magazine/default.aspx'
    rss_url = 'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1'

    # Tag filters applied to article pages.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'MainContent'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': 'DivRatingsOnly'}),
        dict(name='div', attrs={'class': 'ShareThisButton4'}),
    ]

    def find_articles(self):
        '''
        Yield one article dict per ``item`` element of the RSS feed.

        Each dict has the keys ``title``, ``url``, ``description`` and
        ``date``, all produced with ``self.tag_to_string``.
        '''
        raw_feed = self.browser.open(self.rss_url).read()
        entity_mode = BeautifulStoneSoup.XML_ENTITIES
        feed = BeautifulStoneSoup(raw_feed, convertEntities=entity_mode)

        for item in feed.findAll('item'):
            # The feed description is itself markup: pull it out as a string,
            # parse that string as HTML, then flatten it again.
            raw_description = self.tag_to_string(item.find('description'))
            summary = self.tag_to_string(BeautifulSoup(raw_description))

            yield {
                'title': self.tag_to_string(item.find('title')),
                'url': self.tag_to_string(item.find('link')),
                'description': summary,
                'date': self.tag_to_string(item.find('pubdate')),
            }

    def parse_index(self):
        '''
        Build the index for the current issue.

        Sets ``self.cover_url`` when an ``img`` whose ``alt`` equals the issue
        name is present on the landing page.

        Returns a one-element list ``[(issue_name, articles)]`` where
        ``articles`` is the list produced by ``find_articles``.
        '''
        landing_page = self.index_to_soup(self.base_url)

        # Issue name is the text of the first <h1>, eg "August 2011".
        heading = landing_page.find('h1')
        issue_name = self.tag_to_string(heading)

        # The cover picture is looked up by an alt text equal to the issue name.
        cover_img = landing_page.find('img', attrs={'alt': issue_name})
        if cover_img is not None:
            self.cover_url = cover_img['src']

        articles = list(self.find_articles())
        return [(issue_name, articles)]
 |