calibre/recipes/msdnmag_en.recipe
Kovid Goyal 567040ee1e Perform PEP8 compliance checks on the entire codebase
Some bits of PEP 8 are turned off via setup.cfg
2016-07-29 21:25:17 +05:30

65 lines
2.2 KiB
Python

#!/usr/bin/env python2
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
msdn.microsoft.com/en-us/magazine
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
class MSDNMagazine_en(BasicNewsRecipe):
    """Recipe for the current issue of the English-language MSDN Magazine.

    ``parse_index`` reads the issue name and the cover image from
    ``base_url`` and returns a single feed whose articles are the ones
    ``find_articles`` extracts from the RSS feed at ``rss_url``.
    """

    title = 'MSDN Magazine'
    __author__ = 'Darko Miletic'
    description = 'The Microsoft Journal for Developers'
    masthead_url = 'http://i3.msdn.microsoft.com/Platform/MasterPages/MsdnMagazine/smalllogo.png'
    publisher = 'Microsoft Press'
    category = 'news, IT, Microsoft, programming, windows'
    oldest_article = 31
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'en'

    # Landing page of the magazine; parse_index() takes the issue name and
    # the cover image from it.
    base_url = 'http://msdn.microsoft.com/en-us/magazine/default.aspx'
    # RSS feed that find_articles() reads the article list from.
    rss_url = 'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1'

    keep_only_tags = [dict(name='div', attrs={'id': 'MainContent'})]
    remove_tags = [
        dict(name='div', attrs={'class': 'DivRatingsOnly'}),
        dict(name='div', attrs={'class': 'ShareThisButton4'}),
    ]

    def find_articles(self):
        """Yield one article dict per ``<item>`` of the feed at ``rss_url``.

        Each dict carries the keys ``title``, ``url``, ``description`` and
        ``date``, all taken from the item's child elements of matching name
        (``link`` for the URL, ``pubdate`` for the date).
        """
        raw_feed = self.browser.open(self.rss_url).read()
        feed = BeautifulStoneSoup(
            raw_feed, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
        for item in feed.findAll('item'):
            # The description text is itself markup: parse it a second time
            # as HTML and keep only the text content.
            desc_html = self.tag_to_string(item.find('description'))
            description = self.tag_to_string(BeautifulSoup(desc_html))
            yield {
                'title': self.tag_to_string(item.find('title')),
                'url': self.tag_to_string(item.find('link')),
                'description': description,
                'date': self.tag_to_string(item.find('pubdate')),
            }

    def parse_index(self):
        """Return the index as a one-element list of ``(title, articles)``.

        The feed title is the text of the first ``<h1>`` on ``base_url``
        (the issue name, e.g. "August 2011"). When the page has no usable
        heading the recipe title is used instead, so the feed never ends up
        with an empty name.

        Side effect: ``self.cover_url`` is set when the page contains an
        image whose ``alt`` text equals the issue name and which has a
        ``src`` attribute.
        """
        soup = self.index_to_soup(self.base_url)

        # find issue name, eg "August 2011"
        heading = soup.find('h1')
        issue_name = self.tag_to_string(heading) if heading is not None else ''

        # find cover pic -- only when an issue name is known: searching for
        # alt='' would select an arbitrary image that merely has empty alt
        # text rather than the cover.
        if issue_name:
            img = soup.find('img', attrs={'alt': issue_name})
            cover = img.get('src') if img is not None else None
            if cover:
                self.cover_url = cover

        return [(issue_name or self.title, list(self.find_articles()))]