try:
    from urllib.parse import quote
except ImportError:  # Python 2 keeps quote() in the top-level urllib module
    from urllib import quote

from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.recipes import BasicNewsRecipe


class SportsIllustratedColumnistsRecipe(BasicNewsRecipe):
    """Recipe that collects the Sports Illustrated columnist RSS feeds.

    Each article is fetched through the clickability "print this" service
    (see ``print_version``) and then reduced to its headline and paragraphs
    (see ``preprocess_html``).
    """

    title = u'Sports Illustrated Columnists'
    __author__ = u'kwetal'
    __license__ = u'GPL v3'
    language = 'en'
    version = 2

    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    # RSS sources found at http://sportsillustrated.cnn.com/services/rss/
    feeds = [
        (u'Jon Heyman', u'http://rss.cnn.com/rss/si_jon_heyman.rss'),
        (u'Austin Murphy', u'http://rss.cnn.com/rss/si_austin_murphy.rss'),
        (u'Lars Anderson', u'http://rss.cnn.com/rss/si_lars_anderson.rss'),
        (u'Melissa Segura', u'http://rss.cnn.com/rss/si_melissa_segura.rss'),
        (u'Peter King', u'http://rss.cnn.com/rss/si_peter_king.rss'),
        (u'Scott Wraight', u'http://rss.cnn.com/rss/si_scott_wraight.rss'),
    ]

    def print_version(self, url):
        """Return the "print this" URL for the article at ``url``.

        The base URL and parameters below are the ones that work to get the
        print version. The original javascript also uses the following
        parameters, but they can be left out:

        * title  : can be some random string
        * random : some random number, but the number of digits may be
          important
        * expire : no idea what value to use

        All this comes from the javascript function that redirects to the
        print version. It's called PT() and is defined in the file 48.js.
        """
        printUrl = 'http://si.printthis.clickability.com/pt/printThis?clickMap=printThis'
        # The article address travels as a query parameter, so it is
        # percent-encoded before being appended.
        printUrl += '&fb=Y&partnerID=2356&url=' + quote(url)
        return printUrl

    def preprocess_html(self, soup):
        """Reduce an article page to its headline and paragraphs.

        A page containing a ``div`` with class ``cnnstoryheadline`` is
        treated as an article: a new document is returned holding the
        headline's ``h1`` (when present) followed by every ``p`` found inside
        ``td`` elements with class ``cnnstorycontentarea``. Any other page is
        treated as a table of contents and returned unchanged.
        """
        temp = soup.find('div', attrs={'class': 'cnnstoryheadline'})
        if not temp:
            # It's a TOC, just return the whole lot
            return soup

        # It's an article, make a valid content container. The skeleton is
        # spelled out so that a <body> exists no matter how the parser treats
        # empty input; without one, find('body') returns None and the
        # append() calls below fail.
        homeMadeSoup = BeautifulSoup(
            '<html><head></head><body></body></html>')
        body = homeMadeSoup.find('body')

        headline = temp.find('h1')
        if headline:
            body.append(headline)

        for td in soup.findAll('td', attrs={'class': 'cnnstorycontentarea'}):
            for p in td.findAll('p'):
                body.append(p)

        return homeMadeSoup