__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic '
'''
www.boston.com
'''

from calibre.web.feeds.recipes import BasicNewsRecipe


class BusinessStandard(BasicNewsRecipe):
    """Recipe for The Boston Globe, built from the boston.com RSS feeds.

    NOTE(review): the class name does not match the publication (it looks
    carried over from another recipe); it is kept unchanged so that anything
    referring to the class by name keeps working.
    """

    # --- Publication metadata -------------------------------------------
    title = 'The Boston Globe'
    __author__ = 'Darko Miletic'
    description = 'News from Boston'
    publisher = 'Boston'
    category = 'news, boston, usa, world'
    language = 'en'
    publication_type = 'newspaper'
    masthead_url = 'http://cache.boston.com/images/globe/grslider/the_boston_globe.gif'

    # --- Download behaviour ---------------------------------------------
    # Per the calibre recipe API: article age limit (days), cap on articles
    # taken from each feed, and pause between downloads (seconds).
    oldest_article = 2
    max_articles_per_feed = 100
    delay = 1
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'

    # --- Presentation ---------------------------------------------------
    extra_css = ' body{font-family: Georgia, serif} div#articleBodyTop{display:block} '

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # --- Page clean-up ----------------------------------------------------
    # Only elements carrying one of these ids are kept from each page.
    keep_only_tags = [
        dict(attrs={'id': [
            'INDblogEntry',
            'blogEntry',
            'articleHeader',
            'articleGraphs',
            'galleryShell',
        ]}),
    ]

    # Embedded/active content and site widgets that are stripped out.
    remove_tags = [
        dict(name=['object', 'link', 'script', 'iframe']),
        dict(attrs={'id': ['blogheadTools', 'bdc_emailWidget', 'tools', 'relatedContent']}),
    ]

    # --- Feeds: (section title, feed URL) ---------------------------------
    feeds = [
        (u'Top Stories', u'http://feeds.boston.com/boston/topstories'),
        (u'Patriots news', u'http://feeds.boston.com/boston/sports/football/patriots'),
        (u'National news', u'http://feeds.boston.com/boston/news/nation'),
        (u'World news', u'http://feeds.boston.com/boston/news/world'),
    ]

    def print_version(self, url):
        """Return the URL of the full-page version of the article at *url*."""
        return url + '?page=full'

    def get_article_url(self, article):
        """Return the article's guid with its trailing query string removed.

        The feed entry's ``guid`` is used as the article link. Everything
        from the last ``?`` onwards is dropped, so that ``print_version`` can
        append its own query string.

        Returns ``None`` when the entry has no guid. A guid that contains no
        ``?`` is returned unchanged: ``rpartition`` yields an empty head when
        the separator is absent, which previously turned such a link into
        ``''``.
        """
        guid = article.get('guid', None)
        if guid is None:
            return None
        head, sep, _tail = guid.rpartition('?')
        # An empty separator means there was no query string to strip.
        return head if sep else guid