Update the boston.com calibre recipe: retitle it 'The Boston Globe', shorten
oldest_article to 2, add publication_type / masthead_url / extra_css, select
article content by element id, request '?page=full' as the print version, and
build article URLs from the feed entry's 'guid'.

Review notes (text before the first "diff --git" line is not part of the patch):
  * The one-record-per-line layout of this patch was restored. The amount of
    whitespace inside each line (indent depth, alignment around '=') is a
    best-effort reconstruction and should be checked against the target file.
  * get_article_url was hardened: an entry without a guid now yields None
    instead of raising AttributeError, and a guid without '?' is returned
    whole instead of ''.
  * The last hunk's line counts were recomputed for the lines shown here; the
    blob ids on the "index" line are those of the original change and were
    not recomputed.

diff --git a/resources/recipes/boston.com.recipe b/resources/recipes/boston.com.recipe
index b398d7cc1b..48add6112c 100644
--- a/resources/recipes/boston.com.recipe
+++ b/resources/recipes/boston.com.recipe
@@ -1,5 +1,5 @@
 __license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic '
+__copyright__ = '2009-2010, Darko Miletic '
 '''
 www.boston.com
 '''
@@ -7,10 +7,10 @@ www.boston.com
 from calibre.web.feeds.recipes import BasicNewsRecipe
 
 class BusinessStandard(BasicNewsRecipe):
-    title = 'Boston'
+    title = 'The Boston Globe'
     __author__ = 'Darko Miletic'
     description = 'News from Boston'
-    oldest_article = 7
+    oldest_article = 2
     max_articles_per_feed = 100
     no_stylesheets = True
     delay = 1
@@ -19,6 +19,9 @@ class BusinessStandard(BasicNewsRecipe):
     publisher = 'Boston'
     category = 'news, boston, usa, world'
     language = 'en'
+    publication_type = 'newspaper'
+    masthead_url = 'http://cache.boston.com/images/globe/grslider/the_boston_globe.gif'
+    extra_css = ' body{font-family: Georgia, serif} div#articleBodyTop{display:block} '
 
     conversion_options = {
         'comments' : description
@@ -27,8 +30,11 @@ class BusinessStandard(BasicNewsRecipe):
         ,'publisher' : publisher
         }
 
-    keep_only_tags = [dict(name='div', attrs={'class':'story'})]
-    remove_tags = [dict(name=['object','link','script','iframe'])]
+    keep_only_tags = [dict(attrs={'id':['INDblogEntry','blogEntry','articleHeader','articleGraphs','galleryShell']})]
+    remove_tags = [
+        dict(name=['object','link','script','iframe'])
+        ,dict(attrs={'id':['blogheadTools','bdc_emailWidget','tools','relatedContent']})
+    ]
 
     feeds = [
         (u'Top Stories' , u'http://feeds.boston.com/boston/topstories' )
@@ -38,11 +44,14 @@ class BusinessStandard(BasicNewsRecipe):
     ]
 
     def print_version(self, url):
-        return url + '?mode=PF'
+        return url + '?page=full'
 
     def get_article_url(self, article):
-        rawarticle = article.get('pheedo_origlink', None)
-        artls, sep, rsep = rawarticle.rpartition('/?')
-        if artls == '':
-            artls = rawarticle.rpartition('?')[0]
-        return artls
+        rawarticle = article.get('guid', None)
+        if not rawarticle:
+            # Entry has no guid: return None rather than fail on None.rpartition
+            return None
+        # Drop the trailing query string; keep the guid whole when it has no '?'
+        # (rpartition('?')[0] alone would be '' in that case).
+        base, sep, _query = rawarticle.rpartition('?')
+        return base if sep else rawarticle