diff --git a/recipes/projo.recipe b/recipes/projo.recipe index e74c492296..cc7afaa3ad 100644 --- a/recipes/projo.recipe +++ b/recipes/projo.recipe @@ -3,21 +3,45 @@ from calibre.web.feeds.news import BasicNewsRecipe class ProvidenceJournal(BasicNewsRecipe): title = u'Providence Journal' language = 'en' - __author__ = 'Krittika Goyal' - oldest_article = 7 # days - max_articles_per_feed = 25 + __author__ = 'mikebw' + oldest_article = 10 # days + max_articles_per_feed = 100 no_stylesheets = True auto_cleanup = True use_embedded_content = False + ignore_duplicate_articles = {'url'} + + publication_type = 'newspaper' + masthead_url = 'http://www.providencejournal.com/Global/images/head/nameplate/providence-journal_logo.png' + + # ProJo web site often returns blank articles unless print version is explicitly requested + def print_version(self, url): + return url + '&template=printart' + +# RSS sources documented at http://www.providencejournal.com/section/feed?refresh=false feeds = [ -('Red Sox', - 'http://www.providencejournal.com/sports/red-sox/content/?rss'), -('Political Scene', - 'http://www.providencejournal.com/politics/political-scene/?rss'), -('Local News', - 'http://www.providencejournal.com/breaking-news/?rss'), -('Music', - 'http://www.providencejournal.com/features/entertainment/music/?rss'), + +('News', + 'http://www.providencejournal.com/news?template=rss&mime=xml'), +('Politics', + 'http://www.providencejournal.com/politics?template=rss&mime=xml'), +('Sports', + 'http://www.providencejournal.com/sports?template=rss&mime=xml'), +('Business', + 'http://www.providencejournal.com/business?template=rss&mime=xml'), +('Opinion', + 'http://www.providencejournal.com/opinion?template=rss&mime=xml'), +('Entertainment', + 'http://www.providencejournal.com/entertainment?template=rss&mime=xml'), +('Lifestyle', + 'http://www.providencejournal.com/lifestyle?template=rss&mime=xml'), +('Food', + 'http://www.providencejournal.com/food?template=rss&mime=xml'), +('Cars', + 'http://www.providencejournal.com/cars?template=rss&mime=xml'), +('Weather', + 'http://www.providencejournal.com/weather?template=rss&mime=xml'), + ]