#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic '
'''
seattletimes.nwsource.com
'''
from calibre.web.feeds.news import BasicNewsRecipe


def classes(classes):
    """Build a tag matcher keyed on CSS class names.

    ``classes`` is a string of class names separated by single spaces.
    The result is a dict of the form ``{'attrs': {'class': predicate}}``,
    where ``predicate(x)`` is truthy when ``x`` is a non-empty string whose
    whitespace-separated tokens share at least one name with ``classes``.
    It is falsy for ``None``, for an empty string, and when no name is
    shared.

    NOTE(review): the dict shape looks like BeautifulSoup ``find`` keyword
    arguments, which is presumably how ``keep_only_tags`` / ``remove_tags``
    consume it -- confirm against the calibre recipe documentation.
    """
    q = frozenset(classes.split(' '))
    return dict(attrs={
        'class': lambda x: x and frozenset(x.split()).intersection(q)})


class SeattleTimes(BasicNewsRecipe):
    """Recipe definition for The Seattle Times, built from its RSS feeds.

    The class attributes below are configuration read by
    ``BasicNewsRecipe``; see calibre's recipe API documentation for the
    exact meaning of each setting.
    """

    # Recipe metadata.
    title = 'The Seattle Times'
    __author__ = 'Kovid Goyal'
    description = 'News from Seattle and USA'
    publisher = 'The Seattle Times'
    category = 'news, politics, USA'

    # Download and parsing settings.
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'en'

    # Matchers for the elements that make up the article itself.
    keep_only_tags = [
        classes('article-header featured-media article-body')
    ]
    # Matchers for advertising, "most read" and user-messaging elements.
    remove_tags = [
        classes('most-read-container native-ad-article ad-container user-messaging')
    ]

    # (section title, feed URL) pairs.
    feeds = [
        (u'Local News', u'https://www.seattletimes.com/seattle-news/feed/'),
        (u'Nation & World', u'https://www.seattletimes.com/nation-world/feed/'),
        (u'Business', u'https://www.seattletimes.com/business/feed/'),
        (u'Sports', u'https://www.seattletimes.com/sports/feed/'),
        (u'Entertainment', u'https://www.seattletimes.com/entertainment/feed/'),
        (u'Life', u'https://www.seattletimes.com/life/feed/'),
        (u'Opinion', u'https://www.seattletimes.com/opinion/feed/'),
        (u'Photo and Video', u'https://www.seattletimes.com/photo-video/feed/'),
    ]

    def get_browser(self, *a, **kw):
        """Return the base recipe's browser with its User-Agent header removed.

        All positional and keyword arguments are forwarded unchanged to
        ``BasicNewsRecipe.get_browser``.
        """
        # The servers (called "MyClatchy" in the original note, presumably
        # McClatchy) don't like the user-agent header: they hang forever
        # when it is present.
        br = BasicNewsRecipe.get_browser(self, *a, **kw)
        # Header names are compared case-insensitively.
        br.addheaders = [x for x in br.addheaders if x[0].lower() != 'user-agent']
        return br