__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic '
'''
miamiherald.com
'''
from calibre.web.feeds.news import BasicNewsRecipe


def classes(classes):
    """Build a tag-matching spec that selects elements by CSS class.

    ``classes`` is a string of class names separated by single spaces.
    The returned dict has one key, ``attrs``, mapping ``'class'`` to a
    predicate. Given an element's class attribute string, the predicate
    yields a truthy value when that string shares at least one
    whitespace-separated name with ``classes``; a missing or empty class
    attribute yields a falsy value.
    """
    q = frozenset(classes.split(' '))
    return dict(attrs={
        'class': lambda x: x and frozenset(x.split()).intersection(q)})


class TheMiamiHerald(BasicNewsRecipe):
    """Recipe that builds The Miami Herald from the site's RSS feeds."""

    title = 'The Miami Herald'
    __author__ = 'Kovid Goyal'
    description = "Miami-Dade and Broward's source for the latest breaking local news on sports, weather, business, jobs, real estate, shopping, health, travel, entertainment, & more."  # noqa
    oldest_article = 1
    max_articles_per_feed = 100
    publisher = u'The Miami Herald'
    language = 'en'
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True

    # Metadata reuses the class-level values defined above.
    conversion_options = {
        'comment': description,
        'publisher': publisher,
        'language': language,
    }

    # Keep only elements carrying the 'story-body' class ...
    keep_only_tags = [
        classes('story-body'),
    ]
    # ... and drop elements carrying either of these classes.
    remove_tags = [
        classes('social-network-macro social-media'),
    ]

    # (section title, feed URL) pairs.
    feeds = [
        (u'News', u'https://www.miamiherald.com/news/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Miami-Dade', u'https://www.miamiherald.com/news/local/community/miami-dade/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Broward', u'https://www.miamiherald.com/news/local/community/broward/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Florida Keys', u'https://www.miamiherald.com/news/local/community/florida-keys/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Florida', u'https://www.miamiherald.com/news/state/florida/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'National', u'https://www.miamiherald.com/news/nation-world/national/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'World', u'https://www.miamiherald.com/news/nation-world/world/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Americas', u'https://www.miamiherald.com/news/nation-world/world/americas/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Cuba', u'https://www.miamiherald.com/news/nation-world/world/americas/cuba/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Haiti', u'https://www.miamiherald.com/news/nation-world/world/americas/haiti/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Politics', u'https://www.miamiherald.com/news/politics-government/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Education', u'https://www.miamiherald.com/news/local/education/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Environment', u'https://www.miamiherald.com/news/local/environment/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Sports', u'https://www.miamiherald.com/sports/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
    ]

    def get_browser(self, *a, **kw):
        """Return the base recipe's browser with any User-Agent header removed.

        All positional and keyword arguments are forwarded unchanged to
        ``BasicNewsRecipe.get_browser``.
        """
        # MyClatchy servers don't like the user-agent header, they hang
        # forever when it is present
        br = BasicNewsRecipe.get_browser(self, *a, **kw)
        br.addheaders = [
            header for header in br.addheaders
            if header[0].lower() != 'user-agent'
        ]
        return br

    def preprocess_html(self, soup):
        """Flatten ``<picture>`` elements to a plain ``<img>``.

        For every ``<picture>``, the first URL listed in the ``srcset`` of
        its first ``<source>`` becomes the ``src`` of the picture's
        ``<img>``; every ``<source>`` child is then removed from the tree.
        A picture lacking an ``<img>``, or whose first ``<source>`` has no
        usable ``srcset``, keeps whatever ``src`` it had instead of raising.

        Returns the same ``soup`` object, modified in place.
        """
        for picture in soup.findAll('picture'):
            img = picture.find('img')
            sources = picture.findAll('source')
            if img is not None and sources:
                # srcset is whitespace-separated; take its first token,
                # tolerating a missing or empty attribute.
                candidates = (sources[0].get('srcset') or '').split()
                if candidates:
                    img['src'] = candidates[0]
            # Always strip the <source> elements, even when no src was set.
            for source in sources:
                source.extract()
        return soup