Update MSNBC

This commit is contained in:
Kovid Goyal 2016-03-22 20:28:16 +05:30
parent cbce50cbe4
commit cae761166f

View File

@ -17,9 +17,12 @@ class MsNBC(BasicNewsRecipe):
encoding = 'utf8'
publisher = 'msnbc.com'
language = 'en'
remove_empty_feeds = True
ignore_duplicate_articles = {'title', 'url'}
keep_only_tags=[
dict(itemprop='headline'),
dict(attrs={'class':lambda x: x and set(x.split()).intersection({'byline_article', 'article_main'})}),
dict(attrs={'class':lambda x: x and set(x.split()).intersection({'authors-names', 'pane-node-body'})}),
]
remove_tags = [
dict(name=['iframe', 'button', 'meta', 'link']),
@ -27,17 +30,18 @@ class MsNBC(BasicNewsRecipe):
]
feeds = [
('Latest', 'http://www.msnbc.com/feeds/latest'),
(u'US News' , u'http://rss.msnbc.msn.com/id/3032524/device/rss/rss.xml')
,(u'Politics' , u'http://rss.msnbc.msn.com/id/3032552/device/rss/rss.xml')
,(u'Business' , u'http://rss.msnbc.msn.com/id/3032071/device/rss/rss.xml')
,(u'Sports' , u'http://rss.nbcsports.msnbc.com/id/3032112/device/rss/rss.xml')
,(u'Entertainment' , u'http://rss.msnbc.msn.com/id/3032083/device/rss/rss.xml')
,(u'Health' , u'http://rss.msnbc.msn.com/id/3088327/device/rss/rss.xml')
,(u'Tech & Science', u'http://rss.msnbc.msn.com/id/3032117/device/rss/rss.xml')
]
def get_article_url(self, article):
    """Return the feed entry's GUID as the article URL, skipping videos.

    The value stored under the ``'guid'`` key of ``article`` is used as
    the URL. ``None`` is returned instead when the GUID is missing or
    empty, or when it contains ``/video/``.

    NOTE(review): presumably the caller drops entries for which this
    returns None -- confirm against BasicNewsRecipe.
    """
    url = article.get('guid')
    # Guard against a missing GUID first: the substring test below would
    # raise TypeError on None.
    if not url or '/video/' in url:
        return None
    return url
def preprocess_html(self, soup):
for img in soup.findAll('img', attrs={'data-original':True}):