mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update MSNBC
This commit is contained in:
parent
cbce50cbe4
commit
cae761166f
@ -17,9 +17,12 @@ class MsNBC(BasicNewsRecipe):
|
||||
encoding = 'utf8'
|
||||
publisher = 'msnbc.com'
|
||||
language = 'en'
|
||||
remove_empty_feeds = True
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
keep_only_tags=[
|
||||
dict(itemprop='headline'),
|
||||
dict(attrs={'class':lambda x: x and set(x.split()).intersection({'byline_article', 'article_main'})}),
|
||||
dict(attrs={'class':lambda x: x and set(x.split()).intersection({'authors-names', 'pane-node-body'})}),
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name=['iframe', 'button', 'meta', 'link']),
|
||||
@ -27,17 +30,18 @@ class MsNBC(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
feeds = [
|
||||
('Latest', 'http://www.msnbc.com/feeds/latest'),
|
||||
(u'US News' , u'http://rss.msnbc.msn.com/id/3032524/device/rss/rss.xml')
|
||||
,(u'Politics' , u'http://rss.msnbc.msn.com/id/3032552/device/rss/rss.xml')
|
||||
,(u'Business' , u'http://rss.msnbc.msn.com/id/3032071/device/rss/rss.xml')
|
||||
,(u'Sports' , u'http://rss.nbcsports.msnbc.com/id/3032112/device/rss/rss.xml')
|
||||
,(u'Entertainment' , u'http://rss.msnbc.msn.com/id/3032083/device/rss/rss.xml')
|
||||
,(u'Health' , u'http://rss.msnbc.msn.com/id/3088327/device/rss/rss.xml')
|
||||
,(u'Tech & Science', u'http://rss.msnbc.msn.com/id/3032117/device/rss/rss.xml')
|
||||
]
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.get('guid')
|
||||
ans = article.get('guid')
|
||||
if '/video/' not in ans:
|
||||
return ans
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for img in soup.findAll('img', attrs={'data-original':True}):
|
||||
|
Loading…
x
Reference in New Issue
Block a user