From e5628d76afc1a82b120bea45188402c2edc1e162 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 29 Mar 2014 08:17:30 +0530 Subject: [PATCH] Update CNet News --- recipes/cnetnews.recipe | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/recipes/cnetnews.recipe b/recipes/cnetnews.recipe index 91114a982a..c9887928b2 100644 --- a/recipes/cnetnews.recipe +++ b/recipes/cnetnews.recipe @@ -1,4 +1,3 @@ - __license__ = 'GPL v3' __copyright__ = '2009, Darko Miletic ' ''' @@ -7,6 +6,8 @@ Changelog: Changed cover (drMerry) 2011-10-13 Updated Cover (drMerry) +2014-03-28 +Update by Armin Geller news.cnet.com ''' @@ -18,13 +19,13 @@ class CnetNews(BasicNewsRecipe): description = 'Tech news and business reports by CNET News. Focused on information technology, core topics include computers, hardware, software, networking, and Internet media.' publisher = 'CNET' category = 'news, IT, USA' - oldest_article = 2 + encoding = 'utf-8' # AGe 2014-03-28 + language = 'en' # AGe 2014-03-28 + oldest_article = 7 max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'cp1252' - use_embedded_content = False - language = 'en' + cover_url = 'http://reviews.cnet.com/i/ff/wp/logo_cnet.gif' + conversion_options = { 'comment' : description , 'tags' : category @@ -32,17 +33,15 @@ class CnetNews(BasicNewsRecipe): , 'language' : language } - remove_tags = [ - dict(name='div', attrs={'id':'tweetmemeAndFacebook'}) - ,dict(name='ul', attrs={'class':'contentTools'}) - ,dict(name='aside', attrs={'id':'filed'}) - ,dict(name='div', attrs={'class':'postLinks'}) - ,dict(name='span', attrs={'class':'shareButton'}) - ,dict(name='span', attrs={'class':'printButton'}) - ,dict(name='span', attrs={'class':'emailButton'}) - ,dict(name='div', attrs={'class':'editorBio'}) + remove_tags = [ # AGe 2014-03-28, new + dict(name='div', attrs={'class':'row controls'}), + dict(name='ul', attrs={'class':'sharebar inline-view'}), + dict(name='div', attrs={'id':'ob_holder'}), + dict(name='span', attrs={'class':'author-social'}), ] - keep_only_tags = dict(name='div', attrs={'class':'post'}) - feeds = [(u'News', u'http://news.cnet.com/2547-1_3-0-20.xml')] + keep_only_tags = dict(name='article', attrs={'id':'article-body'}) # AGe 2014-03-28 + feeds = [ + (u'News', u'http://news.cnet.com/2547-1_3-0-20.xml'), + ]