From 9d8a89d6e5199979c5ef09392b54d03125769d3f Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 25 Feb 2013 10:07:21 +0530
Subject: [PATCH] Update Science News

---
 recipes/science_news.recipe | 61 ++++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 25 deletions(-)

diff --git a/recipes/science_news.recipe b/recipes/science_news.recipe
index fa24bbadcf..53b451030a 100644
--- a/recipes/science_news.recipe
+++ b/recipes/science_news.recipe
@@ -1,24 +1,38 @@
 #!/usr/bin/env  python
 
 __license__   = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
 '''
 sciencenews.org
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 
-class Sciencenews(BasicNewsRecipe):
-    title                 = u'ScienceNews'
-    __author__            = u'Darko Miletic and Sujata Raman'
-    description           = u"Science News is an award-winning weekly newsmagazine covering the most important research in all fields of science. Its 16 pages each week are packed with short, accurate articles that appeal to both general readers and scientists. Published since 1922, the magazine now reaches about 150,000 subscribers and more than 1 million readers. These are the latest News Items from Science News."
+class ScienceNewsIssue(BasicNewsRecipe):
+    title                 = u'Science News Recent Issues'
+    __author__            = u'Darko Miletic, Sujata Raman and Starson17'
+    description           = u'''Science News is an award-winning weekly
+    newsmagazine covering the most important research in all fields of science.
+    Its 16 pages each week are packed with short, accurate articles that appeal
+    to both general readers and scientists. Published since 1922, the magazine
+    now reaches about 150,000 subscribers and more than 1 million readers.
+    These are the latest News Items from Science News. This recipe downloads
+    the last 30 days worth of articles.'''
+    category              = u'Science, Technology, News'
+    publisher             = u'Society for Science & the Public'
     oldest_article        = 30
     language = 'en'
-
     max_articles_per_feed = 100
     no_stylesheets        = True
     use_embedded_content  = False
-    auto_cleanup = True
     timefmt               = ' [%A, %d %B, %Y]'
+    recursions = 1
+    remove_attributes = ['style']
+
+    conversion_options = {'linearize_tables'  : True
+                        , 'comment'           : description
+                        , 'tags'              : category
+                        , 'publisher'         : publisher
+                        , 'language'          : language
+                        }
 
     extra_css = '''
                 .content_description{font-family:georgia ;font-size:x-large; color:#646464 ; font-weight:bold;}
@@ -27,36 +41,33 @@ class Sciencenews(BasicNewsRecipe):
                 .content_edition{font-family:helvetica,arial ;font-size: xx-small ;}
                 .exclusive{color:#FF0000 ;}
                 .anonymous{color:#14487E ;}
-                .content_content{font-family:helvetica,arial ;font-size: x-small ; color:#000000;}
-                .description{color:#585858;font-family:helvetica,arial ;font-size: xx-small ;}
+                .content_content{font-family:helvetica,arial ;font-size: medium ; color:#000000;}
+                .description{color:#585858;font-family:helvetica,arial ;font-size: large ;}
                 .credit{color:#A6A6A6;font-family:helvetica,arial ;font-size: xx-small ;}
                 '''
 
-    #keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ]
-    #remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'})
-    #remove_tags = [
-                     #dict(name='ul', attrs={'id':'content_functions_bottom'})
-                    #,dict(name='div', attrs={'id':['content_functions_top','breadcrumb_content']})
-                    #,dict(name='img', attrs={'class':'icon'})
-                    #,dict(name='div', attrs={'class': 'embiggen'})
-                  #]
+    keep_only_tags = [ dict(name='div', attrs={'class':'content_content'}),
+                       dict(name='ul', attrs={'id':'toc'})
+                     ]
 
-    feeds       = [(u"Science News / News Items", u'http://sciencenews.org/index.php/feed/type/news/name/news.rss/view/feed/name/all.rss')]
+    feeds       = [(u"Science News Current Issues", u'http://www.sciencenews.org/view/feed/type/edition/name/issues.rss')]
+
+    match_regexps = [
+            r'www.sciencenews.org/view/feature/id/',
+            r'www.sciencenews.org/view/generic/id'
+            ]
 
     def get_cover_url(self):
         cover_url = None
         index = 'http://www.sciencenews.org/view/home'
         soup = self.index_to_soup(index)
         link_item = soup.find(name = 'img',alt = "issue")
-        print link_item
         if link_item:
            cover_url = 'http://www.sciencenews.org' + link_item['src'] + '.jpg'
 
         return cover_url
 
-    #def preprocess_html(self, soup):
-
-            #for tag in soup.findAll(name=['span']):
-                #tag.name = 'div'
-
-            #return soup
+    def preprocess_html(self, soup):
+        for tag in soup.findAll(name=['span']):
+            tag.name = 'div'
+        return soup