Popular Science by Tony Stegall

2025-07-09 03:04:10 -04:00 · 2010-09-18 20:17:30 -06:00 · 2010-09-18 20:17:30 -06:00 · 1a5b92d6d9
commit 1a5b92d6d9
parent 25bacf9a97
2 changed files with 59 additions and 0 deletions
--- a/resources/images/news/popscience.png
+++ b/resources/images/news/popscience.png
--- a/resources/recipes/popscience.recipe
+++ b/resources/recipes/popscience.recipe
@@ -0,0 +1,59 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1282101454(BasicNewsRecipe):
+    """Calibre news recipe that pulls articles from Popular Science's full-feed URLs."""
+
+    title = 'Popular Science'
+    language = 'en'
+    __author__ = 'TonytheBookworm'
+    description = 'Popular Science'
+    publisher = 'Popular Science'
+    category = 'gadgets,science'
+    oldest_article = 7  # one week; lower this if you only want more recent articles
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    remove_javascript = True
+
+    masthead_url = 'http://www.raytheon.com/newsroom/rtnwcm/groups/Public/documents/masthead/rtn08_popscidec_masthead.jpg'
+
+    # Tags to strip from each article, matched by their id/class attributes.
+    remove_tags = [
+        dict(name='div', attrs={'id':['toolbar','main_supplements']}),
+        dict(name='span', attrs={'class':['comments']}),
+        dict(name='div', attrs={'class':['relatedinfo related-right','node_navigation','content2']}),
+        dict(name='ul', attrs={'class':['item-list clear-block']}),
+    ]
+
+    feeds = [
+        ('Gadgets', 'http://www.popsci.com/full-feed/gadgets'),
+        ('Cars', 'http://www.popsci.com/full-feed/cars'),
+        ('Science', 'http://www.popsci.com/full-feed/science'),
+        ('Technology', 'http://www.popsci.com/full-feed/technology'),
+        ('DIY', 'http://www.popsci.com/full-feed/diy'),
+    ]
+
+    def preprocess_html(self, soup):
+        """Get rid of "Gallery:" links: drop the parent of each <head>/<h2> tag containing that text.
+
+        Returns the same soup object, modified in place.
+        """
+        for tag in soup.findAll(['head', 'h2']):  # findAll returns a (possibly empty) list
+            if re.search('(Gallery)(:)', str(tag)):
+                tag.parent.extract()
+        return soup
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+