Pull from trunk

2025-12-30 08:40:18 -05:00 · 2010-08-07 16:10:44 -06:00 · 2010-08-07 16:10:44 -06:00 · e54b79a455
commit e54b79a455
parent 262418d6d2 301db9fa42
2 changed files with 99 additions and 0 deletions
--- a/resources/recipes/skeptic.recipe
+++ b/resources/recipes/skeptic.recipe
@@ -0,0 +1,49 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class Skeptic(BasicNewsRecipe):  # calibre recipe (BasicNewsRecipe subclass) built from the skeptic.com and skepticblog.org feeds listed below
+    title          = u'The Skeptic'
+    description         = 'Discussions with leading experts and investigation of fringe science and paranormal claims.'
+    language       = 'en'
+    __author__     = 'Starson17'
+    oldest_article = 31  # presumably a maximum article age in days -- confirm against the BasicNewsRecipe docs
+    cover_url           = 'http://www.skeptricks.com/images/Skeptic_Magazine.jpg'
+    remove_empty_feeds    = True
+    remove_javascript   = True
+    max_articles_per_feed = 50
+    no_stylesheets = True
+
+    remove_tags = [dict(name='div', attrs={'class':['Introduction','divider']}),  # tag specs; presumably matching elements are stripped from each article -- verify in BasicNewsRecipe
+                  dict(name='div', attrs={'id':['feature', 'podcast']}),
+                  dict(name='div', attrs={'id':re.compile(r'follow.*', re.DOTALL|re.IGNORECASE)}),  # div ids matched by regex 'follow.*', ignoring case
+                  dict(name='hr'),
+                  ]
+
+
+    feeds = [  # (feed title, feed URL) pairs
+            ('The Skeptic', 'http://www.skeptic.com/feed'),
+            ('E-Skeptic', 'http://www.skeptic.com/eskeptic'),  # NOTE(review): unlike the other entries this URL has no 'feed' path component -- confirm it serves a feed
+            ('All-SkepticBlog', 'http://skepticblog.org/feed'),
+            ('Brian Dunning', 'http://skepticblog.org/author/dunning/feed/'),  # from here on: one skepticblog.org feed per author
+            ('Daniel Loxton', 'http://skepticblog.org/author/loxton/feed/'),
+            ('Kirsten Sanford', 'http://skepticblog.org/author/sanford/feed/'),
+            ('Mark Edward', 'http://skepticblog.org/author/edward/feed/'),
+            ('Michael Shermer', 'http://skepticblog.org/author/shermer/feed/'),
+            ('Phil Plait', 'http://skepticblog.org/author/plait/feed/'),
+            ('Ryan Johnson', 'http://skepticblog.org/author/johnson/feed/'),
+            ('Steven Novella', 'http://skepticblog.org/author/novella/feed/'),
+            ('Yau-Man Chan', 'http://skepticblog.org/author/chan/feed/'),
+            ]
+
+    def get_browser(self):  # returns the base-class browser with its header list replaced by a single Accept header
+        br = BasicNewsRecipe.get_browser(self)
+        br.addheaders = [('Accept', 'text/html')]  # NOTE(review): plain assignment, so anything the base browser had placed in addheaders is overwritten
+        return br
+
+    extra_css = '''
+                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+		'''
+
--- a/resources/recipes/skeptical_enquirer.recipe
+++ b/resources/recipes/skeptical_enquirer.recipe
@@ -0,0 +1,50 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class TheSkepticalInquirer(BasicNewsRecipe):  # calibre recipe (BasicNewsRecipe subclass) that scrapes article links from the csicop.org front page instead of using feeds
+    title          = u'The Skeptical Inquirer'
+    description    = 'Investigation of fringe science and paranormal claims.'
+    language       = 'en'
+    __author__     = 'Starson17'
+    oldest_article = 31  # presumably a maximum article age in days -- confirm against the BasicNewsRecipe docs
+    cover_url           = 'http://www.skeptricks.com/images/Skeptical_Inquirer_Magazine.jpg'
+    remove_empty_feeds    = True
+    remove_javascript   = True
+    max_articles_per_feed = 50
+    no_stylesheets = True
+
+    keep_only_tags = [dict(name='div', attrs={'id':['content', 'bio']})]  # tag spec for div#content / div#bio; presumably only these are kept -- verify in BasicNewsRecipe
+
+    remove_tags = [
+                  dict(name='div', attrs={'id':['socialMedia']}),
+                  ]
+
+    preprocess_regexps = [  # (compiled pattern, replacement callable) pairs
+        (re.compile(r'\.\(JavaScript must be enabled to view this email address\)', re.DOTALL|re.IGNORECASE), lambda match: ''),  # callable returns '' for the e-mail placeholder text
+        ]
+
+    def parse_index(self):  # returns a list of (section title, article list) tuples, one per section that yielded articles
+        feeds = []
+        for title, url in [("The Skeptical Inquirer", "http://www.csicop.org")]:  # single hard-coded section
+            articles = self.make_links(url)
+            if articles:  # sections with no scraped articles are left out
+                feeds.append((title, articles))
+        return feeds
+
+    def make_links(self, url):  # returns a list of article dicts (title, url, description, date) scraped from the page at url
+        soup = self.index_to_soup(url)
+        title = ''  # initial value is never read; it is reassigned inside the loop
+        current_articles = []
+        for item in soup.findAll(attrs={'class':['article-single bigger']}):  # elements whose class is 'article-single bigger'
+            page_url = url + str(item.a["href"])  # assumes each item has an <a> with a site-relative href -- TODO confirm
+            title = str(item.a.string)  # NOTE(review): if item.a.string is None this yields the text 'None' -- confirm the markup
+            current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':''})  # description and date are left empty
+        return current_articles
+
+    extra_css = '''
+                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+		'''
+