# calibre/recipes/skeptical_enquirer.recipe

import re
try:
    from urllib.parse import urljoin
except ImportError:  # Python 2 based calibre builds
    from urlparse import urljoin

from calibre.web.feeds.news import BasicNewsRecipe

class TheSkepticalInquirer(BasicNewsRecipe):
    """Calibre recipe for The Skeptical Inquirer.

    Articles are discovered by scraping the site's front page for teaser
    blocks (see ``make_links``) and are returned from ``parse_index`` as a
    single feed.
    """

    title          = u'The Skeptical Inquirer'
    description    = 'Investigation of fringe science and paranormal claims.'
    language       = 'en'
    __author__     = 'Starson17'
    oldest_article = 31
    cover_url           = 'http://www.skeptricks.com/images/Skeptical_Inquirer_Magazine.jpg'
    remove_empty_feeds    = True
    remove_javascript   = True
    max_articles_per_feed = 50
    no_stylesheets = True

    # Keep only the article body and the author bio containers.
    keep_only_tags = [dict(name='div', attrs={'id':['content', 'bio']})]

    # Drop the social-media sharing widget from the kept content.
    remove_tags = [
                  dict(name='div', attrs={'id':['socialMedia']}),
                  ]

    # Remove the ".(JavaScript must be enabled to view this email address)"
    # placeholder text that the site emits in place of e-mail addresses.
    preprocess_regexps = [
        (re.compile(r'\.\(JavaScript must be enabled to view this email address\)', re.DOTALL|re.IGNORECASE), lambda match: ''),
        ]

    def parse_index(self):
        """Return the feed list as ``[(feed_title, [article_dict, ...]), ...]``.

        Each configured (title, url) pair is scraped with ``make_links``;
        a feed is included only when at least one article was found.
        """
        feeds = []
        for title, url in [("The Skeptical Inquirer", "http://www.csicop.org")]:
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def make_links(self, url):
        """Scrape ``url`` and return a list of article dicts.

        Every element whose class is ``article-single bigger`` contributes
        one article built from its first ``<a>`` tag. Each dict carries the
        keys ``title``, ``url``, ``description`` and ``date`` (the last two
        are always empty strings).

        Teasers without a link or without an ``href`` are skipped.
        """
        soup = self.index_to_soup(url)
        current_articles = []
        for item in soup.findAll(attrs={'class':['article-single bigger']}):
            link = item.a
            href = link.get('href') if link is not None else None
            if not href:
                # Nothing to download for a teaser without a usable link.
                continue
            current_articles.append({
                # tag_to_string also collects text from nested markup, where
                # ``link.string`` would be None and render as 'None'.
                'title': self.tag_to_string(link).strip(),
                # urljoin copes with absolute hrefs and with relative hrefs
                # that lack a leading slash; plain concatenation does not.
                'url': urljoin(url, href),
                'description': '',
                'date': '',
            })
        return current_articles

    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
		'''