# calibre/recipes/popscience.recipe

from calibre.web.feeds.news import BasicNewsRecipe
import re

class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    """Calibre news recipe for the Popular Science "full-feed" RSS feeds.

    The recipe enables ``use_embedded_content`` and pulls five section
    feeds (Gadgets, Cars, Science, Technology, DIY).  Downloaded HTML is
    filtered by :meth:`preprocess_html` to drop "Gallery:" entries.
    """

    title = 'Popular Science'
    language = 'en'
    __author__ = 'TonytheBookworm'
    description = 'Popular Science'
    publisher = 'Popular Science'
    category = 'gadgets,science'
    # The original author keeps a week's worth of articles; lower this
    # value if you only want more current ones.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = True

    masthead_url = 'http://www.raytheon.com/newsroom/rtnwcm/groups/Public/documents/masthead/rtn08_popscidec_masthead.jpg'

    feeds = [
        ('Gadgets', 'http://www.popsci.com/full-feed/gadgets'),
        ('Cars', 'http://www.popsci.com/full-feed/cars'),
        ('Science', 'http://www.popsci.com/full-feed/science'),
        ('Technology', 'http://www.popsci.com/full-feed/technology'),
        ('DIY', 'http://www.popsci.com/full-feed/diy'),
    ]

    def preprocess_html(self, soup):
        """Remove "Gallery:" entries from the parsed page.

        Every ``<head>`` and ``<h2>`` element whose serialized markup
        contains the text ``Gallery:`` has its *parent* element extracted
        from the tree, which gets rid of the Gallery: links when found.

        :param soup: parsed document (BeautifulSoup tree); modified in place.
        :return: the same ``soup`` object.
        """
        # findAll() always returns a list-like result (possibly empty), so
        # no None check is needed; the result is a snapshot, which makes it
        # safe to extract nodes from the tree while iterating.
        # NOTE(review): 'head' is searched as well as 'h2', so a page whose
        # <head> markup contains "Gallery:" loses the parent of <head> --
        # confirm that dropping such pages wholesale is intended.
        for heading in soup.findAll(['head', 'h2']):
            # Plain substring test; equivalent to the former
            # re.search('(Gallery)(:)', ...) whose groups were never used.
            if 'Gallery:' in str(heading):
                heading.parent.extract()
        return soup
  #-----------------------------------------------------------------