import re
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    """Calibre news recipe that downloads the Popular Science full-text feeds.

    Articles are pulled from the section feeds listed in ``feeds``; page
    furniture (toolbars, comment counters, related-content boxes, navigation
    lists) is stripped via ``remove_tags``, and "Gallery:" teaser blocks are
    removed in ``preprocess_html``.
    """

    title = 'Popular Science'
    language = 'en'
    __author__ = 'TonytheBookworm'
    description = 'Popular Science'
    publisher = 'Popular Science'
    category = 'gadgets,science'
    # Maximum article age in days; lower this for more current articles.
    # Seven days gives one week of back content.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    masthead_url = 'http://www.raytheon.com/newsroom/rtnwcm/groups/Public/documents/masthead/rtn08_popscidec_masthead.jpg'

    # Page elements that are dropped from every downloaded article.
    remove_tags = [
        dict(name='div', attrs={'id': ['toolbar', 'main_supplements']}),
        dict(name='span', attrs={'class': ['comments']}),
        dict(name='div', attrs={'class': ['relatedinfo related-right', 'node_navigation', 'content2']}),
        dict(name='ul', attrs={'class': ['item-list clear-block']}),
    ]

    # (section title, feed URL) pairs.
    feeds = [
        ('Gadgets', 'http://www.popsci.com/full-feed/gadgets'),
        ('Cars', 'http://www.popsci.com/full-feed/cars'),
        ('Science', 'http://www.popsci.com/full-feed/science'),
        ('Technology', 'http://www.popsci.com/full-feed/technology'),
        ('DIY', 'http://www.popsci.com/full-feed/diy'),
    ]

    def preprocess_html(self, soup):
        """Remove the blocks that contain "Gallery:" links.

        Every ``<head>`` and ``<h2>`` element whose markup contains the text
        ``Gallery:`` has its *parent* element extracted from the tree.

        :param soup: parsed article document (BeautifulSoup tree).
        :return: the same ``soup`` object, modified in place.
        """
        # NOTE(review): 'head' is kept from the original selector. A match on
        # <head> (e.g. a "Gallery: ..." page title) extracts its parent, which
        # is normally the whole <html> element -- confirm this is intended
        # rather than a typo for a heading tag.
        for heading in soup.findAll(['head', 'h2']):
            if 'Gallery:' not in str(heading):
                continue
            container = heading.parent
            # A tag that is already detached from any tree has no parent.
            if container is not None:
                container.extract()
        return soup
  #-----------------------------------------------------------------