calibre/recipes/explosm.recipe

from calibre.web.feeds.news import BasicNewsRecipe
import re

class Explosm(BasicNewsRecipe):
    title              = u'Explosm Rotated'
    __author__        = 'Andromeda Rabbit'
    description      = 'Explosm'
    language            = 'en'
    use_embedded_content = False
    no_stylesheets    = True
    oldest_article    = 24
    remove_javascript   = True
    remove_empty_feeds  = True
    max_articles_per_feed = 10

    feeds = [
             (u'Explosm Feed', u'http://feeds.feedburner.com/Explosm')
             ]

    #match_regexps = [r'http://www.explosm.net/comics/.*']

    keep_only_tags   = [dict(name='img', attrs={'alt':'Cyanide and Happiness, a daily webcomic'})]
    remove_tags = [dict(name='div'), dict(name='span'), dict(name='table'), dict(name='br'), dict(name='nobr'), dict(name='a'), dict(name='b')]

    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}'''

    def get_cover_url(self):
        return 'http://cdn.shopify.com/s/files/1/0059/1872/products/cyanidetitle_large.jpg?1295846286'

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)

        for curfeed in feeds:
            delList = []
            for a,curarticle in enumerate(curfeed.articles):
                if re.search(r'http://www.explosm.net/comics', curarticle.url) == None:
                    delList.append(curarticle)
            if len(delList)>0:
                for d in delList:
                    index = curfeed.articles.index(d)
                    curfeed.articles[index:index+1] = []

        return feeds

    def skip_ad_pages(self, soup):
        # Skip ad pages served before actual article
        skip_tag = soup.find(name='img', attrs={'alt':'Cyanide and Happiness, a daily webcomic'})
        if skip_tag is None:
            return soup
        return None