# calibre/recipes/explosm.recipe

from calibre.web.feeds.news import BasicNewsRecipe
import re


class Explosm(BasicNewsRecipe):
    """calibre news recipe for the Explosm (Cyanide and Happiness) webcomic.

    Articles are taken from the Explosm FeedBurner feed, restricted to
    entries that link to a comic page, and each downloaded page is reduced
    to the comic image.
    """

    title = u'Explosm Rotated'
    __author__ = 'Andromeda Rabbit'
    description = 'Explosm'
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    oldest_article = 24
    remove_javascript = True
    remove_empty_feeds = True
    max_articles_per_feed = 10

    # ``alt`` text of the comic <img>; shared by ``keep_only_tags`` and
    # ``skip_ad_pages`` so both always look for the same element.
    _COMIC_IMG_ALT = 'Cyanide and Happiness, a daily webcomic'

    # Literal text an article URL must contain to be treated as a comic.
    _COMIC_URL_MARKER = 'http://www.explosm.net/comics'

    feeds = [
        (u'Explosm Feed', u'http://feeds.feedburner.com/Explosm')
    ]

    # Keep just the comic image; the tags listed in ``remove_tags`` are
    # stripped from the page.
    keep_only_tags = [
        dict(name='img', attrs={'alt': _COMIC_IMG_ALT})]
    remove_tags = [dict(name='div'), dict(name='span'), dict(name='table'), dict(
        name='br'), dict(name='nobr'), dict(name='a'), dict(name='b')]

    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}'''

    def get_cover_url(self):
        """Return the fixed URL of the image used as the cover."""
        return 'http://cdn.shopify.com/s/files/1/0059/1872/products/cyanidetitle_large.jpg?1295846286'

    def parse_feeds(self):
        """Parse the feeds and drop every article that is not a comic page.

        An article is kept only when its URL contains
        ``_COMIC_URL_MARKER``. The match is a literal substring test, so the
        dots in the host name are not treated as regex wildcards, and an
        article without a URL is discarded instead of raising.

        Returns:
            The feed list produced by ``BasicNewsRecipe.parse_feeds``, with
            each feed's ``articles`` list filtered in place.
        """
        feeds = BasicNewsRecipe.parse_feeds(self)
        marker = self._COMIC_URL_MARKER

        for feed in feeds:
            # Slice assignment mutates the existing list object, so any
            # other reference to ``feed.articles`` sees the filtered result.
            feed.articles[:] = [
                article for article in feed.articles
                if marker in (article.url or '')
            ]

        return feeds

    def skip_ad_pages(self, soup):
        """Return ``soup`` when the comic image is absent, otherwise ``None``.

        The page is inspected for an ``<img>`` whose ``alt`` equals
        ``_COMIC_IMG_ALT``.
        """
        # Skip ad pages served before actual article
        # NOTE(review): calibre documents this hook as "return the HTML of
        # the real page for an ad page, else None"; returning the soup
        # itself when the comic image is missing looks unusual -- confirm
        # this is intended before changing it.
        comic_img = soup.find(name='img', attrs={'alt': self._COMIC_IMG_ALT})
        if comic_img is None:
            return soup
        return None