from calibre.web.feeds.news import BasicNewsRecipe
import re


class Explosm(BasicNewsRecipe):
    """Calibre recipe that collects comic pages from the Explosm feed.

    Feed entries whose URL does not point at an explosm.net comic page are
    dropped in ``parse_feeds``; for the remaining pages only the comic
    ``<img>`` element (selected by its ``alt`` text) is kept.
    """

    title = u'Explosm Rotated'
    __author__ = 'Andromeda Rabbit'
    description = 'Explosm'
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    oldest_article = 24
    remove_javascript = True
    remove_empty_feeds = True
    max_articles_per_feed = 10

    feeds = [
        (u'Explosm Feed', u'http://feeds.feedburner.com/Explosm'),
    ]

    #match_regexps = [r'http://www.explosm.net/comics/.*']

    # Articles are kept only when their URL contains this prefix. The dots
    # are escaped so they match a literal '.' and not any character.
    _COMIC_URL_RE = re.compile(r'http://www\.explosm\.net/comics')

    # Keep just the comic image itself.
    keep_only_tags = [
        dict(name='img', attrs={'alt': 'Cyanide and Happiness, a daily webcomic'}),
    ]

    remove_tags = [
        dict(name='div'),
        dict(name='span'),
        dict(name='table'),
        dict(name='br'),
        dict(name='nobr'),
        dict(name='a'),
        dict(name='b'),
    ]

    extra_css = (
        ' h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}'
        ' h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}'
        ' p{font-family:Arial,Helvetica,sans-serif;font-size:small;}'
        ' body{font-family:Helvetica,Arial,sans-serif;font-size:small;}'
    )

    def get_cover_url(self):
        """Return the fixed URL of the image used as the cover."""
        return 'http://cdn.shopify.com/s/files/1/0059/1872/products/cyanidetitle_large.jpg?1295846286'

    def parse_feeds(self):
        """Parse the feeds, then drop every article that is not a comic page.

        Returns the feed list produced by ``BasicNewsRecipe.parse_feeds``
        with each feed's ``articles`` list filtered in place, so that only
        articles whose ``url`` matches ``_COMIC_URL_RE`` remain.
        """
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            # Slice assignment mutates the existing list object rather than
            # rebinding the attribute, and filters in a single pass.
            feed.articles[:] = [
                article for article in feed.articles
                if self._COMIC_URL_RE.search(article.url) is not None
            ]
        return feeds

    def skip_ad_pages(self, soup):
        """Decide whether the downloaded page should be substituted.

        Returns ``None`` when the comic image is present in ``soup``;
        otherwise returns ``soup`` itself.
        """
        # Skip ad pages served before actual article
        comic_img = soup.find(
            name='img',
            attrs={'alt': 'Cyanide and Happiness, a daily webcomic'},
        )
        if comic_img is None:
            # NOTE(review): calibre documents this hook as returning the HTML
            # of the real page for an ad page; here the parsed soup is handed
            # back unchanged - confirm this is the intended behaviour.
            return soup
        return None