calibre/recipes/explosm.recipe
Kovid Goyal 567040ee1e Perform PEP8 compliance checks on the entire codebase
Some bits of PEP 8 are turned off via setup.cfg
2016-07-29 21:25:17 +05:30

57 lines
2.0 KiB
Plaintext

from calibre.web.feeds.news import BasicNewsRecipe
import re
class Explosm(BasicNewsRecipe):
title = u'Explosm Rotated'
__author__ = 'Andromeda Rabbit'
description = 'Explosm'
language = 'en'
use_embedded_content = False
no_stylesheets = True
oldest_article = 24
remove_javascript = True
remove_empty_feeds = True
max_articles_per_feed = 10
feeds = [
(u'Explosm Feed', u'http://feeds.feedburner.com/Explosm')
]
keep_only_tags = [
dict(name='img', attrs={'alt': 'Cyanide and Happiness, a daily webcomic'})]
remove_tags = [dict(name='div'), dict(name='span'), dict(name='table'), dict(
name='br'), dict(name='nobr'), dict(name='a'), dict(name='b')]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}'''
def get_cover_url(self):
return 'http://cdn.shopify.com/s/files/1/0059/1872/products/cyanidetitle_large.jpg?1295846286'
def parse_feeds(self):
feeds = BasicNewsRecipe.parse_feeds(self)
for curfeed in feeds:
delList = []
for a, curarticle in enumerate(curfeed.articles):
if re.search(r'http://www.explosm.net/comics', curarticle.url) is None:
delList.append(curarticle)
if len(delList) > 0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index + 1] = []
return feeds
def skip_ad_pages(self, soup):
# Skip ad pages served before actual article
skip_tag = soup.find(name='img', attrs={
'alt': 'Cyanide and Happiness, a daily webcomic'})
if skip_tag is None:
return soup
return None