calibre/recipes/cracked_com.recipe
Kovid Goyal 567040ee1e Perform PEP8 compliance checks on the entire codebase
Some bits of PEP 8 are turned off via setup.cfg
2016-07-29 21:25:17 +05:30

53 lines
1.8 KiB
Plaintext

from calibre.web.feeds.news import BasicNewsRecipe
class Cracked(BasicNewsRecipe):
    """News recipe that builds an e-book from the Cracked.com RSS feed.

    Articles are discovered through the FeedBurner feed listed in ``feeds``.
    Only the main ``<article>`` containers are kept, social-sharing widgets
    are removed, and "next" pagination links are followed so that articles
    split across several pages can be fetched.
    """

    # --- Publication metadata -------------------------------------------
    title = u'Cracked.com'
    __author__ = 'UnWeave'
    language = 'en'
    description = "America's Only HumorSite since 1958"
    publisher = 'Cracked'
    category = 'comedy, lists'

    # --- Fetch settings -------------------------------------------------
    oldest_article = 3  # days
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    remove_javascript = True
    use_embedded_content = False
    # Link-following depth; which links qualify is decided by
    # is_link_wanted() below (only "next" pagination anchors).
    recursions = 11
    remove_attributes = ['size', 'style']

    feeds = [(u'Articles', u'http://feeds.feedburner.com/CrackedRSS/')]

    # Reuse the metadata declared above for the conversion step.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- Page clean-up rules --------------------------------------------
    # Keep only the article/blog body containers.
    keep_only_tags = [
        dict(name='article', attrs={'class': 'module article dropShadowBottomCurved'}),
        dict(name='article', attrs={'class': 'module blog dropShadowBottomCurved'}),
    ]

    # Drop social-sharing and "quick fix" modules.
    remove_tags = [
        dict(name='section', attrs={'class': ['socialTools', 'quickFixModule']}),
        dict(attrs={'class': ['socialShareAfterContent', 'socialShareModule']}),
    ]

    def is_link_wanted(self, url, a):
        """Return True only for the "next page" link of an article.

        :param url: Target URL of the link (not used by this filter).
        :param a: The anchor tag being considered.
        :return: True when ``a`` has class ``next`` and sits inside a
            ``<nav class="PaginationContent">`` element, otherwise False.
        """
        # Use .get() rather than a['class']: anchors without a class
        # attribute would otherwise raise KeyError instead of simply
        # being rejected.
        if a.get('class') != 'next':
            return False
        pagination_nav = a.findParent('nav', attrs={'class': 'PaginationContent'})
        return pagination_nav is not None

    def preprocess_html(self, soup):
        """Copy each image's ``data-img`` value into its ``src`` attribute.

        Only ``<img>`` tags that carry a ``data-img`` attribute are touched.

        :param soup: Parsed page.
        :return: The same ``soup`` object, modified in place.
        """
        for img in soup.findAll('img', attrs={'data-img': True}):
            img['src'] = img['data-img']
        return soup

    def postprocess_html(self, soup, first_fetch):
        """Strip pagination controls and, on follow-up pages, repeated headers.

        :param soup: Parsed page.
        :param first_fetch: True for the first page of an article; when
            False the page's ``<h1>`` elements and ``meta``-class elements
            are removed as well.
        :return: The same ``soup`` object, modified in place.
        """
        # Pagination blocks are removed from every page.
        for pagination in soup.findAll(attrs={'class': 'PaginationContent'}):
            pagination.extract()

        if not first_fetch:
            # Continuation pages: drop headings and meta blocks.
            for heading in soup.findAll('h1'):
                heading.extract()
            for meta_block in soup.findAll(attrs={'class': 'meta'}):
                meta_block.extract()
        return soup