mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-09-29 15:31:08 -04:00
53 lines
1.8 KiB
Plaintext
53 lines
1.8 KiB
Plaintext
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
|
|
class Cracked(BasicNewsRecipe):
    """Calibre news recipe that downloads articles from the Cracked.com feed.

    Articles are taken from the FeedBurner RSS feed listed in ``feeds``.
    Multi-page articles are stitched together by following the "next"
    pagination link (see ``is_link_wanted``) and then stripping the
    pagination widgets and repeated headers (see ``postprocess_html``).
    """

    title = u'Cracked.com'
    __author__ = 'UnWeave'
    language = 'en'
    description = "America's Only HumorSite since 1958"
    publisher = 'Cracked'
    category = 'comedy, lists'
    oldest_article = 3  # days
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    remove_javascript = True
    use_embedded_content = False
    # Link-following depth; only links accepted by is_link_wanted() are
    # followed, so this presumably caps the number of article pages fetched
    # -- confirm against the BasicNewsRecipe documentation.
    recursions = 11
    remove_attributes = ['size', 'style']

    feeds = [(u'Articles', u'http://feeds.feedburner.com/CrackedRSS/')]

    # Reuse the recipe metadata above for the generated e-book.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Only the main <article> element is kept (article or blog layout).
    keep_only_tags = [
        dict(name='article', attrs={'class': 'module article dropShadowBottomCurved'}),
        dict(name='article', attrs={'class': 'module blog dropShadowBottomCurved'}),
    ]

    # Social sharing widgets and the "quick fix" module are dropped.
    remove_tags = [
        dict(name='section', attrs={'class': ['socialTools', 'quickFixModule']}),
        dict(attrs={'class': ['socialShareAfterContent', 'socialShareModule']}),
    ]

    def is_link_wanted(self, url, a):
        """Return True only for the "next page" link of a paginated article.

        :param url: URL the anchor points to (not inspected here).
        :param a: The anchor tag being considered.
        :return: True when ``a`` carries the ``next`` class and sits inside a
            ``<nav class="PaginationContent">`` element, otherwise False.
        """
        # The class attribute may be missing entirely, and depending on the
        # HTML parser in use it may be a plain string or a list of class
        # tokens; normalise to a sequence of tokens before testing so that
        # neither case raises or silently fails to match.
        classes = a.get('class') or ()
        if not isinstance(classes, (list, tuple)):
            classes = classes.split()
        if 'next' not in classes:
            return False
        return a.findParent('nav', attrs={'class': 'PaginationContent'}) is not None

    def preprocess_html(self, soup):
        """Copy each image's ``data-img`` attribute into ``src``.

        Presumably the site defers image loading and keeps the real URL in
        ``data-img`` -- verify against the live markup.

        :param soup: Parsed page.
        :return: The same ``soup`` object, modified in place.
        """
        for img in soup.findAll('img', attrs={'data-img': True}):
            img['src'] = img['data-img']
        return soup

    def postprocess_html(self, soup, first_fetch):
        """Strip pagination blocks and, on follow-up pages, repeated headers.

        :param soup: Parsed page.
        :param first_fetch: When false, ``<h1>`` elements and elements with
            class ``meta`` are removed as well.
        :return: The same ``soup`` object, modified in place.
        """
        for div in soup.findAll(attrs={'class': 'PaginationContent'}):
            div.extract()
        if not first_fetch:
            # Title and meta block are only wanted once, on the first page.
            for h1 in soup.findAll('h1'):
                h1.extract()
            for div in soup.findAll(attrs={'class': 'meta'}):
                div.extract()
        return soup
|