mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Cracked.com
This commit is contained in:
parent
788d2d6611
commit
3b05cd601f
@@ -23,30 +23,43 @@ class Cracked(BasicNewsRecipe):
|
|||||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||||
}
|
}
|
||||||
|
|
||||||
keep_only_tags = [dict(name='article', attrs={'class': 'module article dropShadowBottomCurved'}),
|
keep_only_tags = [
|
||||||
dict(name='article', attrs={'class': 'module blog dropShadowBottomCurved'})]
|
dict(name='div', attrs={'class': [
|
||||||
|
'content-content',
|
||||||
|
'content-header',
|
||||||
|
]}),
|
||||||
|
dict(name='article', attrs={'class': [
|
||||||
|
'module article dropShadowBottomCurved',
|
||||||
|
'module blog dropShadowBottomCurved',
|
||||||
|
]}),
|
||||||
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='section', attrs={
|
dict(name='section', attrs={'class': ['socialTools', 'quickFixModule', 'continue-reading']}),
|
||||||
'class': ['socialTools', 'quickFixModule']}),
|
dict(attrs={'class':['socialShareAfterContent', 'socialShareModule', 'continue-reading', 'social-share-bottom list-inline']}),
|
||||||
dict(
|
dict(name='div', attrs={'id': ['relatedArticle']}),
|
||||||
attrs={'class': ['socialShareAfterContent', 'socialShareModule']}),
|
dict(name='ul', attrs={'id': [
|
||||||
|
'breadcrumbs',
|
||||||
|
'socialShare',
|
||||||
|
]}),
|
||||||
|
dict(name='div', attrs={'class': ['bannerAd hidden-sm hidden-md hidden-lg introAd']})
|
||||||
]
|
]
|
||||||
|
|
||||||
def is_link_wanted(self, url, a):
|
def is_link_wanted(self, url, a):
|
||||||
return a['class'] == 'next' and a.findParent('nav', attrs={'class': 'PaginationContent'}) is not None
|
return a['class'] == 'next' and a.findParent('nav', attrs={'class':'PaginationContent'}) is not None
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for img in soup.findAll('img', attrs={'data-img': True}):
|
for img in soup.findAll('img', attrs={'data-img':True}):
|
||||||
img['src'] = img['data-img']
|
img['src'] = img['data-img']
|
||||||
|
for img in soup.findAll('img', attrs={'data-original':True}):
|
||||||
|
img['src'] = img['data-original']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def postprocess_html(self, soup, first_fetch):
|
def postprocess_html(self, soup, first_fetch):
|
||||||
for div in soup.findAll(attrs={'class': 'PaginationContent'}):
|
for div in soup.findAll(attrs={'class':'PaginationContent'}):
|
||||||
div.extract()
|
div.extract()
|
||||||
if not first_fetch:
|
if not first_fetch:
|
||||||
for h1 in soup.findAll('h1'):
|
for div in soup.findAll(attrs={'class':'meta'}):
|
||||||
h1.extract()
|
|
||||||
for div in soup.findAll(attrs={'class': 'meta'}):
|
|
||||||
div.extract()
|
div.extract()
|
||||||
|
|
||||||
return soup
|
return soup
|
||||||
|
Loading…
x
Reference in New Issue
Block a user