From ed55e76ff4557c4c3558b28b6dcf29c452d9e9e1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 9 Jul 2013 15:45:20 +0530 Subject: [PATCH] Update cracked.com --- recipes/cracked_com.recipe | 68 +++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 38 deletions(-) diff --git a/recipes/cracked_com.recipe b/recipes/cracked_com.recipe index 829299ae17..a702f93433 100644 --- a/recipes/cracked_com.recipe +++ b/recipes/cracked_com.recipe @@ -1,63 +1,55 @@ from calibre.web.feeds.news import BasicNewsRecipe -class Cracked(BasicNewsRecipe): - title = u'Cracked.com' - __author__ = 'UnWeave' - language = 'en' - description = "America's Only HumorSite since 1958" - publisher = 'Cracked' - category = 'comedy, lists' - oldest_article = 3 #days - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'ascii' - remove_javascript = True - use_embedded_content = False - feeds = [ (u'Articles', u'http://feeds.feedburner.com/CrackedRSS/') ] +class Cracked(BasicNewsRecipe): + title = u'Cracked.com' + __author__ = 'UnWeave' + language = 'en' + description = "America's Only HumorSite since 1958" + publisher = 'Cracked' + category = 'comedy, lists' + oldest_article = 3 # days + max_articles_per_feed = 100 + no_stylesheets = True + encoding = 'ascii' + remove_javascript = True + use_embedded_content = False + # auto_cleanup = True + + feeds = [(u'Articles', u'http://feeds.feedburner.com/CrackedRSS/')] conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - , 'language' : language - } + 'comment': description, 'tags': category, 'publisher': publisher, 'language': language + } - remove_tags_before = dict(id='PrimaryContent') + # remove_tags_before = dict(id='PrimaryContent') - remove_tags_after = dict(name='div', attrs={'class':'shareBar'}) + keep_only_tags = dict(name='article', attrs={ + 'class': 'module article dropShadowBottomCurved'}) - remove_tags = [ dict(name='div', attrs={'class':['social', - 
'FacebookLike', - 'shareBar' - ]}), + # remove_tags_after = dict(name='div', attrs={'class':'shareBar'}) - dict(name='div', attrs={'id':['inline-share-buttons', - ]}), - - dict(name='span', attrs={'class':['views', - 'KonaFilter' - ]}), - #dict(name='img'), - ] + remove_tags = [ + dict(name='section', attrs={'class': ['socialTools', 'quickFixModule']})] def appendPage(self, soup, appendTag, position): # Check if article has multiple pages - pageNav = soup.find('nav', attrs={'class':'PaginationContent'}) + pageNav = soup.find('nav', attrs={'class': 'PaginationContent'}) if pageNav: # Check not at last page - nextPage = pageNav.find('a', attrs={'class':'next'}) + nextPage = pageNav.find('a', attrs={'class': 'next'}) if nextPage: nextPageURL = nextPage['href'] nextPageSoup = self.index_to_soup(nextPageURL) # 8th
<section> tag contains article content - nextPageContent = nextPageSoup.findAll('section')[7] + nextPageContent = nextPageSoup.findAll('article')[0] newPosition = len(nextPageContent.contents) - self.appendPage(nextPageSoup,nextPageContent,newPosition) + self.appendPage(nextPageSoup, nextPageContent, newPosition) nextPageContent.extract() pageNav.extract() - appendTag.insert(position,nextPageContent) + appendTag.insert(position, nextPageContent) def preprocess_html(self, soup): self.appendPage(soup, soup.body, 3) return soup +