Update cracked.com

2025-07-09 03:04:10 -04:00 · 2013-07-09 15:45:20 +05:30 · 2013-07-09 15:45:20 +05:30 · ed55e76ff4
commit ed55e76ff4
parent bba659b852
1 changed file with 30 additions and 38 deletions
--- a/recipes/cracked_com.recipe
+++ b/recipes/cracked_com.recipe
@@ -1,63 +1,55 @@
 from calibre.web.feeds.news import BasicNewsRecipe

-class Cracked(BasicNewsRecipe):
-    title                 = u'Cracked.com'
-    __author__            = 'UnWeave'
-    language              = 'en'
-    description           = "America's Only HumorSite since 1958"
-    publisher             = 'Cracked'
-    category              = 'comedy, lists'
-    oldest_article        = 3 #days
-    max_articles_per_feed = 100
-    no_stylesheets        = True
-    encoding              = 'ascii'
-    remove_javascript     = True
-    use_embedded_content  = False

-    feeds = [ (u'Articles', u'http://feeds.feedburner.com/CrackedRSS/') ]
+class Cracked(BasicNewsRecipe):
+    title = u'Cracked.com'
+    __author__ = 'UnWeave'
+    language = 'en'
+    description = "America's Only HumorSite since 1958"
+    publisher = 'Cracked'
+    category = 'comedy, lists'
+    oldest_article = 3  # days
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    encoding = 'ascii'
+    remove_javascript = True
+    use_embedded_content = False
+    # auto_cleanup = True
+
+    feeds = [(u'Articles', u'http://feeds.feedburner.com/CrackedRSS/')]

    conversion_options = {
-                          'comment'   : description
-                        , 'tags'      : category
-                        , 'publisher' : publisher
-                        , 'language'  : language
-                        }
+        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
+    }

-    remove_tags_before = dict(id='PrimaryContent')
+    # remove_tags_before = dict(id='PrimaryContent')

-    remove_tags_after = dict(name='div', attrs={'class':'shareBar'})
+    keep_only_tags = dict(name='article', attrs={
+                          'class': 'module article dropShadowBottomCurved'})

-    remove_tags = [ dict(name='div', attrs={'class':['social',
-                                                     'FacebookLike',
-                                                     'shareBar'
-                                                     ]}),
+    # remove_tags_after = dict(name='div', attrs={'class':'shareBar'})

-                    dict(name='div', attrs={'id':['inline-share-buttons',
-                                                  ]}),
-
-                    dict(name='span', attrs={'class':['views',
-                                                      'KonaFilter'
-                                                      ]}),
-                    #dict(name='img'),
-                    ]
+    remove_tags = [
+        dict(name='section', attrs={'class': ['socialTools', 'quickFixModule']})]

    def appendPage(self, soup, appendTag, position):
        # Check if article has multiple pages
-        pageNav = soup.find('nav', attrs={'class':'PaginationContent'})
+        pageNav = soup.find('nav', attrs={'class': 'PaginationContent'})
        if pageNav:
            # Check not at last page
-            nextPage = pageNav.find('a', attrs={'class':'next'})
+            nextPage = pageNav.find('a', attrs={'class': 'next'})
            if nextPage:
                nextPageURL = nextPage['href']
                nextPageSoup = self.index_to_soup(nextPageURL)
                # 8th <section> tag contains article content
-                nextPageContent = nextPageSoup.findAll('section')[7]
+                nextPageContent = nextPageSoup.findAll('article')[0]
                newPosition = len(nextPageContent.contents)
-                self.appendPage(nextPageSoup,nextPageContent,newPosition)
+                self.appendPage(nextPageSoup, nextPageContent, newPosition)
                nextPageContent.extract()
                pageNav.extract()
-                appendTag.insert(position,nextPageContent)
+                appendTag.insert(position, nextPageContent)

    def preprocess_html(self, soup):
        self.appendPage(soup, soup.body, 3)
        return soup
+