Update cracked.com

2025-08-30 23:00:21 -04:00 · 2013-07-09 15:45:20 +05:30 · 2013-07-09 15:45:20 +05:30 · ed55e76ff4
commit ed55e76ff4
parent bba659b852
1 changed file with 30 additions and 38 deletions
--- a/recipes/cracked_com.recipe
+++ b/recipes/cracked_com.recipe
@@ -1,5 +1,6 @@
 from calibre.web.feeds.news import BasicNewsRecipe

+
 class Cracked(BasicNewsRecipe):
    title = u'Cracked.com'
    __author__ = 'UnWeave'
@@ -13,33 +14,23 @@ class Cracked(BasicNewsRecipe):
    encoding = 'ascii'
    remove_javascript = True
    use_embedded_content = False
+    # auto_cleanup = True

    feeds = [(u'Articles', u'http://feeds.feedburner.com/CrackedRSS/')]

    conversion_options = {
-                          'comment'   : description
-                        , 'tags'      : category
-                        , 'publisher' : publisher
-                        , 'language'  : language
+        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

-    remove_tags_before = dict(id='PrimaryContent')
+    # remove_tags_before = dict(id='PrimaryContent')

-    remove_tags_after = dict(name='div', attrs={'class':'shareBar'})
+    keep_only_tags = dict(name='article', attrs={
+                          'class': 'module article dropShadowBottomCurved'})

-    remove_tags = [ dict(name='div', attrs={'class':['social',
-                                                     'FacebookLike',
-                                                     'shareBar'
-                                                     ]}),
+    # remove_tags_after = dict(name='div', attrs={'class':'shareBar'})

-                    dict(name='div', attrs={'id':['inline-share-buttons',
-                                                  ]}),
-
-                    dict(name='span', attrs={'class':['views',
-                                                      'KonaFilter'
-                                                      ]}),
-                    #dict(name='img'),
-                    ]
+    remove_tags = [
+        dict(name='section', attrs={'class': ['socialTools', 'quickFixModule']})]

    def appendPage(self, soup, appendTag, position):
        # Check if article has multiple pages
@@ -51,7 +42,7 @@ class Cracked(BasicNewsRecipe):
                nextPageURL = nextPage['href']
                nextPageSoup = self.index_to_soup(nextPageURL)
                # 8th <section> tag contains article content
-                nextPageContent = nextPageSoup.findAll('section')[7]
+                nextPageContent = nextPageSoup.findAll('article')[0]
                newPosition = len(nextPageContent.contents)
                self.appendPage(nextPageSoup, nextPageContent, newPosition)
                nextPageContent.extract()
@@ -61,3 +52,4 @@ class Cracked(BasicNewsRecipe):
    def preprocess_html(self, soup):
        self.appendPage(soup, soup.body, 3)
        return soup
+