New recipe for The Reader's Digest by BrianG

2025-06-23 15:30:45 -04:00 · 2010-01-19 12:42:44 -07:00 · 2010-01-19 12:42:44 -07:00 · f226bdfe9d
commit f226bdfe9d
parent 4332e1a641
1 changed files with 188 additions and 0 deletions
--- a/resources/recipes/readers_digest.recipe
+++ b/resources/recipes/readers_digest.recipe
@ -0,0 +1,188 @@
+#!/usr/bin/env  python
+__license__   = 'GPL v3'
+'''
+'''
+from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.web.feeds import Feed
+
+
+class ReadersDigest(BasicNewsRecipe):
+
+    title       = 'Readers Digest'
+    __author__  = 'BrianG'
+    language = 'en'
+    description = 'Readers Digest Feeds'
+    no_stylesheets        = True
+    use_embedded_content  = False
+    oldest_article = 60
+    max_articles_per_feed = 200
+
+    language = 'en'
+    remove_javascript     = True
+
+    extra_css      = ''' h1 {font-family:georgia,serif;color:#000000;}
+                        .mainHd{font-family:georgia,serif;color:#000000;}
+                         h2 {font-family:Arial,Sans-serif;}
+                        .name{font-family:Arial,Sans-serif; font-size:x-small;font-weight:bold; }
+                        .date{font-family:Arial,Sans-serif; font-size:x-small ;color:#999999;}
+                        .byline{font-family:Arial,Sans-serif; font-size:x-small ;}
+                        .photoBkt{ font-size:x-small ;}
+                        .vertPhoto{font-size:x-small ;}
+                        .credits{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
+                        .credit{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
+                        .artTxt{font-family:georgia,serif;}
+                        .caption{font-family:georgia,serif; font-size:x-small;color:#333333;}
+                        .credit{font-family:georgia,serif; font-size:x-small;color:#999999;}
+                        a:link{color:#CC0000;}
+                        .breadcrumb{font-family:Arial,Sans-serif;font-size:x-small;}
+                        '''
+
+
+    remove_tags = [
+        dict(name='h4', attrs={'class':'close'}),
+        dict(name='div', attrs={'class':'fromLine'}),
+        dict(name='img', attrs={'class':'colorTag'}),
+        dict(name='div', attrs={'id':'sponsorArticleHeader'}),
+        dict(name='div', attrs={'class':'horizontalAd'}),
+        dict(name='div', attrs={'id':'imageCounterLeft'}),
+        dict(name='div', attrs={'id':'commentsPrint'})
+        ]
+
+
+    feeds = [
+            ('New in RD', 'http://feeds.rd.com/ReadersDigest'),
+            ('Jokes', 'http://feeds.rd.com/ReadersDigestJokes'),
+            ('Cartoons', 'http://feeds.rd.com/ReadersDigestCartoons'),
+            ('Blogs','http://feeds.rd.com/ReadersDigestBlogs')
+        ]
+
+    cover_url = 'http://www.rd.com/images/logo-main-rd.gif'
+
+
+
+#-------------------------------------------------------------------------------------------------
+
+    def print_version(self, url):
+
+        # Get the identity number of the current article and append it to the root print URL
+
+        if url.find('/article') > 0:
+            ident = url[url.find('/article')+8:url.find('.html?')-4]
+            url = 'http://www.rd.com/content/printContent.do?contentId=' + ident
+
+        elif url.find('/post') > 0:
+
+            # in this case, have to get the page itself to derive the Print page.
+            soup = self.index_to_soup(url)
+            newsoup = soup.find('ul',attrs={'class':'printBlock'})
+            url = 'http://www.rd.com' + newsoup('a')[0]['href']
+            url = url[0:url.find('&Keep')]
+
+        return url
+
+#-------------------------------------------------------------------------------------------------
+
+    def parse_index(self):
+
+        pages = [
+                ('Your America','http://www.rd.com/your-america-inspiring-people-and-stories', 'channelLeftContainer',{'class':'moreLeft'}),
+                # useless recipes ('Living Healthy','http://www.rd.com/living-healthy', 'channelLeftContainer',{'class':'moreLeft'}),
+                ('Advice and Know-How','http://www.rd.com/advice-and-know-how', 'channelLeftContainer',{'class':'moreLeft'})
+
+            ]
+
+        feeds = []
+
+        for page in pages:
+            section, url, divider, attrList = page
+            newArticles = self.page_parse(url, divider, attrList)
+            feeds.append((section,newArticles))
+
+        # after the pages of the site have been processed, parse several RSS feeds for additional sections
+        newfeeds = Feed()
+        newfeeds = self.parse_rss()
+
+
+        # The utility code in parse_rss returns a Feed object.  Convert each feed/article combination into a form suitable
+        # for this module (parse_index).
+
+        for feed in newfeeds:
+            newArticles = []
+            for article in feed.articles:
+                newArt = {
+                            'title' : article.title,
+                            'url'   : article.url,
+                            'date'  : article.date,
+                            'description' : article.text_summary
+                        }
+                newArticles.append(newArt)
+
+
+            # New and Blogs should be the first two feeds.
+            if feed.title == 'New in RD':
+                feeds.insert(0,(feed.title,newArticles))
+            elif feed.title == 'Blogs':
+                feeds.insert(1,(feed.title,newArticles))
+            else:
+                feeds.append((feed.title,newArticles))
+
+
+        return feeds
+
+#-------------------------------------------------------------------------------------------------
+
+    def page_parse(self, mainurl, divider, attrList):
+
+        articles = []
+        mainsoup = self.index_to_soup(mainurl)
+        for item in mainsoup.findAll(attrs=attrList):
+            newArticle = {
+                        'title' : item('img')[0]['alt'],
+                        'url'   : 'http://www.rd.com'+item('a')[0]['href'],
+                        'date'  : '',
+                        'description' : ''
+                    }
+            articles.append(newArticle)
+
+
+
+        return articles
+
+
+
+#-------------------------------------------------------------------------------------------------
+
+    def parse_rss (self):
+
+        # Do the "official" parse_feeds first
+        feeds = BasicNewsRecipe.parse_feeds(self)
+
+
+        # Loop thru the articles in all feeds to find articles with "recipe" in it
+        recipeArticles = []
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if curarticle.title.upper().find('RECIPE') >= 0:
+                    recipeArticles.append(curarticle)
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+
+        # If there are any recipes found, create a new Feed object and append.
+        if len(recipeArticles) > 0:
+            pfeed = Feed()
+            pfeed.title = 'Recipes'
+            pfeed.descrition = 'Recipe Feed (Virtual)'
+            pfeed.image_url  = None
+            pfeed.oldest_article = 30
+            pfeed.id_counter = len(recipeArticles)
+            # Create a new Feed, add the recipe articles, and then append
+            # to "official" list of feeds
+            pfeed.articles = recipeArticles[:]
+            feeds.append(pfeed)
+
+        return feeds
+