Fix recipe for The Christian Science Monitor

2025-09-29 15:31:08 -04:00 · 2010-01-11 19:42:16 -07:00 · 2010-01-11 19:42:16 -07:00 · f8840debaf
commit f8840debaf
parent 63a1434d62
1 changed file with 29 additions and 28 deletions
--- a/resources/recipes/chr_mon.recipe
+++ b/resources/recipes/chr_mon.recipe
@@ -1,19 +1,38 @@
+#!/usr/bin/env  python
+__license__     = 'GPL v3'
+__author__      = 'Kovid Goyal and Sujata Raman, Lorenzo Vigentini'
+__copyright__   = '2009, Kovid Goyal and Sujata Raman'
+__version__     = 'v1.02'
+__date__        = '10, January 2010'
+__description__ = 'Providing context and clarity on national and international news, peoples and cultures'
+
+'''csmonitor.com'''
+

 import re
-from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe

 class ChristianScienceMonitor(BasicNewsRecipe):

-    title = 'Christian Science Monitor'
+    author        = 'Kovid Goyal, Sujata Raman and Lorenzo Vigentini'
    description   = 'Providing context and clarity on national and international news, peoples and cultures'
-    max_articles_per_feed = 20
-    __author__ = 'Kovid Goyal and Sujata Raman'
+
+    cover_url      = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif'
+    title          = 'Christian Science Monitor'
+    publisher      = 'The Christian Science Monitor'
+    category       = 'News, politics, culture, economy, general interest'
+
    language = 'en'
    encoding = 'utf-8'
-    no_stylesheets = True
-    use_embedded_content   = False
+    timefmt        = '[%a, %d %b, %Y]'

+    oldest_article        = 16
+    max_articles_per_feed = 20
+    use_embedded_content  = False
+    recursion             = 10
+
+    remove_javascript     = True
+    no_stylesheets = True


    preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
@@ -55,33 +74,15 @@ class ChristianScienceMonitor(BasicNewsRecipe):
                     ]

    keep_only_tags = [
-                        dict(name='div', attrs={'id':['story','main']}),
+                        dict(name='div', attrs={'id':'mainColumn'}),
                        ]

    remove_tags    = [
                        dict(name='div', attrs={'id':['story-tools','videoPlayer','storyRelatedBottom','enlarge-photo','photo-paginate']}),
-                        dict(name='div', attrs={'class':[ 'spacer3','divvy spacer7','comment','storyIncludeBottom']}),
+                        dict(name='div', attrs={'class':['storyToolbar cfx','podStoryRel','spacer3','divvy spacer7','comment','storyIncludeBottom']}),
                        dict(name='ul', attrs={'class':[ 'centerliststories']}) ,
                        dict(name='form', attrs={'id':[ 'commentform']}) ,
                    ]

+    remove_tags_after = [ dict(name='div', attrs={'class':[ 'ad csmAd']})]

-    def find_articles(self, section):
-        ans = []
-        for x in section.findAll('head4'):
-            title = ' '.join(x.findAll(text=True)).strip()
-            a = x.find('a')
-            if not a: continue
-            href = a['href']
-            ans.append({'title':title, 'url':href, 'description':'', 'date': strftime('%a, %d %b')})
-
-        #for x in ans:
-        #    x['url'] += '/output/print'
-        return ans
-
-    def postprocess_html(self, soup, first_fetch):
-        html = soup.find('html')
-        if html is None:
-            return soup
-        html.extract()
-        return html