Fix recipe for The Christian Science Monitor

2025-10-22 14:28:55 -04:00 · 2010-01-11 19:42:16 -07:00 · 2010-01-11 19:42:16 -07:00 · f8840debaf
commit f8840debaf
parent 63a1434d62
1 changed file with 29 additions and 28 deletions
--- a/resources/recipes/chr_mon.recipe
+++ b/resources/recipes/chr_mon.recipe
@@ -1,19 +1,38 @@
 #!/usr/bin/env  python
 __license__     = 'GPL v3'
 __author__      = 'Kovid Goyal and Sujata Raman, Lorenzo Vigentini'
 __copyright__   = '2009, Kovid Goyal and Sujata Raman'
 __version__     = 'v1.02'
 __date__        = '10, January 2010'
 __description__ = 'Providing context and clarity on national and international news, peoples and cultures'
 '''csmonitor.com'''
 import re
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 class ChristianScienceMonitor(BasicNewsRecipe):
-    title = 'Christian Science Monitor'
+    author        = 'Kovid Goyal, Sujata Raman and Lorenzo Vigentini'
-    description = 'Providing context and clarity on national and international news, peoples and cultures'
+    description   = 'Providing context and clarity on national and international news, peoples and cultures'
-    max_articles_per_feed = 20
+
-    __author__ = 'Kovid Goyal and Sujata Raman'
+    cover_url      = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif'
    title          = 'Christian Science Monitor'
    publisher      = 'The Christian Science Monitor'
    category       = 'News, politics, culture, economy, general interest'
    language = 'en'
    encoding = 'utf-8'
-    no_stylesheets = True
+    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content   = False
    oldest_article        = 16
    max_articles_per_feed = 20
    use_embedded_content  = False
    recursion             = 10
    remove_javascript     = True
    no_stylesheets = True
    preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
@@ -55,33 +74,15 @@ class ChristianScienceMonitor(BasicNewsRecipe):
                     ]
    keep_only_tags = [
-                        dict(name='div', attrs={'id':['story','main']}),
+                        dict(name='div', attrs={'id':'mainColumn'}),
                        ]
    remove_tags    = [
                        dict(name='div', attrs={'id':['story-tools','videoPlayer','storyRelatedBottom','enlarge-photo','photo-paginate']}),
-                        dict(name='div', attrs={'class':[ 'spacer3','divvy spacer7','comment','storyIncludeBottom']}),
+                        dict(name='div', attrs={'class':['storyToolbar cfx','podStoryRel','spacer3','divvy spacer7','comment','storyIncludeBottom']}),
                        dict(name='ul', attrs={'class':[ 'centerliststories']}) ,
                        dict(name='form', attrs={'id':[ 'commentform']}) ,
                    ]
    remove_tags_after = [ dict(name='div', attrs={'class':[ 'ad csmAd']})]
    def find_articles(self, section):
        """Build the list of article entries found in *section*.

        Every ``head4`` element inside *section* is inspected. Its text
        nodes, joined with single spaces and stripped, become the article
        title, and the ``href`` of its first ``<a>`` child becomes the URL.
        Headings that have no anchor, or whose anchor has no usable
        ``href``, are skipped rather than aborting the whole listing.

        :param section: parsed markup node exposing ``findAll``.
        :return: list of dicts with the keys ``title``, ``url``,
                 ``description`` (always empty) and ``date`` (the current
                 date formatted as ``'%a, %d %b'``).
        """
        articles = []
        for heading in section.findAll('head4'):
            link = heading.find('a')
            if link is None:
                continue
            # An anchor without an href used to raise KeyError here and
            # abort the listing; skip that single entry instead.
            url = link.get('href')
            if not url:
                continue
            headline = ' '.join(heading.findAll(text=True)).strip()
            articles.append({
                'title': headline,
                'url': url,
                'description': '',
                'date': strftime('%a, %d %b'),
            })
        return articles
    def postprocess_html(self, soup, first_fetch):
        """Return the ``<html>`` element detached from *soup*.

        If *soup* contains an ``html`` element, it is extracted from the
        tree and returned on its own. Otherwise *soup* is returned
        unchanged. *first_fetch* is accepted but not used here.
        """
        root = soup.find('html')
        if root is not None:
            # Detach the element so that only it is handed back.
            root.extract()
            return root
        return soup