diff --git a/resources/recipes/chr_mon.recipe b/resources/recipes/chr_mon.recipe index f2fec1c24d..79c991efa8 100644 --- a/resources/recipes/chr_mon.recipe +++ b/resources/recipes/chr_mon.recipe @@ -1,19 +1,38 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__author__ = 'Kovid Goyal and Sujata Raman, Lorenzo Vigentini' +__copyright__ = '2009, Kovid Goyal and Sujata Raman' +__version__ = 'v1.02' +__date__ = '10, January 2010' +__description__ = 'Providing context and clarity on national and international news, peoples and cultures' + +'''csmonitor.com''' + import re -from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe class ChristianScienceMonitor(BasicNewsRecipe): - title = 'Christian Science Monitor' - description = 'Providing context and clarity on national and international news, peoples and cultures' - max_articles_per_feed = 20 - __author__ = 'Kovid Goyal and Sujata Raman' + author = 'Kovid Goyal, Sujata Raman and Lorenzo Vigentini' + description = 'Providing context and clarity on national and international news, peoples and cultures' + + cover_url = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif' + title = 'Christian Science Monitor' + publisher = 'The Christian Science Monitor' + category = 'News, politics, culture, economy, general interest' + language = 'en' encoding = 'utf-8' - no_stylesheets = True - use_embedded_content = False + timefmt = '[%a, %d %b, %Y]' + oldest_article = 16 + max_articles_per_feed = 20 + use_embedded_content = False + recursion = 10 + + remove_javascript = True + no_stylesheets = True preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in @@ -55,33 +74,15 @@ class ChristianScienceMonitor(BasicNewsRecipe): ] keep_only_tags = [ - dict(name='div', attrs={'id':['story','main']}), + dict(name='div', attrs={'id':'mainColumn'}), ] remove_tags = [ dict(name='div', attrs={'id':['story-tools','videoPlayer','storyRelatedBottom','enlarge-photo','photo-paginate']}), - dict(name='div', attrs={'class':[ 'spacer3','divvy spacer7','comment','storyIncludeBottom']}), + dict(name='div', attrs={'class':['storyToolbar cfx','podStoryRel','spacer3','divvy spacer7','comment','storyIncludeBottom']}), dict(name='ul', attrs={'class':[ 'centerliststories']}) , dict(name='form', attrs={'id':[ 'commentform']}) , ] + remove_tags_after = [ dict(name='div', attrs={'class':[ 'ad csmAd']})] - def find_articles(self, section): - ans = [] - for x in section.findAll('head4'): - title = ' '.join(x.findAll(text=True)).strip() - a = x.find('a') - if not a: continue - href = a['href'] - ans.append({'title':title, 'url':href, 'description':'', 'date': strftime('%a, %d %b')}) - - #for x in ans: - # x['url'] += '/output/print' - return ans - - def postprocess_html(self, soup, first_fetch): - html = soup.find('html') - if html is None: - return soup - html.extract() - return html