diff --git a/recipes/chr_mon.recipe b/recipes/chr_mon.recipe index 50b626fcbf..12731e20d5 100644 --- a/recipes/chr_mon.recipe +++ b/recipes/chr_mon.recipe @@ -1,152 +1,111 @@ -#!/usr/bin/env python -__license__ = 'GPL v3' -__author__ = 'Kovid Goyal and Sujata Raman, Lorenzo Vigentini' -__copyright__ = '2009, Kovid Goyal and Sujata Raman' -__version__ = 'v1.02' -__date__ = '10, January 2010' -__description__ = 'Providing context and clarity on national and international news, peoples and cultures' +__license__ = 'GPL v3' +__copyright__ = '2012, Darko Miletic ' +''' +www.csmonitor.com +''' -'''csmonitor.com''' - -import re from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup - -class ChristianScienceMonitor(BasicNewsRecipe): - - __author__ = 'Kovid Goyal' - description = 'Providing context and clarity on national and international news, peoples and cultures' - - cover_url = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif' - title = 'Christian Science Monitor' - publisher = 'The Christian Science Monitor' - category = 'News, politics, culture, economy, general interest' - - language = 'en' - encoding = 'utf-8' - timefmt = '[%a, %d %b, %Y]' - - oldest_article = 16 - max_articles_per_feed = 20 +class CSMonitor(BasicNewsRecipe): + title = 'The Christian Science Monitor - daily' + __author__ = 'Darko Miletic' + description = 'The Christian Science Monitor is an international news organization that delivers thoughtful, global coverage via its website, weekly magazine, daily news briefing, and email newsletters.' + publisher = 'The Christian Science Monitor' + category = 'news, politics, USA' + oldest_article = 2 + max_articles_per_feed = 200 + no_stylesheets = True + encoding = 'utf8' use_embedded_content = False - recursion = 10 + language = 'en' + remove_empty_feeds = True + publication_type = 'newspaper' + masthead_url = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif' + extra_css = """ + body{font-family: Arial,Tahoma,Verdana,Helvetica,sans-serif } + img{margin-bottom: 0.4em; display:block} + .head {font-family: Georgia,"Times New Roman",Times,serif} + .sByline,.caption{font-size: x-small} + .hide{display: none} + .sLoc{font-weight: bold} + ul{list-style-type: none} + """ - remove_javascript = True - no_stylesheets = True - requires_version = (0, 8, 39) + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } - def preprocess_raw_html(self, raw, url): - try: - from html5lib import parse - root = parse(raw, namespaceHTMLElements=False, - treebuilder='lxml').getroot() - from lxml import etree - for tag in root.xpath( - '//script|//style|//noscript|//meta|//link|//object'): - tag.getparent().remove(tag) - for elem in list(root.iterdescendants(tag=etree.Comment)): - elem.getparent().remove(elem) - ans = etree.tostring(root, encoding=unicode) - ans = re.sub('.* listid and + isinstance(booklists[listid], self.booklist_class)): if not os.path.exists(prefix): os.makedirs(self.normalize_path(prefix)) with open(self.normalize_path(os.path.join(prefix, self.METADATA_CACHE)), 'wb') as f: