# Patch summary (text before the "diff --git" line is skipped by patch tools).
#
# Rewrites the Foreign Policy recipe in resources/recipes/foreign_policy.recipe:
#   * adds a module header (__license__, __copyright__, site docstring);
#   * renames the class AdvancedUserRecipe1278059020 -> ForeignPolicy and
#     changes __author__ from 'rty' to 'Darko Miletic';
#   * replaces the misspelled attribute `pubisher` with `publisher`;
#   * raises oldest_article 7 -> 31 and max_articles_per_feed 100 -> 200;
#   * drops masthead_url, remove_javascript and the 'linearize_tables'
#     conversion option; adds description, remove_empty_feeds, extra_css and
#     a conversion_options dict built from description/category/publisher/
#     language;
#   * keep_only_tags now matches any tag by id and also keeps 'auth-bio';
#     remove_tags additionally drops <iframe> and the 'base-ad' element;
#     remove_attributes lists 'height' and 'width';
#   * print_version no longer appends 'hidecomments=no';
#   * adds preprocess_html, which deletes the `style` attribute from every
#     tag that has one and returns the soup.
#
# NOTE(review): line breaks and indentation below were reconstructed to agree
# with the hunk header (28 old lines / 45 new lines); the original column
# alignment inside lines is not recoverable from this copy. If the hunk fails
# to apply, retry with whitespace-insensitive matching -- TODO confirm against
# the upstream commit.
# NOTE(review): the __copyright__ string ends in a trailing space; presumably
# a contact address was stripped in transit -- verify against upstream.
diff --git a/resources/recipes/foreign_policy.recipe b/resources/recipes/foreign_policy.recipe
index 5af40a46a7..0d6f9984fd 100644
--- a/resources/recipes/foreign_policy.recipe
+++ b/resources/recipes/foreign_policy.recipe
@@ -1,28 +1,45 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Darko Miletic '
+'''
+www.foreignpolicy.com
+'''
+
 from calibre.web.feeds.news import BasicNewsRecipe
 
-class AdvancedUserRecipe1278059020(BasicNewsRecipe):
-    title = u'Foreign Policy'
-    pubisher = 'http://www.foreignpolicy.com'
-    category = 'Magazine, USA Foreign Policy'
-    __author__ = 'rty'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    masthead_url = 'http://www.foreignpolicy.com/images/fp_logo.jpg'
-    feeds = [(u'Feed', u'http://www.foreignpolicy.com/node/feed')]
-    remove_javascript = True
-    use_embedded_content = False
-    no_stylesheets = True
-    language = 'en'
-    encoding = 'utf-8'
-    conversion_options = {'linearize_tables':True}
+class ForeignPolicy(BasicNewsRecipe):
+    title = 'Foreign Policy'
+    __author__ = 'Darko Miletic'
+    description = 'International News'
+    publisher = 'Washingtonpost.Newsweek Interactive, LLC'
+    category = 'news, politics, USA'
+    oldest_article = 31
+    max_articles_per_feed = 200
+    no_stylesheets = True
+    encoding = 'utf8'
+    use_embedded_content = False
+    language = 'en'
+    remove_empty_feeds = True
+    extra_css = ' body{font-family: Georgia,"Times New Roman",Times,serif } img{margin-bottom: 0.4em} h1,h2,h3,h4,h5,h6{font-family: Arial,Helvetica,sans-serif} '
+
+    conversion_options = {
+        'comment' : description
+        , 'tags' : category
+        , 'publisher' : publisher
+        , 'language' : language
+        }
+
+    keep_only_tags = [dict(attrs={'id':['art-mast','art-body','auth-bio']})]
+    remove_tags = [dict(name='iframe'),dict(attrs={'id':['share-box','base-ad']})]
+    remove_attributes = ['height','width']
+
+
+    feeds = [(u'Articles', u'http://www.foreignpolicy.com/node/feed')]
 
     def print_version(self, url):
-        return url + '?print=yes&hidecomments=no&page=full'
+        return url + '?print=yes&page=full'
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
 
-    remove_tags = [
-        dict(name='div', attrs={'id':'share-box'}),
-    ]
-    keep_only_tags = [
-        # dict(name='div', attrs={'class':'cxArticleHeader'}),
-        dict(name='div', attrs={'id':['art-mast','art-body']}),
-    ]