From b3b7918e5aae6e4fcad2395762a4b1b2f6be8878 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 15 Aug 2014 16:58:40 +0530 Subject: [PATCH] Update The Atlantic --- recipes/atlantic.recipe | 115 +++++++++++----------------------------- 1 file changed, 32 insertions(+), 83 deletions(-) diff --git a/recipes/atlantic.recipe b/recipes/atlantic.recipe index 1fca16827c..22380221db 100644 --- a/recipes/atlantic.recipe +++ b/recipes/atlantic.recipe @@ -6,107 +6,56 @@ __copyright__ = '2008, Kovid Goyal ' theatlantic.com ''' import re - from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag, NavigableString class TheAtlantic(BasicNewsRecipe): title = 'The Atlantic' - __author__ = 'Kovid Goyal and Sujata Raman' + __author__ = 'Kovid Goyal' description = 'Current affairs and politics focussed on the US' INDEX = 'http://www.theatlantic.com/magazine/toc/0/' language = 'en' + encoding = 'utf-8' - keep_only_tags = [{'attrs':{'class':['article', 'articleHead', 'articleText']}}] - remove_tags = [dict(attrs={'class':'footer'})] + keep_only_tags = [ + {'attrs':{'class':['article-header', 'article-body', 'article-magazine']}}, + ] + remove_tags = [ + {'name': ['meta', 'link']}, + {'attrs':{'class':['offset-wrapper']}}, + {'attrs':{'class':lambda x: x and 'article-tools' in x}}, + ] no_stylesheets = True - preprocess_regexps = [ (re.compile(r'', re.DOTALL), lambda m: ''), (re.compile(r'.*