Improvements to The Atlantic recipe

Removed some elements that were cluttering the output. Added inline images in addition to the lead image. Made captions smaller to distinguish them from the body text. Right-aligned image credits.
2025-07-09 03:04:10 -04:00 · 2017-02-17 22:08:44 -06:00 · 2017-02-17 22:08:44 -06:00 · a896d661f1
commit a896d661f1
parent 7cfda558ed
1 changed file with 14 additions and 8 deletions
--- a/recipes/atlantic.recipe
+++ b/recipes/atlantic.recipe
@ -26,19 +26,25 @@ class TheAtlantic(BasicNewsRecipe):
    keep_only_tags = [
        classes(
-            'article-header article-body article-magazine article-cover-content lead-img'),
+            'article-header article-body article-magazine article-cover-content article-cover-extra lead-img '),
-    ]
+            {'name': ['img']},
 		]
    remove_tags = [
-        {'name': ['meta', 'link', 'noscript']},
+        classes( 'social-kit-top letter-writer-info callout secondary-byline embed-wrapper offset-wrapper boxtop-most-popular'),
-        {'attrs': {'class': ['offset-wrapper', 'ad-boxfeatures-wrapper']}},
+        {'name': ['meta', 'link', 'noscript', 'aside', 'h3']},
        {'attrs': {'class': ['offset-wrapper', 'boxtop-most-popular']}},
        {'attrs': {'class': lambda x: x and 'article-tools' in x}},
        {'src': lambda x: x and 'spotxchange.com' in x},
    ]
    remove_tags_after = classes('article-body')
-
+	
    no_stylesheets = True
    remove_attributes = ['style']
-
+    extra_css = '''
                .credit { text-align: right; font-size: 75%; display: block }
 				.figcaption { font-size: 75% }
 				.caption { font-size: 75% }
 				.lead-img { display: block }'''
    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        br.set_cookie('inEuropeanUnion', '0', '.theatlantic.com')
@ -51,8 +57,8 @@ class TheAtlantic(BasicNewsRecipe):
        return url + '?single_page=true'
    def preprocess_html(self, soup):
-        for img in soup.findAll('img', attrs={'data-src': True}):
+        for img in soup.findAll('img', attrs={'data-srcset': True}):
-            img['src'] = img['data-src']
+            img['src'] = img['data-srcset']
        return soup
    def parse_index(self):