Speed up Economist download by not downloading images from the article carousel

2025-07-09 03:04:10 -04:00 · 2016-06-02 17:32:57 +05:30 · 2016-06-02 17:32:57 +05:30 · 3c27f28fdb
commit 3c27f28fdb
parent 7a9d29261d
2 changed files with 5 additions and 2 deletions
--- a/recipes/economist.recipe
+++ b/recipes/economist.recipe
@ -49,7 +49,7 @@ class Economist(BasicNewsRecipe):
                                 'main-content-container', 'ec-topic-widget']}),
            {'class': lambda x: x and 'share-links-header' in x},
    ]
-    keep_only_tags = [dict(name='article')]
+    keep_only_tags = [dict(name='article', id=lambda x: not x)]
    no_stylesheets = True
    preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
        lambda x:'</html>')]
@ -78,6 +78,7 @@ class Economist(BasicNewsRecipe):
        return self.economist_parse_index()

    def economist_parse_index(self):
+        # return [('Articles', [{'title':'test', 'url':'http://www.economist.com/news/americas/21699494-guide-cutting-corners-way-jos'}])]
        soup = self.index_to_soup(self.INDEX)
        div = soup.find('div', attrs={'class':'issue-image'})
        if div is not None:
--- a/recipes/economist_free.recipe
+++ b/recipes/economist_free.recipe
@ -49,7 +49,7 @@ class Economist(BasicNewsRecipe):
                                 'main-content-container', 'ec-topic-widget']}),
            {'class': lambda x: x and 'share-links-header' in x},
    ]
-    keep_only_tags = [dict(name='article')]
+    keep_only_tags = [dict(name='article', id=lambda x: not x)]
    no_stylesheets = True
    preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
        lambda x:'</html>')]
@ -59,6 +59,7 @@ class Economist(BasicNewsRecipe):
    delay = 1

    needs_subscription = False
+
    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        # Add a cookie indicating we have accepted Economist's cookie
@ -77,6 +78,7 @@ class Economist(BasicNewsRecipe):
        return self.economist_parse_index()

    def economist_parse_index(self):
+        # return [('Articles', [{'title':'test', 'url':'http://www.economist.com/news/americas/21699494-guide-cutting-corners-way-jos'}])]
        soup = self.index_to_soup(self.INDEX)
        div = soup.find('div', attrs={'class':'issue-image'})
        if div is not None: