From 3c27f28fdbcd8dbe990b5b04517376f1df4f81f6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 2 Jun 2016 17:32:57 +0530 Subject: [PATCH] Speed up economist download by not downloading images from the article carousel --- recipes/economist.recipe | 3 ++- recipes/economist_free.recipe | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/recipes/economist.recipe b/recipes/economist.recipe index d9062a66a4..82c57a0028 100644 --- a/recipes/economist.recipe +++ b/recipes/economist.recipe @@ -49,7 +49,7 @@ class Economist(BasicNewsRecipe): 'main-content-container', 'ec-topic-widget']}), {'class': lambda x: x and 'share-links-header' in x}, ] - keep_only_tags = [dict(name='article')] + keep_only_tags = [dict(name='article', id=lambda x: not x)] no_stylesheets = True preprocess_regexps = [(re.compile('.*', re.DOTALL), lambda x:'')] @@ -78,6 +78,7 @@ class Economist(BasicNewsRecipe): return self.economist_parse_index() def economist_parse_index(self): + # return [('Articles', [{'title':'test', 'url':'http://www.economist.com/news/americas/21699494-guide-cutting-corners-way-jos'}])] soup = self.index_to_soup(self.INDEX) div = soup.find('div', attrs={'class':'issue-image'}) if div is not None: diff --git a/recipes/economist_free.recipe b/recipes/economist_free.recipe index 357ec236ee..82c57a0028 100644 --- a/recipes/economist_free.recipe +++ b/recipes/economist_free.recipe @@ -49,7 +49,7 @@ class Economist(BasicNewsRecipe): 'main-content-container', 'ec-topic-widget']}), {'class': lambda x: x and 'share-links-header' in x}, ] - keep_only_tags = [dict(name='article')] + keep_only_tags = [dict(name='article', id=lambda x: not x)] no_stylesheets = True preprocess_regexps = [(re.compile('.*', re.DOTALL), lambda x:'')] @@ -59,6 +59,7 @@ class Economist(BasicNewsRecipe): delay = 1 needs_subscription = False + def get_browser(self): br = BasicNewsRecipe.get_browser(self) # Add a cookie indicating we have accepted Economist's cookie @@ -77,6 +78,7 @@ class
Economist(BasicNewsRecipe): return self.economist_parse_index() def economist_parse_index(self): + # return [('Articles', [{'title':'test', 'url':'http://www.economist.com/news/americas/21699494-guide-cutting-corners-way-jos'}])] soup = self.index_to_soup(self.INDEX) div = soup.find('div', attrs={'class':'issue-image'}) if div is not None: