From b0b904fbf13ce00dd37b4f755980abc31370d498 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 1 Aug 2010 08:51:10 -0600 Subject: [PATCH] Fix #6373 (Updated Recipe:Scientific American) --- resources/recipes/scientific_american.recipe | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/resources/recipes/scientific_american.recipe b/resources/recipes/scientific_american.recipe index 15d82a2cb5..3970684788 100644 --- a/resources/recipes/scientific_american.recipe +++ b/resources/recipes/scientific_american.recipe @@ -14,7 +14,7 @@ class ScientificAmerican(BasicNewsRecipe): description = u'Popular science. Monthly magazine.' __author__ = 'Kovid Goyal and Sujata Raman' language = 'en' - + remove_javascript = True oldest_article = 30 max_articles_per_feed = 100 no_stylesheets = True @@ -31,11 +31,13 @@ class ScientificAmerican(BasicNewsRecipe): remove_tags_after = dict(id=['article']) remove_tags = [ dict(id=['sharetools', 'reddit']), - dict(name='script'), + #dict(name='script'), {'class':['float_left', 'atools']}, {"class": re.compile(r'also-in-this')}, dict(name='a',title = ["Get the Rest of the Article","Subscribe","Buy this Issue"]), dict(name = 'img',alt = ["Graphic - Get the Rest of the Article"]), + dict(name='div', attrs={'class':['commentbox']}), + dict(name='h2', attrs={'class':['discuss_h2']}), ] html2lrf_options = ['--base-font-size', '8'] @@ -110,3 +112,10 @@ class ScientificAmerican(BasicNewsRecipe): div.extract() return soup + + preprocess_regexps = [ + (re.compile(r'Already a Digital subscriber.*Now', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(r'If your institution has site license access, enter.*here.', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(r'to subscribe to our.*;.*\}', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(r'\)\(jQuery\);.*-->', re.DOTALL|re.IGNORECASE), lambda match: ''), + ]