From d92c03de7d18bca07dff9c7247591708304b6802 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 12 Jan 2011 09:21:20 -0700
Subject: [PATCH] Updated MIT Technology Review

---
 resources/recipes/technology_review.recipe | 33 +++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/resources/recipes/technology_review.recipe b/resources/recipes/technology_review.recipe
index cc8f13733e..e7cc6700d7 100644
--- a/resources/recipes/technology_review.recipe
+++ b/resources/recipes/technology_review.recipe
@@ -35,7 +35,6 @@ class TechnologyReview(BasicNewsRecipe):
     def get_article_url(self, article):
         return article.get('guid', article.get('id', None))
 
-
     def print_version(self, url):
         baseurl='http://www.technologyreview.com/printer_friendly_article.aspx?id='
         split1 = string.split(url,"/")
@@ -43,3 +42,35 @@ class TechnologyReview(BasicNewsRecipe):
         split2= string.split(xxx,"/")
         s = baseurl + split2[0]
         return s
+
+
+    def postprocess_html(self, soup, first_fetch):
+        '''
+        Strip the header picture, the close button, the banner
+        advertisement and all hyperlinks from the article markup.
+
+        :param soup: parsed page (presumably a BeautifulSoup tree);
+                     it is modified in place
+        :param first_fetch: unused; must not be named ``True``, because
+                            the ``soup.find(True, ...)`` calls below need
+                            the real builtin
+        :return: the same ``soup`` object
+        '''
+        # Each of these elements may be missing from a page, in which
+        # case find() returns None and there is nothing to remove.
+        unwanted = (
+            'header',    # picture
+            'close',     # close button
+            'bannerad',  # banner advertisement
+        )
+        for css_class in unwanted:
+            tag = soup.find(True, {'class': css_class})
+            if tag is not None:
+                tag.replaceWith('')
+
+        # thanks kiklop74! This code removes all links from the text
+        for alink in soup.findAll('a'):
+            if alink.string is not None:
+                alink.replaceWith(alink.string)
+
+        return soup