diff --git a/recipes/technology_review.recipe b/recipes/technology_review.recipe index e7cc6700d7..089f5b00d0 100644 --- a/recipes/technology_review.recipe +++ b/recipes/technology_review.recipe @@ -1,4 +1,3 @@ -import string from calibre.web.feeds.news import BasicNewsRecipe class TechnologyReview(BasicNewsRecipe): @@ -11,56 +10,19 @@ class TechnologyReview(BasicNewsRecipe): oldest_article = 14 max_articles_per_feed = 100 No_stylesheets = True + auto_cleanup = True extra_css = """ .ArticleBody {font: normal; text-align: justify} .headline {font: bold x-large} .subheadline {font: italic large} """ feeds = [ - (u'Computing', u'http://feeds.technologyreview.com/technology_review_Computing'), - (u'Web', u'http://feeds.technologyreview.com/technology_review_Web'), - (u'Communications', u'http://feeds.technologyreview.com/technology_review_Communications'), - (u'Energy', u'http://feeds.technologyreview.com/technology_review_Energy'), - (u'Materials', u'http://feeds.technologyreview.com/technology_review_Materials'), - (u'Biomedicine', u'http://feeds.technologyreview.com/technology_review_Biotech'), - (u'Business', u'http://feeds.technologyreview.com/technology_review_Biztech') - ] - remove_attributes = ['width', 'align','cellspacing'] + (u'Computing', u'http://feeds.technologyreview.com/technology_review_Computing'), + (u'Web', u'http://feeds.technologyreview.com/technology_review_Web'), + (u'Communications', u'http://feeds.technologyreview.com/technology_review_Communications'), + (u'Energy', u'http://feeds.technologyreview.com/technology_review_Energy'), + (u'Materials', u'http://feeds.technologyreview.com/technology_review_Materials'), + (u'Biomedicine', u'http://feeds.technologyreview.com/technology_review_Biotech'), + (u'Business', u'http://feeds.technologyreview.com/technology_review_Biztech') + ] - remove_tags = [ - dict(name='div', attrs={'id':['CloseLink','footerAdDiv','copyright']}), - ] - remove_tags_after = [dict(name='div', attrs={'id':'copyright'})] - - def get_article_url(self, article): - return article.get('guid', article.get('id', None)) - - def print_version(self, url): - baseurl='http://www.technologyreview.com/printer_friendly_article.aspx?id=' - split1 = string.split(url,"/") - xxx=split1 [4] - split2= string.split(xxx,"/") - s = baseurl + split2[0] - return s - - - def postprocess_html(self,soup, True): - #remove picture - headerhtml = soup.find(True, {'class':'header'}) - headerhtml.replaceWith("") - - #remove close button - closehtml = soup.find(True, {'class':'close'}) - closehtml.replaceWith("") - - #remove banner advertisement - bannerhtml = soup.find(True, {'class':'bannerad'}) - bannerhtml.replaceWith("") - - #thanks kiklop74! This code removes all links from the text - for alink in soup.findAll('a'): - if alink.string is not None: - tstr = alink.string - alink.replaceWith(tstr) - - return soup