From 7360075e04e25e3bff31a22d86a1471597ee20c9 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Fri, 25 Oct 2013 15:59:30 +0530
Subject: [PATCH] Update Science Based News

---
 recipes/science_based_medicine.recipe | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/recipes/science_based_medicine.recipe b/recipes/science_based_medicine.recipe
index 7aa28cb170..352f42452f 100644
--- a/recipes/science_based_medicine.recipe
+++ b/recipes/science_based_medicine.recipe
@@ -1,8 +1,6 @@
 #!/usr/bin/env python
 
-import re
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag
 
 class SBM(BasicNewsRecipe):
     title = 'Science Based Medicine'
@@ -12,6 +10,7 @@ class SBM(BasicNewsRecipe):
     max_articles_per_feed = 15
     no_stylesheets = True
     use_embedded_content = False
+    auto_cleanup = True
     encoding = 'utf-8'
     publisher = 'SBM'
     category = 'science, sbm, ebm, blog, pseudoscience'
@@ -27,16 +26,16 @@ class SBM(BasicNewsRecipe):
         , 'pretty_print' : True
     }
 
-    keep_only_tags = [
-        dict(name='a', attrs={'title':re.compile(r'Posts by.*', re.DOTALL|re.IGNORECASE)}),
-        dict(name='div', attrs={'class':'entry'})
-    ]
+    # keep_only_tags = [
+        #dict(name='a', attrs={'title':re.compile(r'Posts by.*', re.DOTALL|re.IGNORECASE)}),
+        #dict(name='div', attrs={'class':'entry'})
+    #]
 
     feeds = [(u'Science Based Medicine', u'http://www.sciencebasedmedicine.org/?feed=rss2')]
 
-    def preprocess_html(self, soup):
-        mtag = Tag(soup,'meta',[('http-equiv','Content-Type'),('context','text/html; charset=utf-8')])
-        soup.head.insert(0,mtag)
-        soup.html['lang'] = self.lang
-        return self.adeify_images(soup)
+    # def preprocess_html(self, soup):
+        #mtag = Tag(soup,'meta',[('http-equiv','Content-Type'),('context','text/html; charset=utf-8')])
+        # soup.head.insert(0,mtag)
+        #soup.html['lang'] = self.lang
+        # return self.adeify_images(soup)