Update Science Based News

This commit is contained in:
Kovid Goyal 2013-10-25 15:59:30 +05:30
parent a6567a7c03
commit 7360075e04

View File

@@ -1,8 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class SBM(BasicNewsRecipe): class SBM(BasicNewsRecipe):
title = 'Science Based Medicine' title = 'Science Based Medicine'
@@ -12,6 +10,7 @@ class SBM(BasicNewsRecipe):
max_articles_per_feed = 15 max_articles_per_feed = 15
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
auto_cleanup = True
encoding = 'utf-8' encoding = 'utf-8'
publisher = 'SBM' publisher = 'SBM'
category = 'science, sbm, ebm, blog, pseudoscience' category = 'science, sbm, ebm, blog, pseudoscience'
@@ -27,16 +26,16 @@ class SBM(BasicNewsRecipe):
, 'pretty_print' : True , 'pretty_print' : True
} }
keep_only_tags = [ # keep_only_tags = [
dict(name='a', attrs={'title':re.compile(r'Posts by.*', re.DOTALL|re.IGNORECASE)}), #dict(name='a', attrs={'title':re.compile(r'Posts by.*', re.DOTALL|re.IGNORECASE)}),
dict(name='div', attrs={'class':'entry'}) #dict(name='div', attrs={'class':'entry'})
] #]
feeds = [(u'Science Based Medicine', u'http://www.sciencebasedmedicine.org/?feed=rss2')] feeds = [(u'Science Based Medicine', u'http://www.sciencebasedmedicine.org/?feed=rss2')]
def preprocess_html(self, soup): # def preprocess_html(self, soup):
mtag = Tag(soup,'meta',[('http-equiv','Content-Type'),('context','text/html; charset=utf-8')]) #mtag = Tag(soup,'meta',[('http-equiv','Content-Type'),('context','text/html; charset=utf-8')])
soup.head.insert(0,mtag) # soup.head.insert(0,mtag)
soup.html['lang'] = self.lang #soup.html['lang'] = self.lang
return self.adeify_images(soup) # return self.adeify_images(soup)