Update MIT Technology Review

This commit is contained in:
Kovid Goyal 2013-10-25 16:20:18 +05:30
parent ae285264c5
commit c83d9e245c

View File

@@ -1,4 +1,3 @@
import string
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class TechnologyReview(BasicNewsRecipe): class TechnologyReview(BasicNewsRecipe):
@@ -11,56 +10,19 @@ class TechnologyReview(BasicNewsRecipe):
oldest_article = 14 oldest_article = 14
max_articles_per_feed = 100 max_articles_per_feed = 100
No_stylesheets = True No_stylesheets = True
auto_cleanup = True
extra_css = """ extra_css = """
.ArticleBody {font: normal; text-align: justify} .ArticleBody {font: normal; text-align: justify}
.headline {font: bold x-large} .headline {font: bold x-large}
.subheadline {font: italic large} .subheadline {font: italic large}
""" """
feeds = [ feeds = [
(u'Computing', u'http://feeds.technologyreview.com/technology_review_Computing'), (u'Computing', u'http://feeds.technologyreview.com/technology_review_Computing'),
(u'Web', u'http://feeds.technologyreview.com/technology_review_Web'), (u'Web', u'http://feeds.technologyreview.com/technology_review_Web'),
(u'Communications', u'http://feeds.technologyreview.com/technology_review_Communications'), (u'Communications', u'http://feeds.technologyreview.com/technology_review_Communications'),
(u'Energy', u'http://feeds.technologyreview.com/technology_review_Energy'), (u'Energy', u'http://feeds.technologyreview.com/technology_review_Energy'),
(u'Materials', u'http://feeds.technologyreview.com/technology_review_Materials'), (u'Materials', u'http://feeds.technologyreview.com/technology_review_Materials'),
(u'Biomedicine', u'http://feeds.technologyreview.com/technology_review_Biotech'), (u'Biomedicine', u'http://feeds.technologyreview.com/technology_review_Biotech'),
(u'Business', u'http://feeds.technologyreview.com/technology_review_Biztech') (u'Business', u'http://feeds.technologyreview.com/technology_review_Biztech')
] ]
# Markup clean-up settings for the downloaded article pages.
# NOTE(review): presumably consumed by calibre's BasicNewsRecipe; confirm
# the exact semantics against the calibre recipe API documentation.

# Attribute names to strip from tags.
remove_attributes = ['width', 'align', 'cellspacing']

# Tag matchers: <div> elements whose id is one of the listed values.
remove_tags = [
    {'name': 'div', 'attrs': {'id': ['CloseLink', 'footerAdDiv', 'copyright']}},
]

# Tag matcher for the <div id="copyright"> element.
remove_tags_after = [{'name': 'div', 'attrs': {'id': 'copyright'}}]
def get_article_url(self, article):
    """Return the URL to use for a feed entry.

    The entry's 'guid' value is used when that key is present; otherwise
    the 'id' value is used, and None is returned when neither key exists.
    """
    fallback = article.get('id', None)
    return article.get('guid', fallback)
def print_version(self, url):
    """Map an article URL to its printer-friendly counterpart.

    The article id is the fifth "/"-separated segment of *url* (index 4);
    it is appended to the printer-friendly base URL.

    Returns *url* unchanged when that segment is missing or empty, instead
    of raising IndexError or producing a link with no id.
    """
    baseurl = 'http://www.technologyreview.com/printer_friendly_article.aspx?id='
    # str.split replaces the Python-2-only string.split helper. A segment
    # produced by splitting on "/" cannot contain "/", so the original
    # second split was a no-op and is dropped.
    segments = url.split('/')
    article_id = segments[4] if len(segments) > 4 else ''
    if not article_id:
        return url
    return baseurl + article_id
def postprocess_html(self, soup, first_fetch):
    """Strip page furniture and hyperlinks from a parsed article.

    Replaces the first element of each of the classes 'header' (picture),
    'close' (close button) and 'bannerad' (banner advertisement) with an
    empty string, then replaces every <a> tag that has a single string
    child with that string.

    soup: parsed document exposing find / findAll / replaceWith.
    first_fetch: unused. The original parameter was named ``True``, which
        is a SyntaxError on Python 3; it is only ever passed positionally.

    Returns the same soup object, modified in place.
    """
    # find() returns None when a page lacks one of these elements, so guard
    # each removal instead of failing with AttributeError.
    for css_class in ('header', 'close', 'bannerad'):
        node = soup.find(True, {'class': css_class})
        if node is not None:
            node.replaceWith("")
    # thanks kiklop74! This code removes all links from the text
    for alink in soup.findAll('a'):
        # .string is None for links with nested or multiple children;
        # those are left untouched.
        if alink.string is not None:
            alink.replaceWith(alink.string)
    return soup