Update MIT Technology Review

This commit is contained in:
Kovid Goyal 2013-10-25 16:20:18 +05:30
parent ae285264c5
commit c83d9e245c

View File

@ -1,4 +1,3 @@
import string
from calibre.web.feeds.news import BasicNewsRecipe
class TechnologyReview(BasicNewsRecipe):
@ -11,6 +10,7 @@ class TechnologyReview(BasicNewsRecipe):
# Feed limits: skip articles older than 14 days, at most 100 per feed.
oldest_article = 14
max_articles_per_feed = 100
# calibre's BasicNewsRecipe reads the lowercase ``no_stylesheets`` attribute;
# the previous capitalised spelling (``No_stylesheets``) was silently ignored.
no_stylesheets = True
auto_cleanup = True
extra_css = """
.ArticleBody {font: normal; text-align: justify}
.headline {font: bold x-large}
@ -25,42 +25,4 @@ class TechnologyReview(BasicNewsRecipe):
(u'Biomedicine', u'http://feeds.technologyreview.com/technology_review_Biotech'),
(u'Business', u'http://feeds.technologyreview.com/technology_review_Biztech')
]
# Presentational attributes to strip from every tag.
remove_attributes = ['width', 'align', 'cellspacing']
# <div> elements dropped by id.
remove_tags = [
    {'name': 'div', 'attrs': {'id': ['CloseLink', 'footerAdDiv', 'copyright']}},
]
# Marker element: the <div id="copyright">.
remove_tags_after = [{'name': 'div', 'attrs': {'id': 'copyright'}}]
def get_article_url(self, article):
    """Return the article's ``guid`` entry, falling back to ``id``.

    ``article`` only needs a dict-style ``get``. If neither key is present
    the result is ``None``.
    """
    fallback = article.get('id', None)
    return article.get('guid', fallback)
def print_version(self, url):
    """Build the printer-friendly URL for an article URL.

    The fifth ``/``-separated component of ``url`` (index 4) is appended
    to the printer-friendly base address as the ``id`` query value.

    Raises ``IndexError`` when ``url`` has fewer than five components.
    """
    baseurl = 'http://www.technologyreview.com/printer_friendly_article.aspx?id='
    # str.split replaces the Python 2-only ``string.split`` function. The
    # former second split on "/" was a no-op, because a segment produced by
    # splitting on "/" cannot contain one.
    article_id = url.split('/')[4]
    return baseurl + article_id
def postprocess_html(self, soup, first_fetch):
    """Strip page chrome and flatten links in a parsed article.

    ``soup`` must offer ``find``/``findAll``, and its elements must offer
    ``replaceWith`` and ``string``. The first element of each of the
    classes ``header``, ``close`` and ``bannerad`` is replaced by an empty
    string. Every ``<a>`` whose ``string`` is not ``None`` is then replaced
    by that string. ``first_fetch`` is accepted for the caller and unused.

    Returns the same ``soup`` object, modified in place.
    """
    # The second parameter used to be named ``True``, which is a
    # SyntaxError on Python 3. It is always passed positionally here.
    # Picture header, close button, banner advertisement.
    for css_class in ('header', 'close', 'bannerad'):
        element = soup.find(True, {'class': css_class})
        # Not every page carries all three; skip what is absent rather
        # than failing on None.
        if element is not None:
            element.replaceWith("")
    # thanks kiklop74! This code removes all links from the text
    for alink in soup.findAll('a'):
        link_text = alink.string
        if link_text is not None:
            alink.replaceWith(link_text)
    return soup