mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-09-29 15:31:08 -04:00
67 lines
2.4 KiB
Plaintext
67 lines
2.4 KiB
Plaintext
import string
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
class TechnologyReview(BasicNewsRecipe):
    """Calibre news recipe for the MIT Technology Review RSS feeds.

    Articles are taken from the per-topic feeds listed in ``feeds``, fetched
    through the site's printer-friendly page, and then stripped of the page
    header, close button, banner advertisement and hyperlinks.
    """

    title = u'Technology Review'
    __author__ = 'rty'
    description = 'MIT Technology Magazine'
    publisher = 'Technology Review Inc.'
    category = 'Technology, Innovation, R&D'
    language = 'en'
    oldest_article = 14
    max_articles_per_feed = 100
    # The option name is all lowercase; the previous spelling
    # ``No_stylesheets`` merely created an unrelated attribute and the
    # setting was never applied.
    no_stylesheets = True
    extra_css = """
        .ArticleBody {font: normal; text-align: justify}
        .headline {font: bold x-large}
        .subheadline {font: italic large}
    """

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Computing', u'http://feeds.technologyreview.com/technology_review_Computing'),
        (u'Web', u'http://feeds.technologyreview.com/technology_review_Web'),
        (u'Communications', u'http://feeds.technologyreview.com/technology_review_Communications'),
        (u'Energy', u'http://feeds.technologyreview.com/technology_review_Energy'),
        (u'Materials', u'http://feeds.technologyreview.com/technology_review_Materials'),
        (u'Biomedicine', u'http://feeds.technologyreview.com/technology_review_Biotech'),
        (u'Business', u'http://feeds.technologyreview.com/technology_review_Biztech'),
    ]

    remove_attributes = ['width', 'align', 'cellspacing']

    remove_tags = [
        dict(name='div', attrs={'id': ['CloseLink', 'footerAdDiv', 'copyright']}),
    ]
    remove_tags_after = [dict(name='div', attrs={'id': 'copyright'})]

    def get_article_url(self, article):
        """Return the article's ``guid``, falling back to ``id``, else None."""
        return article.get('guid', article.get('id', None))

    def print_version(self, url):
        """Build the printer-friendly URL for an article URL.

        The fifth ``/``-separated component of ``url`` (index 4) is used as
        the ``id`` query value of the printer-friendly page.

        Raises:
            IndexError: if ``url`` has fewer than five ``/``-separated parts.
        """
        baseurl = 'http://www.technologyreview.com/printer_friendly_article.aspx?id='
        # str.split replaces string.split(), which does not exist in Python 3.
        # A segment obtained by splitting on "/" cannot itself contain "/",
        # so the former second split was a no-op and has been dropped.
        article_id = url.split('/')[4]
        return baseurl + article_id

    def postprocess_html(self, soup, first_fetch):
        """Strip page chrome and hyperlinks from a downloaded article.

        Args:
            soup: parsed article document.
            first_fetch: unused here; it is the second argument of the
                ``postprocess_html`` hook (the original code named this
                parameter ``True``, which is a syntax error in Python 3).

        Returns:
            The same ``soup`` object, modified in place.
        """
        # Remove the header picture, the close button and the banner
        # advertisement. Each is optional: pages lacking one of them used to
        # raise AttributeError on the None returned by find().
        for css_class in ('header', 'close', 'bannerad'):
            tag = soup.find(True, {'class': css_class})
            if tag is not None:
                tag.extract()

        # thanks kiklop74! This code removes all links from the text,
        # keeping only the link text. Anchors for which ``.string`` is None
        # are left untouched.
        for alink in soup.findAll('a'):
            link_text = alink.string
            if link_text is not None:
                alink.replaceWith(link_text)

        return soup
|