mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Update MIT Technology Review
This commit is contained in:
parent
ae285264c5
commit
c83d9e245c
@ -1,4 +1,3 @@
|
||||
import string
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class TechnologyReview(BasicNewsRecipe):
|
||||
@ -11,56 +10,19 @@ class TechnologyReview(BasicNewsRecipe):
|
||||
# Recipe-level settings; the names below are options looked up by
# BasicNewsRecipe (see the calibre recipe API documentation).
oldest_article = 14
max_articles_per_feed = 100
# The option name is case-sensitive and lowercase. The previous spelling,
# `No_stylesheets`, only created an unrelated attribute that nothing reads,
# so the setting silently had no effect.
no_stylesheets = True
auto_cleanup = True
|
||||
extra_css = """
|
||||
.ArticleBody {font: normal; text-align: justify}
|
||||
.headline {font: bold x-large}
|
||||
.subheadline {font: italic large}
|
||||
"""
|
||||
feeds = [
|
||||
(u'Computing', u'http://feeds.technologyreview.com/technology_review_Computing'),
|
||||
(u'Web', u'http://feeds.technologyreview.com/technology_review_Web'),
|
||||
(u'Communications', u'http://feeds.technologyreview.com/technology_review_Communications'),
|
||||
(u'Energy', u'http://feeds.technologyreview.com/technology_review_Energy'),
|
||||
(u'Materials', u'http://feeds.technologyreview.com/technology_review_Materials'),
|
||||
(u'Biomedicine', u'http://feeds.technologyreview.com/technology_review_Biotech'),
|
||||
(u'Business', u'http://feeds.technologyreview.com/technology_review_Biztech')
|
||||
]
|
||||
remove_attributes = ['width', 'align','cellspacing']
|
||||
(u'Computing', u'http://feeds.technologyreview.com/technology_review_Computing'),
|
||||
(u'Web', u'http://feeds.technologyreview.com/technology_review_Web'),
|
||||
(u'Communications', u'http://feeds.technologyreview.com/technology_review_Communications'),
|
||||
(u'Energy', u'http://feeds.technologyreview.com/technology_review_Energy'),
|
||||
(u'Materials', u'http://feeds.technologyreview.com/technology_review_Materials'),
|
||||
(u'Biomedicine', u'http://feeds.technologyreview.com/technology_review_Biotech'),
|
||||
(u'Business', u'http://feeds.technologyreview.com/technology_review_Biztech')
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':['CloseLink','footerAdDiv','copyright']}),
|
||||
]
|
||||
remove_tags_after = [dict(name='div', attrs={'id':'copyright'})]
|
||||
|
||||
def get_article_url(self, article):
    """Return the URL to fetch for a feed entry.

    The entry's ``guid`` is preferred; when it is missing or empty the
    entry's ``id`` is used instead.

    :param article: feed entry supporting ``.get(key)``.
    :return: the ``guid`` or ``id`` value, or ``None`` when neither is set.
    """
    # `or` (instead of a .get() default) also falls back when 'guid' is
    # present but empty/None, which a default argument would not catch.
    return article.get('guid') or article.get('id')
|
||||
|
||||
def print_version(self, url):
    """Build the printer-friendly URL for an article URL.

    The fifth ``/``-separated component of *url* (index 4) is appended to
    the printer-friendly base URL as its ``id`` value. Presumably this
    component is the numeric article id -- confirm against live feed URLs.

    :param url: article URL as a string.
    :return: the printer-friendly URL, or *url* unchanged when it has fewer
        than five ``/``-separated components.
    """
    baseurl = 'http://www.technologyreview.com/printer_friendly_article.aspx?id='
    # str.split replaces string.split(), which no longer exists in Python 3.
    components = url.split('/')
    if len(components) < 5:
        # No component to use as the id: keep the original URL rather than
        # raising IndexError.
        return url
    # A component produced by splitting on '/' cannot itself contain '/',
    # so the original second split was a no-op and is dropped.
    return baseurl + components[4]
|
||||
|
||||
|
||||
def postprocess_html(self, soup, first_fetch):
    """Strip page chrome and flatten links in a downloaded article.

    The first element whose class is ``header`` (the picture), ``close``
    (the close button) and ``bannerad`` (the banner advertisement) is each
    replaced with an empty string. Every ``<a>`` tag that has a single
    string child is then replaced by that string.

    :param soup: parsed article document (BeautifulSoup-style object
        providing ``find``, ``findAll`` and ``replaceWith`` on its nodes).
    :param first_fetch: unused here. It was previously named ``True``, which
        is a syntax error in Python 3 and, in Python 2, shadowed the builtin
        so that ``soup.find(True, ...)`` received this flag instead of the
        "match any tag" value.
    :return: the same *soup* object, modified in place.
    """
    # Processed in the original order: picture, close button, banner ad.
    for css_class in ('header', 'close', 'bannerad'):
        element = soup.find(True, {'class': css_class})
        # Not every page contains every element; skip the ones not found
        # instead of failing with AttributeError on None.
        if element is not None:
            element.replaceWith("")

    # thanks kiklop74! This code removes all links from the text
    for alink in soup.findAll('a'):
        link_text = alink.string
        # Links without a single string child are left as they are.
        if link_text is not None:
            alink.replaceWith(link_text)

    return soup
|
||||
|
Loading…
x
Reference in New Issue
Block a user