mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Update MIT Technology Review
This commit is contained in:
parent
ae285264c5
commit
c83d9e245c
@ -1,4 +1,3 @@
|
|||||||
import string
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class TechnologyReview(BasicNewsRecipe):
|
class TechnologyReview(BasicNewsRecipe):
|
||||||
@ -11,56 +10,19 @@ class TechnologyReview(BasicNewsRecipe):
|
|||||||
oldest_article = 14
|
oldest_article = 14
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
No_stylesheets = True
|
No_stylesheets = True
|
||||||
|
auto_cleanup = True
|
||||||
extra_css = """
|
extra_css = """
|
||||||
.ArticleBody {font: normal; text-align: justify}
|
.ArticleBody {font: normal; text-align: justify}
|
||||||
.headline {font: bold x-large}
|
.headline {font: bold x-large}
|
||||||
.subheadline {font: italic large}
|
.subheadline {font: italic large}
|
||||||
"""
|
"""
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Computing', u'http://feeds.technologyreview.com/technology_review_Computing'),
|
(u'Computing', u'http://feeds.technologyreview.com/technology_review_Computing'),
|
||||||
(u'Web', u'http://feeds.technologyreview.com/technology_review_Web'),
|
(u'Web', u'http://feeds.technologyreview.com/technology_review_Web'),
|
||||||
(u'Communications', u'http://feeds.technologyreview.com/technology_review_Communications'),
|
(u'Communications', u'http://feeds.technologyreview.com/technology_review_Communications'),
|
||||||
(u'Energy', u'http://feeds.technologyreview.com/technology_review_Energy'),
|
(u'Energy', u'http://feeds.technologyreview.com/technology_review_Energy'),
|
||||||
(u'Materials', u'http://feeds.technologyreview.com/technology_review_Materials'),
|
(u'Materials', u'http://feeds.technologyreview.com/technology_review_Materials'),
|
||||||
(u'Biomedicine', u'http://feeds.technologyreview.com/technology_review_Biotech'),
|
(u'Biomedicine', u'http://feeds.technologyreview.com/technology_review_Biotech'),
|
||||||
(u'Business', u'http://feeds.technologyreview.com/technology_review_Biztech')
|
(u'Business', u'http://feeds.technologyreview.com/technology_review_Biztech')
|
||||||
]
|
]
|
||||||
remove_attributes = ['width', 'align','cellspacing']
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='div', attrs={'id':['CloseLink','footerAdDiv','copyright']}),
|
|
||||||
]
|
|
||||||
remove_tags_after = [dict(name='div', attrs={'id':'copyright'})]
|
|
||||||
|
|
||||||
def get_article_url(self, article):
|
|
||||||
return article.get('guid', article.get('id', None))
|
|
||||||
|
|
||||||
def print_version(self, url):
|
|
||||||
baseurl='http://www.technologyreview.com/printer_friendly_article.aspx?id='
|
|
||||||
split1 = string.split(url,"/")
|
|
||||||
xxx=split1 [4]
|
|
||||||
split2= string.split(xxx,"/")
|
|
||||||
s = baseurl + split2[0]
|
|
||||||
return s
|
|
||||||
|
|
||||||
|
|
||||||
def postprocess_html(self,soup, True):
|
|
||||||
#remove picture
|
|
||||||
headerhtml = soup.find(True, {'class':'header'})
|
|
||||||
headerhtml.replaceWith("")
|
|
||||||
|
|
||||||
#remove close button
|
|
||||||
closehtml = soup.find(True, {'class':'close'})
|
|
||||||
closehtml.replaceWith("")
|
|
||||||
|
|
||||||
#remove banner advertisement
|
|
||||||
bannerhtml = soup.find(True, {'class':'bannerad'})
|
|
||||||
bannerhtml.replaceWith("")
|
|
||||||
|
|
||||||
#thanks kiklop74! This code removes all links from the text
|
|
||||||
for alink in soup.findAll('a'):
|
|
||||||
if alink.string is not None:
|
|
||||||
tstr = alink.string
|
|
||||||
alink.replaceWith(tstr)
|
|
||||||
|
|
||||||
return soup
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user