mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #7917 (New Scientist recipe update)
This commit is contained in:
parent
5ce7afa6e2
commit
8289d68454
@@ -5,6 +5,7 @@ newscientist.com
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import urllib
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class NewScientist(BasicNewsRecipe):
|
class NewScientist(BasicNewsRecipe):
|
||||||
@@ -24,7 +25,7 @@ class NewScientist(BasicNewsRecipe):
|
|||||||
needs_subscription = 'optional'
|
needs_subscription = 'optional'
|
||||||
extra_css = """
|
extra_css = """
|
||||||
body{font-family: Arial,sans-serif}
|
body{font-family: Arial,sans-serif}
|
||||||
img{margin-bottom: 0.8em}
|
img{margin-bottom: 0.8em; display: block}
|
||||||
.quotebx{font-size: x-large; font-weight: bold; margin-right: 2em; margin-left: 2em}
|
.quotebx{font-size: x-large; font-weight: bold; margin-right: 2em; margin-left: 2em}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -41,12 +42,14 @@ class NewScientist(BasicNewsRecipe):
|
|||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
br.open('http://www.newscientist.com/')
|
br.open('http://www.newscientist.com/')
|
||||||
if self.username is not None and self.password is not None:
|
if self.username is not None and self.password is not None:
|
||||||
br.open('https://www.newscientist.com/user/login?redirectURL=')
|
br.open('https://www.newscientist.com/user/login')
|
||||||
br.select_form(nr=2)
|
data = urllib.urlencode({ 'source':'form'
|
||||||
br['loginId' ] = self.username
|
,'redirectURL':''
|
||||||
br['password'] = self.password
|
,'loginId':self.username
|
||||||
br.submit()
|
,'password':self.password
|
||||||
|
})
|
||||||
|
br.open('https://www.newscientist.com/user/login',data)
|
||||||
return br
|
return br
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
@@ -55,21 +58,22 @@ class NewScientist(BasicNewsRecipe):
|
|||||||
,dict(name='p' , attrs={'class':['marker','infotext' ]})
|
,dict(name='p' , attrs={'class':['marker','infotext' ]})
|
||||||
,dict(name='meta' , attrs={'name' :'description' })
|
,dict(name='meta' , attrs={'name' :'description' })
|
||||||
,dict(name='a' , attrs={'rel' :'tag' })
|
,dict(name='a' , attrs={'rel' :'tag' })
|
||||||
|
,dict(name='ul' , attrs={'class':'markerlist' })
|
||||||
,dict(name=['link','base','meta','iframe','object','embed'])
|
,dict(name=['link','base','meta','iframe','object','embed'])
|
||||||
]
|
]
|
||||||
remove_tags_after = dict(attrs={'class':['nbpcopy','comments']})
|
remove_tags_after = dict(attrs={'class':['nbpcopy','comments']})
|
||||||
remove_attributes = ['height','width','lang']
|
remove_attributes = ['height','width','lang','onclick']
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Latest Headlines' , u'http://feeds.newscientist.com/science-news' )
|
(u'Latest Headlines' , u'http://feeds.newscientist.com/science-news' )
|
||||||
,(u'Magazine' , u'http://www.newscientist.com/feed/magazine' )
|
,(u'Magazine' , u'http://feeds.newscientist.com/magazine' )
|
||||||
,(u'Health' , u'http://www.newscientist.com/feed/view?id=2&type=channel' )
|
,(u'Health' , u'http://feeds.newscientist.com/health' )
|
||||||
,(u'Life' , u'http://www.newscientist.com/feed/view?id=3&type=channel' )
|
,(u'Life' , u'http://feeds.newscientist.com/life' )
|
||||||
,(u'Space' , u'http://www.newscientist.com/feed/view?id=6&type=channel' )
|
,(u'Space' , u'http://feeds.newscientist.com/space' )
|
||||||
,(u'Physics and Mathematics' , u'http://www.newscientist.com/feed/view?id=4&type=channel' )
|
,(u'Physics and Mathematics' , u'http://feeds.newscientist.com/physics-math' )
|
||||||
,(u'Environment' , u'http://www.newscientist.com/feed/view?id=1&type=channel' )
|
,(u'Environment' , u'http://feeds.newscientist.com/environment' )
|
||||||
,(u'Science in Society' , u'http://www.newscientist.com/feed/view?id=5&type=channel' )
|
,(u'Science in Society' , u'http://feeds.newscientist.com/science-in-society' )
|
||||||
,(u'Tech' , u'http://www.newscientist.com/feed/view?id=7&type=channel' )
|
,(u'Tech' , u'http://feeds.newscientist.com/tech' )
|
||||||
]
|
]
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
@@ -79,11 +83,21 @@ class NewScientist(BasicNewsRecipe):
|
|||||||
return url + '?full=true&print=true'
|
return url + '?full=true&print=true'
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
|
if soup.html.has_key('id'):
|
||||||
|
del soup.html['id']
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
for item in soup.findAll(['quote','quotetext']):
|
for item in soup.findAll(['quote','quotetext']):
|
||||||
item.name='p'
|
item.name='p'
|
||||||
|
for item in soup.findAll(['xref','figref']):
|
||||||
|
tstr = item.string
|
||||||
|
item.replaceWith(tstr)
|
||||||
for tg in soup.findAll('a'):
|
for tg in soup.findAll('a'):
|
||||||
if tg.string == 'Home':
|
if tg.string == 'Home':
|
||||||
tg.parent.extract()
|
tg.parent.extract()
|
||||||
return self.adeify_images(soup)
|
else:
|
||||||
return self.adeify_images(soup)
|
if tg.string is not None:
|
||||||
|
tstr = tg.string
|
||||||
|
tg.replaceWith(tstr)
|
||||||
|
return soup
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user