mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Updated Newsweek recipe
This commit is contained in:
parent
0744534b2d
commit
87f281cf4d
@@ -11,7 +11,20 @@ class Newsweek(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
BASE_URL = 'http://www.newsweek.com'
|
BASE_URL = 'http://www.newsweek.com'
|
||||||
INDEX = BASE_URL+'/topics.html'
|
|
||||||
|
topics = {
|
||||||
|
'Culture' : '/tag/culture.html',
|
||||||
|
'Business' : '/tag/business.html',
|
||||||
|
'Society' : '/tag/society.html',
|
||||||
|
'Science' : '/tag/science.html',
|
||||||
|
'Education' : '/tag/education.html',
|
||||||
|
'Politics' : '/tag/politics.html',
|
||||||
|
'Health' : '/tag/health.html',
|
||||||
|
'World' : '/tag/world.html',
|
||||||
|
'Nation' : '/tag/nation.html',
|
||||||
|
'Technology' : '/tag/technology.html',
|
||||||
|
'Game Changers' : '/tag/game-changers.html',
|
||||||
|
}
|
||||||
|
|
||||||
keep_only_tags = dict(name='article', attrs={'class':'article-text'})
|
keep_only_tags = dict(name='article', attrs={'class':'article-text'})
|
||||||
remove_tags = [dict(attrs={'data-dartad':True})]
|
remove_tags = [dict(attrs={'data-dartad':True})]
|
||||||
@@ -23,10 +36,9 @@ class Newsweek(BasicNewsRecipe):
|
|||||||
return soup
|
return soup
|
||||||
|
|
||||||
def newsweek_sections(self):
|
def newsweek_sections(self):
|
||||||
soup = self.index_to_soup(self.INDEX)
|
for topic_name, topic_url in self.topics.iteritems():
|
||||||
for a in soup.findAll('a', title='Primary tag', href=True):
|
yield (topic_name,
|
||||||
yield (string.capitalize(self.tag_to_string(a)),
|
self.BASE_URL+topic_url)
|
||||||
self.BASE_URL+a['href'])
|
|
||||||
|
|
||||||
|
|
||||||
def newsweek_parse_section_page(self, soup):
|
def newsweek_parse_section_page(self, soup):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user