mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Smithsonian Magazine
This commit is contained in:
parent
6e53973981
commit
7c1b735906
@ -25,7 +25,7 @@ class Smithsonian(BasicNewsRecipe):
|
|||||||
soup = self.index_to_soup(current_issue_url)
|
soup = self.index_to_soup(current_issue_url)
|
||||||
|
|
||||||
#Go to the main body
|
#Go to the main body
|
||||||
div = soup.find ('div', attrs={'id':'content-inset'})
|
div = soup.find ('div', attrs={'id':'article-body'})
|
||||||
|
|
||||||
#Find date
|
#Find date
|
||||||
date = re.sub('.*\:\W*', "", self.tag_to_string(div.find('h2')).strip())
|
date = re.sub('.*\:\W*', "", self.tag_to_string(div.find('h2')).strip())
|
||||||
@ -36,35 +36,26 @@ class Smithsonian(BasicNewsRecipe):
|
|||||||
|
|
||||||
feeds = OrderedDict()
|
feeds = OrderedDict()
|
||||||
section_title = ''
|
section_title = ''
|
||||||
subsection_title = ''
|
articles = []
|
||||||
for post in div.findAll('div', attrs={'class':['plainModule', 'departments plainModule']}):
|
for post in div.findAll('div', attrs={'class':['plainModule', 'departments plainModule']}):
|
||||||
articles = []
|
h4=post.find('h3')
|
||||||
prefix = ''
|
if h4 is not None:
|
||||||
h3=post.find('h3')
|
if articles:
|
||||||
if h3 is not None:
|
if section_title not in feeds:
|
||||||
section_title = self.tag_to_string(h3)
|
feeds[section_title] = []
|
||||||
|
feeds[section_title] += articles
|
||||||
|
section_title = self.tag_to_string(h4)
|
||||||
|
articles = []
|
||||||
|
self.log('Found section:', section_title)
|
||||||
else:
|
else:
|
||||||
subsection=post.find('p',attrs={'class':'article-cat'})
|
|
||||||
link=post.find('a',href=True)
|
link=post.find('a',href=True)
|
||||||
url=link['href']+'?c=y&story=fullstory'
|
url=link['href']+'?c=y&story=fullstory'
|
||||||
if subsection is not None:
|
description=self.tag_to_string(post.find('p')).strip()
|
||||||
subsection_title = self.tag_to_string(subsection).strip()
|
|
||||||
prefix = (subsection_title+': ')
|
|
||||||
description=self.tag_to_string(post('p', limit=2)[1]).strip()
|
|
||||||
else:
|
|
||||||
if post.find('img') is not None:
|
|
||||||
subsection_title = self.tag_to_string(post.findPrevious('div', attrs={'class':'departments plainModule'}).find('p', attrs={'class':'article-cat'})).strip()
|
|
||||||
prefix = (subsection_title+': ')
|
|
||||||
|
|
||||||
description=self.tag_to_string(post.find('p')).strip()
|
|
||||||
desc=re.sub('\sBy\s.*', '', description, re.DOTALL)
|
desc=re.sub('\sBy\s.*', '', description, re.DOTALL)
|
||||||
author=re.sub('.*By\s', '', description, re.DOTALL)
|
author=re.sub('.*By\s', '', description, re.DOTALL)
|
||||||
title=prefix + self.tag_to_string(link).strip()+ u' (%s)'%author
|
title=self.tag_to_string(link).strip()+ u' (%s)'%author
|
||||||
|
self.log('\tFound article:', title)
|
||||||
articles.append({'title':title, 'url':url, 'description':desc, 'date':''})
|
articles.append({'title':title, 'url':url, 'description':desc, 'date':''})
|
||||||
|
|
||||||
if articles:
|
|
||||||
if section_title not in feeds:
|
|
||||||
feeds[section_title] = []
|
|
||||||
feeds[section_title] += articles
|
|
||||||
ans = [(key, val) for key, val in feeds.iteritems()]
|
ans = [(key, val) for key, val in feeds.iteritems()]
|
||||||
return ans
|
return ans
|
||||||
|
Loading…
x
Reference in New Issue
Block a user