mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-05 08:40:13 -04:00
Update The Atlantic
This commit is contained in:
parent
d545fe19c1
commit
f4e7908936
@ -38,8 +38,10 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
self.timefmt = ' [%s]'%ds
|
self.timefmt = ' [%s]'%ds
|
||||||
|
|
||||||
cover = soup.find('img', src=True, attrs={'class':'cover'})
|
cover = soup.find('img', src=True, attrs={'class':'cover'})
|
||||||
|
|
||||||
if cover is not None:
|
if cover is not None:
|
||||||
self.cover_url = cover['src'].replace(' ', '%20')
|
self.cover_url = re.sub('\s','%20',re.sub('jpg.*','jpg',cover['src']))
|
||||||
|
self.log(self.cover_url)
|
||||||
|
|
||||||
feeds = []
|
feeds = []
|
||||||
seen_titles = set([])
|
seen_titles = set([])
|
||||||
@ -47,18 +49,16 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
section_title = self.tag_to_string(section.find('h2'))
|
section_title = self.tag_to_string(section.find('h2'))
|
||||||
self.log('Found section:', section_title)
|
self.log('Found section:', section_title)
|
||||||
articles = []
|
articles = []
|
||||||
for post in section.findAll('div', attrs={'class':lambda x : x and
|
for post in section.findAll('h3', attrs={'class':'headline'}):
|
||||||
'post' in x}):
|
a = post.find('a', href=True)
|
||||||
h = post.find(['h3', 'h4'])
|
title = self.tag_to_string(a)
|
||||||
title = self.tag_to_string(h)
|
|
||||||
if title in seen_titles:
|
if title in seen_titles:
|
||||||
continue
|
continue
|
||||||
seen_titles.add(title)
|
seen_titles.add(title)
|
||||||
a = post.find('a', href=True)
|
|
||||||
url = a['href']
|
url = a['href']
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
url = 'http://www.theatlantic.com'+url
|
url = 'http://www.theatlantic.com'+url
|
||||||
p = post.find('p', attrs={'class':'dek'})
|
p = post.parent.find('p', attrs={'class':'dek'})
|
||||||
desc = None
|
desc = None
|
||||||
self.log('\tFound article:', title, 'at', url)
|
self.log('\tFound article:', title, 'at', url)
|
||||||
if p is not None:
|
if p is not None:
|
||||||
@ -69,19 +69,29 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
if articles:
|
if articles:
|
||||||
feeds.append((section_title, articles))
|
feeds.append((section_title, articles))
|
||||||
|
|
||||||
poems = []
|
rightContent=soup.find('div', attrs = {'class':'rightContent'})
|
||||||
self.log('Found section: Poems')
|
for module in rightContent.findAll('div', attrs={'class':'module'}):
|
||||||
pd = soup.find('h2', text='Poetry').parent.parent
|
section_title = self.tag_to_string(module.find('h2'))
|
||||||
for poem in pd.findAll('h4'):
|
articles = []
|
||||||
title = self.tag_to_string(poem)
|
for post in module.findAll('div', attrs={'class':'post'}):
|
||||||
url = poem.find('a')['href']
|
a = post.find('a', href=True)
|
||||||
if url.startswith('/'):
|
title = self.tag_to_string(a)
|
||||||
url = 'http://www.theatlantic.com' + url
|
if title in seen_titles:
|
||||||
self.log('\tFound article:', title, 'at', url)
|
continue
|
||||||
poems.append({'title':title, 'url':url, 'description':'',
|
seen_titles.add(title)
|
||||||
'date':''})
|
url = a['href']
|
||||||
if poems:
|
if url.startswith('/'):
|
||||||
feeds.append(('Poems', poems))
|
url = 'http://www.theatlantic.com'+url
|
||||||
|
p = post.parent.find('p', attrs={'class':'dek'})
|
||||||
|
desc = None
|
||||||
|
self.log('\tFound article:', title, 'at', url)
|
||||||
|
if p is not None:
|
||||||
|
desc = self.tag_to_string(p)
|
||||||
|
self.log('\t\t', desc)
|
||||||
|
articles.append({'title':title, 'url':url, 'description':desc, 'date':''})
|
||||||
|
if articles:
|
||||||
|
feeds.append((section_title, articles))
|
||||||
|
|
||||||
|
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
@ -100,4 +110,3 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
table.replaceWith(div)
|
table.replaceWith(div)
|
||||||
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user