Update New York Review of Books

This commit is contained in:
Kovid Goyal 2014-01-05 08:59:18 +05:30
parent e5939d3d3d
commit 47a9bbda79
2 changed files with 32 additions and 30 deletions

View File

@@ -74,22 +74,23 @@ class NewYorkReviewOfBooks(BasicNewsRecipe):
self.log('Issue date:', date) self.log('Issue date:', date)
# Find TOC # Find TOC
toc = soup.find('div', attrs={'class':'current_issue'}).find('div', attrs={'class':'articles_list'}) tocs = soup.find('div', attrs={'class':'current_issue'}).findAll('div', attrs={'class':'articles_list'})
articles = [] articles = []
for div in toc.findAll('div', attrs={'class':'row'}): for toc in tocs:
h2 = div.find('h2') for div in toc.findAll('div', attrs={'class':'row'}):
title = self.tag_to_string(h2).strip() h2 = div.find('h2')
author = self.tag_to_string(div.find('div', attrs={'class':'author'})).strip() title = self.tag_to_string(h2).strip()
title = title + u' (%s)'%author author = self.tag_to_string(div.find('div', attrs={'class':'author'})).strip()
url = 'http://www.nybooks.com' + h2.find('a', href=True)['href'] title = title + u' (%s)'%author
desc = '' url = 'http://www.nybooks.com' + h2.find('a', href=True)['href']
for p in div.findAll('p', attrs={'class':lambda x: x and 'quiet' in x}): desc = ''
desc += self.tag_to_string(p) for p in div.findAll('p', attrs={'class':lambda x: x and 'quiet' in x}):
self.log('Found article:', title) desc += self.tag_to_string(p)
self.log('\t', url) self.log('Found article:', title)
self.log('\t', desc) self.log('\t', url)
articles.append({'title':title, 'url':url, 'date':'', self.log('\t', desc)
'description':desc}) articles.append({'title':title, 'url':url, 'date':'',
'description':desc})
return [('Current Issue', articles)] return [('Current Issue', articles)]

View File

@@ -64,22 +64,23 @@ class NewYorkReviewOfBooks(BasicNewsRecipe):
self.log('Issue date:', date) self.log('Issue date:', date)
# Find TOC # Find TOC
toc = soup.find('div', attrs={'class':'current_issue'}).find('div', attrs={'class':'articles_list'}) tocs = soup.find('div', attrs={'class':'current_issue'}).findAll('div', attrs={'class':'articles_list'})
articles = [] articles = []
for div in toc.findAll('div', attrs={'class':'row'}): for toc in tocs:
h2 = div.find('h2') for div in toc.findAll('div', attrs={'class':'row'}):
title = self.tag_to_string(h2).strip() h2 = div.find('h2')
author = self.tag_to_string(div.find('div', attrs={'class':'author'})).strip() title = self.tag_to_string(h2).strip()
title = title + u' (%s)'%author author = self.tag_to_string(div.find('div', attrs={'class':'author'})).strip()
url = 'http://www.nybooks.com' + h2.find('a', href=True)['href'] title = title + u' (%s)'%author
desc = '' url = 'http://www.nybooks.com' + h2.find('a', href=True)['href']
for p in div.findAll('p', attrs={'class':lambda x: x and 'quiet' in x}): desc = ''
desc += self.tag_to_string(p) for p in div.findAll('p', attrs={'class':lambda x: x and 'quiet' in x}):
self.log('Found article:', title) desc += self.tag_to_string(p)
self.log('\t', url) self.log('Found article:', title)
self.log('\t', desc) self.log('\t', url)
articles.append({'title':title, 'url':url, 'date':'', self.log('\t', desc)
'description':desc}) articles.append({'title':title, 'url':url, 'date':'',
'description':desc})
return [('Current Issue', articles)] return [('Current Issue', articles)]