Update grantland.com

2025-12-17 18:45:04 -05:00 · 2012-01-13 08:15:59 +05:30 · 2012-01-13 08:15:59 +05:30 · 5416312344
commit 5416312344
parent 5dd6c7d9d1
1 changed file with 29 additions and 19 deletions
--- a/recipes/grantland.recipe
+++ b/recipes/grantland.recipe
@ -5,7 +5,7 @@ class GrantLand(BasicNewsRecipe):
 	title          = u"Grantland"
 	description    = 'Writings on Sports & Pop Culture'
 	language       = 'en'
-	__author__     = 'Barty'
+	__author__     = 'barty on mobileread.com forum'
 	max_articles_per_feed = 100
 	no_stylesheets = False
 	# auto_cleanup is too aggressive sometimes and we end up with blank articles
@ -57,33 +57,43 @@ class GrantLand(BasicNewsRecipe):
 			headers = soup.findAll('h2' if tag=='' else 'h3')
 			for header in headers:
-				tag = header.find('a')
+				tag = header.find('a',href=True)
-				if tag is None or not hasattr(tag,'href'):
+				if tag is None:
 					continue
 				url = tag['href']
 				if url.startswith('/'):
 					url = self.INDEX + url
 				if url in seen_urls:
 					continue
 				seen_urls.add(url)
 				title = self.tag_to_string(tag)
 				if 'Podcast:' in title or 'In Case You Missed It' in title:
 					continue
 				desc = dt = ''
-				par = header.parent
+				# get at the div that contains description and other info
-				#tag = par.find('cite')
+				div = header.parent.find('div')
-				#if tag is not None:
+				if div is not None:
-				#	desc = '['+self.tag_to_string(tag) + '] '
+					desc = self.tag_to_string(div)
-				tag = par.find('div')
+					dt = div.find('time')
-				if tag is not None:
+					if dt is not None:
-					desc = desc + self.tag_to_string(tag)
+						dt = self.tag_to_string( dt)
-					tag = tag.find('time')
+
-					if tag is not None:
+				# if div contains the same url that is in h2/h3
-						dt = self.tag_to_string( tag)
+				# that means this is a series split into multiple articles
 				if div.find('a',href=url):
 					self.log('\tFound series:', title)
 					# grab all articles in series
 					for tag in div.findAll('a',href=True):
 						url = tag['href']
 						if url in seen_urls:
 							continue
 						self.log('\t', url)
 						seen_urls.add(url)
 						articles.append({'title':title+' - '+self.tag_to_string( tag),
 							'url':url,'description':desc,'date':dt})
 				else:
 					self.log('\tFound article:', title)
 					self.log('\t', url)
 					seen_urls.add(url)
 					articles.append({'title':title,'url':url,'description':desc,'date':dt})
 				self.log('\tFound article:', title)
 				self.log('\t', url)
 				articles.append({'title':title,'url':url,'description':desc,'date':dt})
 				if len(articles) >= max_articles:
 					break