ga - More resilient with different title/author formatting

This commit is contained in:
Gary Arnold 2018-03-05 14:19:30 -08:00
parent 76c2d5a545
commit 840ca8092b

View File

@@ -257,9 +257,20 @@ class Granta(BasicNewsRecipe):
# Either user is logged in or the article is unlocked
h1 = toc.find('h1')
h2 = toc.find('h2')
title = h1.find('a').contents[0].strip()
author = h2.find('a').contents[0].strip()
title = title + u' (%s)' % author
if h1.find('a') is not None and h1.find('a').contents is not None:
title = h1.find('a').contents[0].strip()
elif h1.contents[0] is not None:
title = h1.contents[0]
else:
title = ''
if h2.find('a') is not None and h2.find('a').contents is not None:
author = h2.find('a').contents[0].strip()
title = title + u' (%s)' % author
elif h2.contents[0] is not None:
author = h2.contents[0]
title = title + u' (%s)' % author
else:
author = ''
url = absurl(h1.find('a', href=True)['href'])
self.log.info('Found article:', title)
self.log.info('\t', url)