From b32374ca7be4ee7497bc7ca89d0cb981b388914b Mon Sep 17 00:00:00 2001 From: Gary Arnold Date: Mon, 5 Mar 2018 15:19:06 -0800 Subject: [PATCH 1/3] ga - More handling of odd titles (issue 132), still needs work --- recipes/granta.recipe | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/recipes/granta.recipe b/recipes/granta.recipe index b6539825e8..1c20f61732 100644 --- a/recipes/granta.recipe +++ b/recipes/granta.recipe @@ -264,14 +264,14 @@ class Granta(BasicNewsRecipe): # Either user is logged in or the article is unlocked h1 = toc.find('h1') h2 = toc.find('h2') - if h1.find('a') is not None and h1.find('a').contents is not None: - title = h1.find('a').contents[0].strip() + if h1.find('a') is not None and len(h1.find('a').contents) > 0 and h1.find('a').contents[0] is not None: + title = str(h1.find('a').contents[0]).strip() elif len(h1.contents) > 0 and h1.contents[0] is not None: title = h1.contents[0] else: title = '' - if h2.find('a') is not None and h2.find('a').contents is not None: - author = h2.find('a').contents[0].strip() + if h2.find('a') is not None and len(h2.find('a').contents) > 0 and h2.find('a').contents[0] is not None: + author = str(h2.find('a').contents[0]).strip() title = title + u' (%s)' % author elif len(h2.contents) > 0 and h2.contents[0] is not None: author = h2.contents[0] From 32c88dbe5fa088b0cc897b944b7a22c4824c92b9 Mon Sep 17 00:00:00 2001 From: Gary Arnold Date: Mon, 5 Mar 2018 15:51:13 -0800 Subject: [PATCH 2/3] ga - Handles article titles enclosed in tags (issue 132) --- recipes/granta.recipe | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/recipes/granta.recipe b/recipes/granta.recipe index 1c20f61732..d0fa38bbcb 100644 --- a/recipes/granta.recipe +++ b/recipes/granta.recipe @@ -156,10 +156,14 @@ def absurl(url): url = 'https://www.granta.com' + url return url - def stripstyle(tag): if tag is not None: del tag['style'] + +def get_innermost_string(tag): + while hasattr(tag, 'contents') and len(tag.contents) > 0 and tag.contents[0] is not None: + tag = tag.contents[0] + return str(tag).strip() ################################################################## @@ -265,16 +269,16 @@ class Granta(BasicNewsRecipe): h1 = toc.find('h1') h2 = toc.find('h2') if h1.find('a') is not None and len(h1.find('a').contents) > 0 and h1.find('a').contents[0] is not None: - title = str(h1.find('a').contents[0]).strip() + title = get_innermost_string(h1.find('a').contents[0]) elif len(h1.contents) > 0 and h1.contents[0] is not None: - title = h1.contents[0] + title = get_innermost_string(h1.contents[0]) else: title = '' if h2.find('a') is not None and len(h2.find('a').contents) > 0 and h2.find('a').contents[0] is not None: - author = str(h2.find('a').contents[0]).strip() + author = get_innermost_string(h2.find('a').contents[0]) title = title + u' (%s)' % author elif len(h2.contents) > 0 and h2.contents[0] is not None: - author = h2.contents[0] + author = get_innermost_string(h2.contents[0]) title = title + u' (%s)' % author else: author = '' From f65689d6dfadc543e2b19cc5a6fa13db1a306c5f Mon Sep 17 00:00:00 2001 From: Gary Arnold Date: Mon, 5 Mar 2018 16:09:54 -0800 Subject: [PATCH 3/3] ga - Corrects recipe description --- recipes/granta.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/granta.recipe b/recipes/granta.recipe index d0fa38bbcb..7c36e006a5 100644 --- a/recipes/granta.recipe +++ b/recipes/granta.recipe @@ -170,7 +170,7 @@ def get_innermost_string(tag): class Granta(BasicNewsRecipe): title = u'Granta' - description = u'Granta magazine' + description = u'The Magazine of New Writing' language = 'en' __author__ = 'Gary Arnold'