Granta recipe: take title/author text from the innermost node of each heading.

Adds a module-level helper, get_innermost_string(), that follows contents[0]
downwards for as long as it exists and is not None, then returns the stripped
str() of the node it reaches. The h1/h2 handling in the Granta class now calls
the helper in all four branches, replacing both the str(...).strip() calls and
the two branches that used contents[0] unconverted.

diff --git a/recipes/granta.recipe b/recipes/granta.recipe
index 1c20f61732..d0fa38bbcb 100644
--- a/recipes/granta.recipe
+++ b/recipes/granta.recipe
@@ -156,10 +156,14 @@ def absurl(url):
         url = 'https://www.granta.com' + url
     return url
 
-
 def stripstyle(tag):
     if tag is not None:
         del tag['style']
+
+def get_innermost_string(tag):
+    while hasattr(tag, 'contents') and len(tag.contents) > 0 and tag.contents[0] is not None:
+        tag = tag.contents[0]
+    return str(tag).strip()
 
 ##################################################################
 
@@ -265,16 +269,16 @@ class Granta(BasicNewsRecipe):
             h1 = toc.find('h1')
             h2 = toc.find('h2')
             if h1.find('a') is not None and len(h1.find('a').contents) > 0 and h1.find('a').contents[0] is not None:
-                title = str(h1.find('a').contents[0]).strip()
+                title = get_innermost_string(h1.find('a').contents[0])
             elif len(h1.contents) > 0 and h1.contents[0] is not None:
-                title = h1.contents[0]
+                title = get_innermost_string(h1.contents[0])
             else:
                 title = ''
             if h2.find('a') is not None and len(h2.find('a').contents) > 0 and h2.find('a').contents[0] is not None:
-                author = str(h2.find('a').contents[0]).strip()
+                author = get_innermost_string(h2.find('a').contents[0])
                 title = title + u' (%s)' % author
             elif len(h2.contents) > 0 and h2.contents[0] is not None:
-                author = h2.contents[0]
+                author = get_innermost_string(h2.contents[0])
                 title = title + u' (%s)' % author
             else:
                 author = ''