mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
ga - Handles article titles enclosed in tags (issue 132)
This commit is contained in:
parent
b32374ca7b
commit
32c88dbe5f
@ -156,10 +156,14 @@ def absurl(url):
|
||||
url = 'https://www.granta.com' + url
|
||||
return url
|
||||
|
||||
|
||||
def stripstyle(tag):
    """Remove the inline ``style`` attribute from *tag*, in place.

    Does nothing when *tag* is ``None`` or when it carries no ``style``
    attribute.
    """
    if tag is None:
        return
    try:
        del tag['style']
    except KeyError:
        # Mapping-like objects raise when the attribute is absent; a tag
        # without inline style is already in the desired state.
        pass
|
||||
|
||||
def get_innermost_string(tag):
    """Return the stripped text of the innermost first child of *tag*.

    Repeatedly descends into ``contents[0]`` while the current node has a
    non-empty ``contents`` sequence whose first element is not ``None``,
    then returns ``str()`` of the node reached, with surrounding
    whitespace removed. This lets titles wrapped in nested markup be
    reduced to their text.

    Returns an empty string when *tag* is ``None`` instead of the literal
    text ``'None'``.
    """
    if tag is None:
        return ''
    while (hasattr(tag, 'contents') and len(tag.contents) > 0
            and tag.contents[0] is not None):
        tag = tag.contents[0]
    return str(tag).strip()
|
||||
##################################################################
|
||||
|
||||
|
||||
@ -265,16 +269,16 @@ class Granta(BasicNewsRecipe):
|
||||
h1 = toc.find('h1')
|
||||
h2 = toc.find('h2')
|
||||
if h1.find('a') is not None and len(h1.find('a').contents) > 0 and h1.find('a').contents[0] is not None:
|
||||
title = str(h1.find('a').contents[0]).strip()
|
||||
title = get_innermost_string(h1.find('a').contents[0])
|
||||
elif len(h1.contents) > 0 and h1.contents[0] is not None:
|
||||
title = h1.contents[0]
|
||||
title = get_innermost_string(h1.contents[0])
|
||||
else:
|
||||
title = ''
|
||||
if h2.find('a') is not None and len(h2.find('a').contents) > 0 and h2.find('a').contents[0] is not None:
|
||||
author = str(h2.find('a').contents[0]).strip()
|
||||
author = get_innermost_string(h2.find('a').contents[0])
|
||||
title = title + u' (%s)' % author
|
||||
elif len(h2.contents) > 0 and h2.contents[0] is not None:
|
||||
author = h2.contents[0]
|
||||
author = get_innermost_string(h2.contents[0])
|
||||
title = title + u' (%s)' % author
|
||||
else:
|
||||
author = ''
|
||||
|
Loading…
x
Reference in New Issue
Block a user