From c7b753c469ec004d0ab7f163b080eac77c51ad35 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 3 Mar 2017 12:07:11 +0530 Subject: [PATCH] Log results page title when no results found --- src/calibre/ebooks/metadata/sources/search_engines.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/search_engines.py b/src/calibre/ebooks/metadata/sources/search_engines.py index cade3f8742..fca9aa3750 100644 --- a/src/calibre/ebooks/metadata/sources/search_engines.py +++ b/src/calibre/ebooks/metadata/sources/search_engines.py @@ -174,7 +174,7 @@ def bing_search(terms, site=None, br=None, log=prints, safe_search=False, dump_r try: div = li.xpath('descendant::div[@class="b_attribution" and @u]')[0] except IndexError: - log('Ignoring', title, 'as it has no cached page') + log('Ignoring {!r} as it has no cached page'.format(title)) continue d, w = div.get('u').split('|')[-2:] # The bing cache does not have a valid https certificate currently @@ -182,6 +182,9 @@ def bing_search(terms, site=None, br=None, log=prints, safe_search=False, dump_r cached_url = 'http://cc.bingj.com/cache.aspx?q={q}&d={d}&mkt=en-US&setlang=en-US&w={w}'.format( q=q, d=d, w=w) ans.append(Result(a.get('href'), title, cached_url)) + if not ans: + title = ' '.join(root.xpath('//title/text()')) + log('Failed to find any results on results page, with title:', title) return ans, url @@ -226,10 +229,13 @@ def google_search(terms, site=None, br=None, log=prints, safe_search=False, dump try: c = div.xpath('descendant::div[@class="s"]//a[@class="fl"]')[0] except IndexError: - log('Ignoring', title, 'as it has no cached page') + log('Ignoring {!r} as it has no cached page'.format(title)) continue cached_url = c.get('href') ans.append(Result(a.get('href'), title, cached_url)) + if not ans: + title = ' '.join(root.xpath('//title/text()')) + log('Failed to find any results on results page, with title:', title) return ans, url