From c4c1c5f841d2e5216bbebe7bbe4ee54dde05f12d Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Fri, 1 Jan 2021 19:38:42 +0530
Subject: [PATCH] Amazon metadata download: Ignore ' and " in titles when matching results

---
 src/calibre/ebooks/metadata/sources/amazon.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index 71b18ae5ae..7adaae6a58 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -908,7 +908,7 @@ class Worker(Thread): # Get details {{{
 class Amazon(Source):
 
     name = 'Amazon.com'
-    version = (1, 2, 14)
+    version = (1, 2, 15)
     minimum_calibre_version = (2, 82, 0)
     description = _('Downloads metadata and covers from Amazon')
 
@@ -1478,9 +1478,13 @@ class Amazon(Source):
         if not self.use_search_engine:
             return True
         if title is not None:
-            tokens = {icu_lower(x).rstrip(':') for x in title.split() if len(x) > 3}
+
+            def tokenize_title(x):
+                return icu_lower(x).replace("'", '').replace('"', '').rstrip(':')
+
+            tokens = {tokenize_title(x) for x in title.split() if len(x) > 3}
             if tokens:
-                result_tokens = {icu_lower(x).rstrip(':') for x in mi.title.split()}
+                result_tokens = {tokenize_title(x) for x in mi.title.split()}
                 if not tokens.intersection(result_tokens):
                     log('Ignoring result:', mi.title, 'as its title does not match')
                     return False