Amazon metadata: When filtering search engine results by title, ignore words of the title that are purely punctuation

2025-08-11 09:13:57 -04:00 · 2024-06-17 10:10:55 +05:30 · 2024-06-17 10:10:55 +05:30 · 44bceacf03
commit 44bceacf03
parent 43121af37d
1 changed files with 9 additions and 2 deletions
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -1082,7 +1082,7 @@ class Worker(Thread):  # Get details {{{
 class Amazon(Source):
    name = 'Amazon.com'
-    version = (1, 3, 7)
+    version = (1, 3, 8)
    minimum_calibre_version = (2, 82, 0)
    description = _('Downloads metadata and covers from Amazon')
@@ -1684,13 +1684,20 @@ class Amazon(Source):
        if not self.use_search_engine:
            return True
        if title is not None:
+            import regex
+            only_punctuation_pat = regex.compile(r'^\p{P}+$')
            def tokenize_title(x):
-                return icu_lower(x).replace("'", '').replace('"', '').rstrip(':')
+                ans = icu_lower(x).replace("'", '').replace('"', '').rstrip(':')
+                if only_punctuation_pat.match(ans) is not None:
+                    ans = ''
+                return ans
            tokens = {tokenize_title(x) for x in title.split() if len(x) > 3}
+            tokens.discard('')
            if tokens:
                result_tokens = {tokenize_title(x) for x in mi.title.split()}
+                result_tokens.discard('')
                if not tokens.intersection(result_tokens):
                    log('Ignoring result:', mi.title, 'as its title does not match')
                    return False