mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Amazon metadata: When filtering search engine results by title, ignore words of the title that consist purely of punctuation
This commit is contained in:
parent
43121af37d
commit
44bceacf03
@ -1082,7 +1082,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
class Amazon(Source):
|
class Amazon(Source):
|
||||||
|
|
||||||
name = 'Amazon.com'
|
name = 'Amazon.com'
|
||||||
version = (1, 3, 7)
|
version = (1, 3, 8)
|
||||||
minimum_calibre_version = (2, 82, 0)
|
minimum_calibre_version = (2, 82, 0)
|
||||||
description = _('Downloads metadata and covers from Amazon')
|
description = _('Downloads metadata and covers from Amazon')
|
||||||
|
|
||||||
@ -1684,13 +1684,20 @@ class Amazon(Source):
|
|||||||
if not self.use_search_engine:
|
if not self.use_search_engine:
|
||||||
return True
|
return True
|
||||||
if title is not None:
|
if title is not None:
|
||||||
|
import regex
|
||||||
|
only_punctuation_pat = regex.compile(r'^\p{P}+$')
|
||||||
|
|
||||||
def tokenize_title(x):
|
def tokenize_title(x):
|
||||||
return icu_lower(x).replace("'", '').replace('"', '').rstrip(':')
|
ans = icu_lower(x).replace("'", '').replace('"', '').rstrip(':')
|
||||||
|
if only_punctuation_pat.match(ans) is not None:
|
||||||
|
ans = ''
|
||||||
|
return ans
|
||||||
|
|
||||||
tokens = {tokenize_title(x) for x in title.split() if len(x) > 3}
|
tokens = {tokenize_title(x) for x in title.split() if len(x) > 3}
|
||||||
|
tokens.discard('')
|
||||||
if tokens:
|
if tokens:
|
||||||
result_tokens = {tokenize_title(x) for x in mi.title.split()}
|
result_tokens = {tokenize_title(x) for x in mi.title.split()}
|
||||||
|
result_tokens.discard('')
|
||||||
if not tokens.intersection(result_tokens):
|
if not tokens.intersection(result_tokens):
|
||||||
log('Ignoring result:', mi.title, 'as its title does not match')
|
log('Ignoring result:', mi.title, 'as its title does not match')
|
||||||
return False
|
return False
|
||||||
|
Loading…
x
Reference in New Issue
Block a user