Google metadata download: When excluding results from a web search because they don't match the specified title/author, ignore diacritics, accents, etc.

This commit is contained in:
Kovid Goyal 2022-08-17 06:36:00 +05:30
parent 765db1fd4c
commit 742fc54bdd
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -202,7 +202,7 @@ def to_metadata(browser, log, entry_, timeout, running_a_test=False): # {{{
class GoogleBooks(Source): class GoogleBooks(Source):
name = 'Google' name = 'Google'
version = (1, 1, 0) version = (1, 1, 1)
minimum_calibre_version = (2, 80, 0) minimum_calibre_version = (2, 80, 0)
description = _('Downloads metadata and covers from Google Books') description = _('Downloads metadata and covers from Google Books')
@ -397,6 +397,7 @@ class GoogleBooks(Source):
identifiers={}, identifiers={},
timeout=30 timeout=30
): ):
from calibre.utils.filenames import ascii_text
isbn = check_isbn(identifiers.get('isbn', None)) isbn = check_isbn(identifiers.get('isbn', None))
q = [] q = []
strip_punc_pat = regex.compile(r'[\p{C}|\p{M}|\p{P}|\p{S}|\p{Z}]+', regex.UNICODE) strip_punc_pat = regex.compile(r'[\p{C}|\p{M}|\p{P}|\p{S}|\p{Z}]+', regex.UNICODE)
@ -411,7 +412,7 @@ class GoogleBooks(Source):
t = t.lower() t = t.lower()
if t in ('and', 'not', 'the'): if t in ('and', 'not', 'the'):
continue continue
yield strip_punc_pat.sub('', t) yield ascii_text(strip_punc_pat.sub('', t))
if has_google_id: if has_google_id:
google_ids.append(identifiers['google']) google_ids.append(identifiers['google'])