mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Remove all punctuation when checking tokens
This commit is contained in:
parent
17b65bbc49
commit
8b01843531
@ -6,6 +6,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera
|
|||||||
import hashlib
|
import hashlib
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
|
import regex
|
||||||
try:
|
try:
|
||||||
from queue import Empty, Queue
|
from queue import Empty, Queue
|
||||||
except ImportError:
|
except ImportError:
|
||||||
@ -185,7 +186,7 @@ def to_metadata(browser, log, entry_, timeout): # {{{
|
|||||||
class GoogleBooks(Source):
|
class GoogleBooks(Source):
|
||||||
|
|
||||||
name = 'Google'
|
name = 'Google'
|
||||||
version = (1, 0, 5)
|
version = (1, 0, 6)
|
||||||
minimum_calibre_version = (2, 80, 0)
|
minimum_calibre_version = (2, 80, 0)
|
||||||
description = _('Downloads metadata and covers from Google Books')
|
description = _('Downloads metadata and covers from Google Books')
|
||||||
|
|
||||||
@ -376,6 +377,7 @@ class GoogleBooks(Source):
|
|||||||
):
|
):
|
||||||
isbn = check_isbn(identifiers.get('isbn', None))
|
isbn = check_isbn(identifiers.get('isbn', None))
|
||||||
q = []
|
q = []
|
||||||
|
strip_punc_pat = regex.compile(r'[\p{C}|\p{M}|\p{P}|\p{S}|\p{Z}]+', regex.UNICODE)
|
||||||
|
|
||||||
def to_check_tokens(*tokens):
|
def to_check_tokens(*tokens):
|
||||||
for t in tokens:
|
for t in tokens:
|
||||||
@ -384,7 +386,7 @@ class GoogleBooks(Source):
|
|||||||
t = t.lower()
|
t = t.lower()
|
||||||
if t in ('and', 'not', 'the'):
|
if t in ('and', 'not', 'the'):
|
||||||
continue
|
continue
|
||||||
yield t.strip(':')
|
yield strip_punc_pat.sub('', t)
|
||||||
|
|
||||||
check_tokens = set()
|
check_tokens = set()
|
||||||
if isbn is not None:
|
if isbn is not None:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user