mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Amazon metadata download: Fix spurious results when using a search engine to look up books that are not present on Amazon.
This commit is contained in:
parent
c8131cf507
commit
b2e38cd0d4
@ -6,6 +6,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera
|
||||
import re
|
||||
import socket
|
||||
import time
|
||||
from functools import partial
|
||||
from Queue import Empty, Queue
|
||||
from threading import Thread
|
||||
from urlparse import urlparse
|
||||
@ -105,7 +106,8 @@ class Worker(Thread): # Get details {{{
|
||||
'''
|
||||
|
||||
def __init__(self, url, result_queue, browser, log, relevance, domain,
|
||||
plugin, timeout=20, testing=False, preparsed_root=None, cover_url_processor=None):
|
||||
plugin, timeout=20, testing=False, preparsed_root=None,
|
||||
cover_url_processor=None, filter_result=None):
|
||||
Thread.__init__(self)
|
||||
self.cover_url_processor = cover_url_processor
|
||||
self.preparsed_root = preparsed_root
|
||||
@ -113,6 +115,7 @@ class Worker(Thread): # Get details {{{
|
||||
self.testing = testing
|
||||
self.url, self.result_queue = url, result_queue
|
||||
self.log, self.timeout = log, timeout
|
||||
self.filter_result = filter_result or (lambda x, log: True)
|
||||
self.relevance, self.plugin = relevance, plugin
|
||||
self.browser = browser
|
||||
self.cover_url = self.amazon_id = self.isbn = None
|
||||
@ -447,6 +450,7 @@ class Worker(Thread): # Get details {{{
|
||||
|
||||
self.plugin.clean_downloaded_metadata(mi)
|
||||
|
||||
if self.filter_result(mi, self.log):
|
||||
self.result_queue.put(mi)
|
||||
|
||||
def totext(self, elem):
|
||||
@ -817,7 +821,7 @@ class Worker(Thread): # Get details {{{
|
||||
class Amazon(Source):
|
||||
|
||||
name = 'Amazon.com'
|
||||
version = (1, 0, 0)
|
||||
version = (1, 1, 0)
|
||||
minimum_calibre_version = (2, 80, 0)
|
||||
description = _('Downloads metadata and covers from Amazon')
|
||||
|
||||
@ -1338,8 +1342,10 @@ class Amazon(Source):
|
||||
log.error('No matches found with query: %r' % query)
|
||||
return
|
||||
|
||||
workers = [Worker(url, result_queue, br, log, i, domain, self, testing=testing, timeout=timeout,
|
||||
cover_url_processor=cover_url_processor) for i, url in enumerate(matches)]
|
||||
workers = [Worker(
|
||||
url, result_queue, br, log, i, domain, self, testing=testing, timeout=timeout,
|
||||
cover_url_processor=cover_url_processor, filter_result=partial(
|
||||
self.filter_result, title, authors, identifiers)) for i, url in enumerate(matches)]
|
||||
|
||||
for w in workers:
|
||||
# Don't send all requests at the same time
|
||||
@ -1362,6 +1368,29 @@ class Amazon(Source):
|
||||
return None
|
||||
# }}}
|
||||
|
||||
def filter_result(self, title, authors, identifiers, mi, log):  # {{{
    '''
    Decide whether the downloaded result ``mi`` should be kept for a query.

    Filtering is only performed when ``self.use_search_engine`` is true;
    otherwise every result is accepted.

    A result is rejected when:

      * ``title`` is given, yields at least one usable token (a word longer
        than 3 characters) and none of those tokens occurs among the words
        of ``mi.title``; or
      * ``authors`` is given, yields at least one usable token (a word
        longer than 2 characters) and none of those tokens occurs among
        the equally filtered words of ``mi.authors``.

    Tokens are lower-cased with ``icu_lower`` and have leading/trailing
    ASCII punctuation removed, so that ``Dune`` matches ``Dune:`` and
    ``Tolkien,`` matches ``Tolkien``. Words consisting only of punctuation
    are ignored.

    :param title: Title used in the query, or None.
    :param authors: Iterable of author names used in the query, or None/empty.
    :param identifiers: Accepted for signature compatibility; not consulted.
    :param mi: Result object providing ``title`` and ``authors`` attributes.
    :param log: Callable used to report why a result was rejected.
    :return: True to keep the result, False to discard it.
    '''
    if not self.use_search_engine:
        return True

    # Local import: only this method needs it, and it keeps the change
    # independent of the module level import block.
    from string import punctuation

    def tokenize(words, min_len):
        # The length test is applied to the raw word, before punctuation is
        # stripped, so no word that used to qualify is excluded by it.
        ans = {icu_lower(w).strip(punctuation) for w in words if len(w) > min_len}
        ans.discard('')  # words that were nothing but punctuation
        return ans

    if title is not None:
        tokens = tokenize(title.split(), 3)
        if tokens:
            # A missing result title is treated as having no words at all
            # instead of raising AttributeError.
            result_tokens = tokenize((mi.title or '').split(), 0)
            if not tokens.intersection(result_tokens):
                log('Ignoring result:', mi.title, 'as its title does not match')
                return False

    if authors:
        result_authors = mi.authors or ()
        author_tokens = set()
        for author in authors:
            author_tokens |= tokenize(author.split(), 2)
        result_tokens = set()
        for author in result_authors:
            result_tokens |= tokenize(author.split(), 2)
        if author_tokens and not author_tokens.intersection(result_tokens):
            log('Ignoring result:', mi.title, 'by', ' & '.join(result_authors), 'as its author does not match')
            return False

    return True
|
||||
# }}}
|
||||
|
||||
def download_cover(self, log, result_queue, abort, # {{{
|
||||
title=None, authors=None, identifiers={}, timeout=60, get_best_cover=False):
|
||||
cached_url = self.get_cached_cover_url(identifiers)
|
||||
@ -1450,14 +1479,6 @@ if __name__ == '__main__': # tests {{{
|
||||
]
|
||||
),
|
||||
|
||||
( # Description has links
|
||||
{'identifiers': {'isbn': '9780671578275'}},
|
||||
[title_test('A Civil Campaign: A Comedy of Biology and Manners',
|
||||
exact=True), authors_test(['Lois McMaster Bujold'])
|
||||
]
|
||||
|
||||
),
|
||||
|
||||
( # Sophisticated comment formatting
|
||||
{'identifiers': {'isbn': '9781416580829'}},
|
||||
[title_test('Angels & Demons - Movie Tie-In: A Novel',
|
||||
|
Loading…
x
Reference in New Issue
Block a user