mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Amazon metadata download: Fix spurious results when using a search engine to look up books that are not present on Amazon.
This commit is contained in:
parent
c8131cf507
commit
b2e38cd0d4
@ -6,6 +6,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera
|
|||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
import time
|
import time
|
||||||
|
from functools import partial
|
||||||
from Queue import Empty, Queue
|
from Queue import Empty, Queue
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
from urlparse import urlparse
|
from urlparse import urlparse
|
||||||
@ -105,7 +106,8 @@ class Worker(Thread): # Get details {{{
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
def __init__(self, url, result_queue, browser, log, relevance, domain,
|
def __init__(self, url, result_queue, browser, log, relevance, domain,
|
||||||
plugin, timeout=20, testing=False, preparsed_root=None, cover_url_processor=None):
|
plugin, timeout=20, testing=False, preparsed_root=None,
|
||||||
|
cover_url_processor=None, filter_result=None):
|
||||||
Thread.__init__(self)
|
Thread.__init__(self)
|
||||||
self.cover_url_processor = cover_url_processor
|
self.cover_url_processor = cover_url_processor
|
||||||
self.preparsed_root = preparsed_root
|
self.preparsed_root = preparsed_root
|
||||||
@ -113,6 +115,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
self.testing = testing
|
self.testing = testing
|
||||||
self.url, self.result_queue = url, result_queue
|
self.url, self.result_queue = url, result_queue
|
||||||
self.log, self.timeout = log, timeout
|
self.log, self.timeout = log, timeout
|
||||||
|
self.filter_result = filter_result or (lambda x, log: True)
|
||||||
self.relevance, self.plugin = relevance, plugin
|
self.relevance, self.plugin = relevance, plugin
|
||||||
self.browser = browser
|
self.browser = browser
|
||||||
self.cover_url = self.amazon_id = self.isbn = None
|
self.cover_url = self.amazon_id = self.isbn = None
|
||||||
@ -447,7 +450,8 @@ class Worker(Thread): # Get details {{{
|
|||||||
|
|
||||||
self.plugin.clean_downloaded_metadata(mi)
|
self.plugin.clean_downloaded_metadata(mi)
|
||||||
|
|
||||||
self.result_queue.put(mi)
|
if self.filter_result(mi, self.log):
|
||||||
|
self.result_queue.put(mi)
|
||||||
|
|
||||||
def totext(self, elem):
|
def totext(self, elem):
|
||||||
return self.tostring(elem, encoding=unicode, method='text').strip()
|
return self.tostring(elem, encoding=unicode, method='text').strip()
|
||||||
@ -817,7 +821,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
class Amazon(Source):
|
class Amazon(Source):
|
||||||
|
|
||||||
name = 'Amazon.com'
|
name = 'Amazon.com'
|
||||||
version = (1, 0, 0)
|
version = (1, 1, 0)
|
||||||
minimum_calibre_version = (2, 80, 0)
|
minimum_calibre_version = (2, 80, 0)
|
||||||
description = _('Downloads metadata and covers from Amazon')
|
description = _('Downloads metadata and covers from Amazon')
|
||||||
|
|
||||||
@ -1338,8 +1342,10 @@ class Amazon(Source):
|
|||||||
log.error('No matches found with query: %r' % query)
|
log.error('No matches found with query: %r' % query)
|
||||||
return
|
return
|
||||||
|
|
||||||
workers = [Worker(url, result_queue, br, log, i, domain, self, testing=testing, timeout=timeout,
|
workers = [Worker(
|
||||||
cover_url_processor=cover_url_processor) for i, url in enumerate(matches)]
|
url, result_queue, br, log, i, domain, self, testing=testing, timeout=timeout,
|
||||||
|
cover_url_processor=cover_url_processor, filter_result=partial(
|
||||||
|
self.filter_result, title, authors, identifiers)) for i, url in enumerate(matches)]
|
||||||
|
|
||||||
for w in workers:
|
for w in workers:
|
||||||
# Don't send all requests at the same time
|
# Don't send all requests at the same time
|
||||||
@ -1362,6 +1368,29 @@ class Amazon(Source):
|
|||||||
return None
|
return None
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
def filter_result(self, title, authors, identifiers, mi, log):  # {{{
    '''
    Decide whether a downloaded result plausibly matches the search query.

    Filtering is only performed when ``self.use_search_engine`` is true;
    otherwise every result is accepted unchanged.

    A result is rejected when the query supplies significant title words
    (longer than three characters) and none of them occur in the result
    title, or when the query supplies significant author words (longer
    than two characters) and none of them occur in the result authors.

    :param title: Title that was searched for, or None.
    :param authors: Author names that were searched for; may be None or empty.
    :param identifiers: Identifiers that were searched for (not consulted here).
    :param mi: Result metadata; its ``title`` and ``authors`` attributes are read.
    :param log: Callable used to report why a result is being ignored.
    :return: True to keep the result, False to drop it.
    '''
    if not self.use_search_engine:
        return True

    # Local import: only needed on the search engine code path.
    import string

    def title_token(word):
        # Punctuation glued to a word ("Campaign:", "(Novel)", '"Dune"')
        # must not stop it matching the same word in the other title.
        return icu_lower(word).strip(string.punctuation)

    def author_tokens(names):
        return {
            icu_lower(part)
            for name in names
            for part in name.split()
            if len(part) > 2
        }

    # Defensive: treat a missing title/author list as empty instead of
    # raising while tokenizing it.
    result_title = mi.title or ''
    result_authors = mi.authors or ()

    if title is not None:
        wanted = {title_token(x) for x in title.split() if len(x) > 3}
        # Words consisting solely of punctuation carry no information.
        wanted.discard('')
        if wanted:
            found = {title_token(x) for x in result_title.split()}
            if wanted.isdisjoint(found):
                log('Ignoring result:', mi.title, 'as its title does not match')
                return False

    if authors:
        wanted_authors = author_tokens(authors)
        if wanted_authors and wanted_authors.isdisjoint(author_tokens(result_authors)):
            log('Ignoring result:', mi.title, 'by', ' & '.join(result_authors), 'as its author does not match')
            return False

    return True
# }}}
|
||||||
|
|
||||||
def download_cover(self, log, result_queue, abort, # {{{
|
def download_cover(self, log, result_queue, abort, # {{{
|
||||||
title=None, authors=None, identifiers={}, timeout=60, get_best_cover=False):
|
title=None, authors=None, identifiers={}, timeout=60, get_best_cover=False):
|
||||||
cached_url = self.get_cached_cover_url(identifiers)
|
cached_url = self.get_cached_cover_url(identifiers)
|
||||||
@ -1450,14 +1479,6 @@ if __name__ == '__main__': # tests {{{
|
|||||||
]
|
]
|
||||||
),
|
),
|
||||||
|
|
||||||
( # Description has links
|
|
||||||
{'identifiers': {'isbn': '9780671578275'}},
|
|
||||||
[title_test('A Civil Campaign: A Comedy of Biology and Manners',
|
|
||||||
exact=True), authors_test(['Lois McMaster Bujold'])
|
|
||||||
]
|
|
||||||
|
|
||||||
),
|
|
||||||
|
|
||||||
( # Sophisticated comment formatting
|
( # Sophisticated comment formatting
|
||||||
{'identifiers': {'isbn': '9781416580829'}},
|
{'identifiers': {'isbn': '9781416580829'}},
|
||||||
[title_test('Angels & Demons - Movie Tie-In: A Novel',
|
[title_test('Angels & Demons - Movie Tie-In: A Novel',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user