mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add a config option to allow the user to control which servers to query for Amazon metadata
This commit is contained in:
parent
0030c69626
commit
e2bb9b1508
@ -28,7 +28,6 @@ class SearchFailed(ValueError):
|
|||||||
|
|
||||||
|
|
||||||
ua_index = -1
|
ua_index = -1
|
||||||
USE_SEARCH_ENGINE = True
|
|
||||||
|
|
||||||
|
|
||||||
def parse_details_page(url, log, timeout, browser, domain):
|
def parse_details_page(url, log, timeout, browser, domain):
|
||||||
@ -844,10 +843,25 @@ class Amazon(Source):
|
|||||||
'ca': _('Canada'),
|
'ca': _('Canada'),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SERVERS = {
|
||||||
|
'auto': _('Choose server automatically'),
|
||||||
|
'amazon': _('Amazon servers'),
|
||||||
|
'bing': _('Bing search cache'),
|
||||||
|
'google': _('Google search cache'),
|
||||||
|
'wayback': _('Wayback machine cache (slow)'),
|
||||||
|
}
|
||||||
|
|
||||||
options = (
|
options = (
|
||||||
Option('domain', 'choices', 'com', _('Amazon website to use:'),
|
Option('domain', 'choices', 'com', _('Amazon country website to use:'),
|
||||||
_('Metadata from Amazon will be fetched using this '
|
_('Metadata from Amazon will be fetched using this '
|
||||||
'country\'s Amazon website.'), choices=AMAZON_DOMAINS),
|
'country\'s Amazon website.'), choices=AMAZON_DOMAINS),
|
||||||
|
Option('server', 'choices', 'auto', _('Server to get data from:'),
|
||||||
|
_(
|
||||||
|
'Amazon has started blocking attempts to download'
|
||||||
|
' metadata from its servers. To get around this problem,'
|
||||||
|
' calibre can fetch the Amazon data from many different'
|
||||||
|
' places where it is cached. Choose the source you prefer.'
|
||||||
|
), choices=SERVERS),
|
||||||
)
|
)
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
@ -873,7 +887,7 @@ class Amazon(Source):
|
|||||||
@property
|
@property
|
||||||
def browser(self):
|
def browser(self):
|
||||||
global ua_index
|
global ua_index
|
||||||
if USE_SEARCH_ENGINE:
|
if self.use_search_engine:
|
||||||
if self._browser is None:
|
if self._browser is None:
|
||||||
ua = random_user_agent(allow_ie=False)
|
ua = random_user_agent(allow_ie=False)
|
||||||
self._browser = br = browser(user_agent=ua)
|
self._browser = br = browser(user_agent=ua)
|
||||||
@ -963,6 +977,20 @@ class Amazon(Source):
|
|||||||
|
|
||||||
return domain
|
return domain
|
||||||
|
|
||||||
|
@property
|
||||||
|
def server(self):
|
||||||
|
x = getattr(self, 'testing_server', None)
|
||||||
|
if x is not None:
|
||||||
|
return x
|
||||||
|
server = self.prefs['server']
|
||||||
|
if server not in self.SERVERS:
|
||||||
|
server = 'auto'
|
||||||
|
return server
|
||||||
|
|
||||||
|
@property
|
||||||
|
def use_search_engine(self):
|
||||||
|
return self.server != 'amazon'
|
||||||
|
|
||||||
def clean_downloaded_metadata(self, mi):
|
def clean_downloaded_metadata(self, mi):
|
||||||
docase = (
|
docase = (
|
||||||
mi.language == 'eng' or
|
mi.language == 'eng' or
|
||||||
@ -1223,8 +1251,14 @@ class Amazon(Source):
|
|||||||
domain)[len('https://'):].partition('/')[0]
|
domain)[len('https://'):].partition('/')[0]
|
||||||
matches = []
|
matches = []
|
||||||
se = search_engines_module()
|
se = search_engines_module()
|
||||||
urlproc = se.bing_url_processor
|
server = self.server
|
||||||
results, qurl = se.bing_search(terms, site, log=log, br=br, timeout=timeout)
|
if server in ('auto', 'bing'):
|
||||||
|
urlproc, sfunc = se.bing_url_processor, se.bing_search
|
||||||
|
elif server == 'google':
|
||||||
|
urlproc, sfunc = se.google_url_processor, se.google_search
|
||||||
|
elif server == 'wayback':
|
||||||
|
urlproc, sfunc = se.wayback_url_processor, se.ddg_search
|
||||||
|
results, qurl = sfunc(terms, site, log=log, br=br, timeout=timeout)
|
||||||
br.set_current_header('Referer', qurl)
|
br.set_current_header('Referer', qurl)
|
||||||
for result in results:
|
for result in results:
|
||||||
if abort.is_set():
|
if abort.is_set():
|
||||||
@ -1264,7 +1298,7 @@ class Amazon(Source):
|
|||||||
log('User-agent:', br.current_user_agent())
|
log('User-agent:', br.current_user_agent())
|
||||||
if testing:
|
if testing:
|
||||||
print('User-agent:', br.current_user_agent())
|
print('User-agent:', br.current_user_agent())
|
||||||
if udata is not None and not USE_SEARCH_ENGINE:
|
if udata is not None and not self.use_search_engine:
|
||||||
# Try to directly get details page instead of running a search
|
# Try to directly get details page instead of running a search
|
||||||
# Cannot use search engine as the directly constructed URL is
|
# Cannot use search engine as the directly constructed URL is
|
||||||
# usually redirected to a full URL by amazon, and is therefore
|
# usually redirected to a full URL by amazon, and is therefore
|
||||||
@ -1284,7 +1318,7 @@ class Amazon(Source):
|
|||||||
except Exception:
|
except Exception:
|
||||||
log.exception(
|
log.exception(
|
||||||
'get_details failed for url: %r' % durl)
|
'get_details failed for url: %r' % durl)
|
||||||
func = self.search_search_engine if USE_SEARCH_ENGINE else self.search_amazon
|
func = self.search_search_engine if self.use_search_engine else self.search_amazon
|
||||||
try:
|
try:
|
||||||
matches, query, domain, cover_url_processor = func(
|
matches, query, domain, cover_url_processor = func(
|
||||||
br, testing, log, abort, title, authors, identifiers, timeout)
|
br, testing, log, abort, title, authors, identifiers, timeout)
|
||||||
@ -1360,7 +1394,7 @@ class Amazon(Source):
|
|||||||
return
|
return
|
||||||
log('Downloading cover from:', cached_url)
|
log('Downloading cover from:', cached_url)
|
||||||
br = self.browser
|
br = self.browser
|
||||||
if USE_SEARCH_ENGINE:
|
if self.use_search_engine:
|
||||||
br = br.clone_browser()
|
br = br.clone_browser()
|
||||||
br.set_current_header('Referer', self.referrer_for_domain(self.domain))
|
br.set_current_header('Referer', self.referrer_for_domain(self.domain))
|
||||||
try:
|
try:
|
||||||
@ -1437,13 +1471,6 @@ if __name__ == '__main__': # tests {{{
|
|||||||
),
|
),
|
||||||
|
|
||||||
]
|
]
|
||||||
if not USE_SEARCH_ENGINE:
|
|
||||||
com_tests.append(
|
|
||||||
( # A kindle edition that does not appear in the search results when searching by ASIN
|
|
||||||
{'identifiers': {'amazon': 'B004JHY6OG'}},
|
|
||||||
[title_test(
|
|
||||||
'The Heroes: A First Law Novel (First Law World 2)', exact=True)]
|
|
||||||
))
|
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
@ -1568,13 +1595,16 @@ if __name__ == '__main__': # tests {{{
|
|||||||
),
|
),
|
||||||
] # }}}
|
] # }}}
|
||||||
|
|
||||||
def do_test(domain, start=0, stop=None):
|
def do_test(domain, start=0, stop=None, server='auto'):
|
||||||
tests = globals().get(domain + '_tests')
|
tests = globals().get(domain + '_tests')
|
||||||
if stop is None:
|
if stop is None:
|
||||||
stop = len(tests)
|
stop = len(tests)
|
||||||
tests = tests[start:stop]
|
tests = tests[start:stop]
|
||||||
test_identify_plugin(Amazon.name, tests, modify_plugin=lambda
|
test_identify_plugin(Amazon.name, tests, modify_plugin=lambda p: (
|
||||||
p: (setattr(p, 'testing_domain', domain), setattr(p, 'touched_fields', p.touched_fields - {'tags'})))
|
setattr(p, 'testing_domain', domain),
|
||||||
|
setattr(p, 'touched_fields', p.touched_fields - {'tags'}),
|
||||||
|
setattr(p, 'testing_server', server),
|
||||||
|
))
|
||||||
|
|
||||||
do_test('com')
|
do_test('com')
|
||||||
# do_test('de')
|
# do_test('de')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user