mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Use a common UA when searching via SE
This commit is contained in:
parent
9c78d6d18b
commit
17a2d6dc26
@ -10,7 +10,7 @@ from Queue import Empty, Queue
|
|||||||
from threading import Thread
|
from threading import Thread
|
||||||
from urlparse import urlparse
|
from urlparse import urlparse
|
||||||
|
|
||||||
from calibre import as_unicode, browser
|
from calibre import as_unicode, browser, random_user_agent
|
||||||
from calibre.ebooks.metadata import check_isbn
|
from calibre.ebooks.metadata import check_isbn
|
||||||
from calibre.ebooks.metadata.book.base import Metadata
|
from calibre.ebooks.metadata.book.base import Metadata
|
||||||
from calibre.ebooks.metadata.sources.base import Option, Source, fixauthors, fixcase
|
from calibre.ebooks.metadata.sources.base import Option, Source, fixauthors, fixcase
|
||||||
@ -47,7 +47,7 @@ def parse_details_page(url, log, timeout, browser, domain):
|
|||||||
attr = getattr(e, 'args', [None])
|
attr = getattr(e, 'args', [None])
|
||||||
attr = attr if attr else [None]
|
attr = attr if attr else [None]
|
||||||
if isinstance(attr[0], socket.timeout):
|
if isinstance(attr[0], socket.timeout):
|
||||||
msg = 'Amazon timed out. Try again later.'
|
msg = 'Details page timed out. Try again later.'
|
||||||
log.error(msg)
|
log.error(msg)
|
||||||
else:
|
else:
|
||||||
msg = 'Failed to make details query: %r' % url
|
msg = 'Failed to make details query: %r' % url
|
||||||
@ -873,6 +873,17 @@ class Amazon(Source):
|
|||||||
@property
|
@property
|
||||||
def browser(self):
|
def browser(self):
|
||||||
global ua_index
|
global ua_index
|
||||||
|
if USE_SEARCH_ENGINE:
|
||||||
|
if self._browser is None:
|
||||||
|
ua = random_user_agent(allow_ie=False)
|
||||||
|
self._browser = br = browser(user_agent=ua)
|
||||||
|
br.set_handle_gzip(True)
|
||||||
|
br.addheaders += [
|
||||||
|
('Accept', accept_header_for_ua(ua)),
|
||||||
|
('Upgrade-insecure-requests', '1'),
|
||||||
|
]
|
||||||
|
br = self._browser
|
||||||
|
else:
|
||||||
all_uas = all_user_agents()
|
all_uas = all_user_agents()
|
||||||
ua_index = (ua_index + 1) % len(all_uas)
|
ua_index = (ua_index + 1) % len(all_uas)
|
||||||
ua = all_uas[ua_index]
|
ua = all_uas[ua_index]
|
||||||
@ -1260,7 +1271,7 @@ class Amazon(Source):
|
|||||||
qasin = parse_asin(preparsed_root[1], log, durl)
|
qasin = parse_asin(preparsed_root[1], log, durl)
|
||||||
if qasin == asin:
|
if qasin == asin:
|
||||||
w = Worker(durl, result_queue, br, log, 0, domain,
|
w = Worker(durl, result_queue, br, log, 0, domain,
|
||||||
self, testing=testing, preparsed_root=preparsed_root)
|
self, testing=testing, preparsed_root=preparsed_root, timeout=timeout)
|
||||||
try:
|
try:
|
||||||
w.get_details()
|
w.get_details()
|
||||||
return
|
return
|
||||||
@ -1287,7 +1298,7 @@ class Amazon(Source):
|
|||||||
log.error('No matches found with query: %r' % query)
|
log.error('No matches found with query: %r' % query)
|
||||||
return
|
return
|
||||||
|
|
||||||
workers = [Worker(url, result_queue, br, log, i, domain, self, testing=testing,
|
workers = [Worker(url, result_queue, br, log, i, domain, self, testing=testing, timeout=timeout,
|
||||||
cover_url_processor=cover_url_processor) for i, url in enumerate(matches)]
|
cover_url_processor=cover_url_processor) for i, url in enumerate(matches)]
|
||||||
|
|
||||||
for w in workers:
|
for w in workers:
|
||||||
@ -1371,12 +1382,6 @@ if __name__ == '__main__': # tests {{{
|
|||||||
series_test('Craft Sequence', 1)]
|
series_test('Craft Sequence', 1)]
|
||||||
),
|
),
|
||||||
|
|
||||||
( # A kindle edition that does not appear in the search results when searching by ASIN
|
|
||||||
{'identifiers': {'amazon': 'B004JHY6OG'}},
|
|
||||||
[title_test(
|
|
||||||
'The Heroes: A First Law Novel (First Law World 2)', exact=True)]
|
|
||||||
),
|
|
||||||
|
|
||||||
( # + in title and uses id="main-image" for cover
|
( # + in title and uses id="main-image" for cover
|
||||||
{'identifiers': {'amazon': '1933988770'}},
|
{'identifiers': {'amazon': '1933988770'}},
|
||||||
[title_test(
|
[title_test(
|
||||||
@ -1384,14 +1389,6 @@ if __name__ == '__main__': # tests {{{
|
|||||||
),
|
),
|
||||||
|
|
||||||
|
|
||||||
( # noscript description
|
|
||||||
{'identifiers': {'amazon': '0756407117'}},
|
|
||||||
[title_test(
|
|
||||||
"Throne of the Crescent Moon"),
|
|
||||||
comments_test('Makhslood'), comments_test('Dhamsawaat'),
|
|
||||||
]
|
|
||||||
),
|
|
||||||
|
|
||||||
( # Different comments markup, using Book Description section
|
( # Different comments markup, using Book Description section
|
||||||
{'identifiers': {'amazon': '0982514506'}},
|
{'identifiers': {'amazon': '0982514506'}},
|
||||||
[title_test(
|
[title_test(
|
||||||
@ -1429,7 +1426,16 @@ if __name__ == '__main__': # tests {{{
|
|||||||
authors_test(['F. Scott Fitzgerald'])]
|
authors_test(['F. Scott Fitzgerald'])]
|
||||||
),
|
),
|
||||||
|
|
||||||
] # }}}
|
]
|
||||||
|
if not USE_SEARCH_ENGINE:
|
||||||
|
com_tests.append(
|
||||||
|
( # A kindle edition that does not appear in the search results when searching by ASIN
|
||||||
|
{'identifiers': {'amazon': 'B004JHY6OG'}},
|
||||||
|
[title_test(
|
||||||
|
'The Heroes: A First Law Novel (First Law World 2)', exact=True)]
|
||||||
|
))
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
de_tests = [ # {{{
|
de_tests = [ # {{{
|
||||||
(
|
(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user