Amazon metadata download: Fix some downloads failing when a mobile user agent is randomly selected. Fixes #1827027 [meta data request to amazon failed for some user agents](https://bugs.launchpad.net/calibre/+bug/1827027)

Amazon serves up different markup to mobile browsers, so ensure we don't
use them.
This commit is contained in:
Kovid Goyal 2019-04-30 16:59:08 +05:30
parent e6fd5e4c0c
commit a31151e864
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -22,7 +22,11 @@ from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.sources.base import Option, Source, fixauthors, fixcase from calibre.ebooks.metadata.sources.base import Option, Source, fixauthors, fixcase
from calibre.utils.localization import canonicalize_lang from calibre.utils.localization import canonicalize_lang
from calibre.utils.random_ua import accept_header_for_ua, all_user_agents from calibre.utils.random_ua import accept_header_for_ua
def user_agent_is_ok(ua):
return 'Mobile/' not in ua and 'Mobile ' not in ua
class CaptchaError(Exception): class CaptchaError(Exception):
@ -33,9 +37,6 @@ class SearchFailed(ValueError):
pass pass
ua_index = -1
def parse_html(raw): def parse_html(raw):
try: try:
from html5_parser import parse from html5_parser import parse
@ -490,7 +491,7 @@ class Worker(Thread): # Get details {{{
return sanitize_title(self.totext(h1)) return sanitize_title(self.totext(h1))
tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]') tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]')
if not tdiv: if not tdiv:
span = root.xpath('//*[id="ebooksTitle"]') span = root.xpath('//*[@id="ebooksTitle"]')
if span: if span:
return sanitize_title(self.totext(span[0])) return sanitize_title(self.totext(span[0]))
raise ValueError('No title block found') raise ValueError('No title block found')
@ -509,7 +510,7 @@ class Worker(Thread): # Get details {{{
'#byline .author .contributorNameID', '#byline .author .contributorNameID',
'#byline .author a.a-link-normal', '#byline .author a.a-link-normal',
'#bylineInfo .author .contributorNameID', '#bylineInfo .author .contributorNameID',
'#bylineInfo .author a.a-link-normal' '#bylineInfo .author a.a-link-normal',
): ):
matches = tuple(self.selector(sel)) matches = tuple(self.selector(sel))
if matches: if matches:
@ -861,7 +862,7 @@ class Worker(Thread): # Get details {{{
class Amazon(Source): class Amazon(Source):
name = 'Amazon.com' name = 'Amazon.com'
version = (1, 2, 7) version = (1, 2, 8)
minimum_calibre_version = (2, 82, 0) minimum_calibre_version = (2, 82, 0)
description = _('Downloads metadata and covers from Amazon') description = _('Downloads metadata and covers from Amazon')
@ -939,28 +940,25 @@ class Amazon(Source):
@property @property
def browser(self): def browser(self):
global ua_index br = self._browser
if self.use_search_engine: if br is None:
if self._browser is None: ua = 'Mobile '
while not user_agent_is_ok(ua):
ua = random_user_agent(allow_ie=False) ua = random_user_agent(allow_ie=False)
self._browser = br = browser(user_agent=ua) # ua = 'Mozilla/5.0 (Linux; Android 8.0.0; VTR-L29; rv:63.0) Gecko/20100101 Firefox/63.0'
br.set_handle_gzip(True) self._browser = br = browser(user_agent=ua)
br.set_handle_gzip(True)
if self.use_search_engine:
br.addheaders += [ br.addheaders += [
('Accept', accept_header_for_ua(ua)), ('Accept', accept_header_for_ua(ua)),
('Upgrade-insecure-requests', '1'), ('Upgrade-insecure-requests', '1'),
] ]
br = self._browser else:
else: br.addheaders += [
all_uas = all_user_agents() ('Accept', accept_header_for_ua(ua)),
ua_index = (ua_index + 1) % len(all_uas) ('Upgrade-insecure-requests', '1'),
ua = all_uas[ua_index] ('Referer', self.referrer_for_domain()),
self._browser = br = browser(user_agent=ua) ]
br.set_handle_gzip(True)
br.addheaders += [
('Accept', accept_header_for_ua(ua)),
('Upgrade-insecure-requests', '1'),
('Referer', self.referrer_for_domain()),
]
return br return br
def save_settings(self, *args, **kwargs): def save_settings(self, *args, **kwargs):