From a31151e864e7b8afcd90dde9ec355e611d45989a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 30 Apr 2019 16:59:08 +0530 Subject: [PATCH] Amazon metadata download: Fix some downloads failing when a mobile user agent is randomly selected. Fixes #1827027 [meta data request to amazon failed for some user agents](https://bugs.launchpad.net/calibre/+bug/1827027) Amazon serves up different markup to mobile browsers, so ensure we don't use them. --- src/calibre/ebooks/metadata/sources/amazon.py | 46 +++++++++---------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index 5e2a6ce822..eca960a97a 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -22,7 +22,11 @@ from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.sources.base import Option, Source, fixauthors, fixcase from calibre.utils.localization import canonicalize_lang -from calibre.utils.random_ua import accept_header_for_ua, all_user_agents +from calibre.utils.random_ua import accept_header_for_ua + + +def user_agent_is_ok(ua): + return 'Mobile/' not in ua and 'Mobile ' not in ua class CaptchaError(Exception): @@ -33,9 +37,6 @@ class SearchFailed(ValueError): pass -ua_index = -1 - - def parse_html(raw): try: from html5_parser import parse @@ -490,7 +491,7 @@ class Worker(Thread): # Get details {{{ return sanitize_title(self.totext(h1)) tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]') if not tdiv: - span = root.xpath('//*[id="ebooksTitle"]') + span = root.xpath('//*[@id="ebooksTitle"]') if span: return sanitize_title(self.totext(span[0])) raise ValueError('No title block found') @@ -509,7 +510,7 @@ class Worker(Thread): # Get details {{{ '#byline .author .contributorNameID', '#byline .author a.a-link-normal', '#bylineInfo .author .contributorNameID', - 
'#bylineInfo .author a.a-link-normal' + '#bylineInfo .author a.a-link-normal', ): matches = tuple(self.selector(sel)) if matches: @@ -861,7 +862,7 @@ class Worker(Thread): # Get details {{{ class Amazon(Source): name = 'Amazon.com' - version = (1, 2, 7) + version = (1, 2, 8) minimum_calibre_version = (2, 82, 0) description = _('Downloads metadata and covers from Amazon') @@ -939,28 +940,25 @@ class Amazon(Source): @property def browser(self): - global ua_index - if self.use_search_engine: - if self._browser is None: + br = self._browser + if br is None: + ua = 'Mobile ' + while not user_agent_is_ok(ua): ua = random_user_agent(allow_ie=False) - self._browser = br = browser(user_agent=ua) - br.set_handle_gzip(True) + # ua = 'Mozilla/5.0 (Linux; Android 8.0.0; VTR-L29; rv:63.0) Gecko/20100101 Firefox/63.0' + self._browser = br = browser(user_agent=ua) + br.set_handle_gzip(True) + if self.use_search_engine: br.addheaders += [ ('Accept', accept_header_for_ua(ua)), ('Upgrade-insecure-requests', '1'), ] - br = self._browser - else: - all_uas = all_user_agents() - ua_index = (ua_index + 1) % len(all_uas) - ua = all_uas[ua_index] - self._browser = br = browser(user_agent=ua) - br.set_handle_gzip(True) - br.addheaders += [ - ('Accept', accept_header_for_ua(ua)), - ('Upgrade-insecure-requests', '1'), - ('Referer', self.referrer_for_domain()), - ] + else: + br.addheaders += [ + ('Accept', accept_header_for_ua(ua)), + ('Upgrade-insecure-requests', '1'), + ('Referer', self.referrer_for_domain()), + ] return br def save_settings(self, *args, **kwargs):