mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Amazon metadata download: Fix some downloads failing when a mobile user agent is randomly selected. Fixes #1827027 [meta data request to amazon failed for some user agents](https://bugs.launchpad.net/calibre/+bug/1827027)
Amazon serves up different markup to mobile browsers, so ensure we don't use them.
This commit is contained in:
parent
e6fd5e4c0c
commit
a31151e864
@ -22,7 +22,11 @@ from calibre.ebooks.metadata import check_isbn
|
|||||||
from calibre.ebooks.metadata.book.base import Metadata
|
from calibre.ebooks.metadata.book.base import Metadata
|
||||||
from calibre.ebooks.metadata.sources.base import Option, Source, fixauthors, fixcase
|
from calibre.ebooks.metadata.sources.base import Option, Source, fixauthors, fixcase
|
||||||
from calibre.utils.localization import canonicalize_lang
|
from calibre.utils.localization import canonicalize_lang
|
||||||
from calibre.utils.random_ua import accept_header_for_ua, all_user_agents
|
from calibre.utils.random_ua import accept_header_for_ua
|
||||||
|
|
||||||
|
|
||||||
|
def user_agent_is_ok(ua):
|
||||||
|
return 'Mobile/' not in ua and 'Mobile ' not in ua
|
||||||
|
|
||||||
|
|
||||||
class CaptchaError(Exception):
|
class CaptchaError(Exception):
|
||||||
@ -33,9 +37,6 @@ class SearchFailed(ValueError):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
ua_index = -1
|
|
||||||
|
|
||||||
|
|
||||||
def parse_html(raw):
|
def parse_html(raw):
|
||||||
try:
|
try:
|
||||||
from html5_parser import parse
|
from html5_parser import parse
|
||||||
@ -490,7 +491,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
return sanitize_title(self.totext(h1))
|
return sanitize_title(self.totext(h1))
|
||||||
tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]')
|
tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]')
|
||||||
if not tdiv:
|
if not tdiv:
|
||||||
span = root.xpath('//*[id="ebooksTitle"]')
|
span = root.xpath('//*[@id="ebooksTitle"]')
|
||||||
if span:
|
if span:
|
||||||
return sanitize_title(self.totext(span[0]))
|
return sanitize_title(self.totext(span[0]))
|
||||||
raise ValueError('No title block found')
|
raise ValueError('No title block found')
|
||||||
@ -509,7 +510,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
'#byline .author .contributorNameID',
|
'#byline .author .contributorNameID',
|
||||||
'#byline .author a.a-link-normal',
|
'#byline .author a.a-link-normal',
|
||||||
'#bylineInfo .author .contributorNameID',
|
'#bylineInfo .author .contributorNameID',
|
||||||
'#bylineInfo .author a.a-link-normal'
|
'#bylineInfo .author a.a-link-normal',
|
||||||
):
|
):
|
||||||
matches = tuple(self.selector(sel))
|
matches = tuple(self.selector(sel))
|
||||||
if matches:
|
if matches:
|
||||||
@ -861,7 +862,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
class Amazon(Source):
|
class Amazon(Source):
|
||||||
|
|
||||||
name = 'Amazon.com'
|
name = 'Amazon.com'
|
||||||
version = (1, 2, 7)
|
version = (1, 2, 8)
|
||||||
minimum_calibre_version = (2, 82, 0)
|
minimum_calibre_version = (2, 82, 0)
|
||||||
description = _('Downloads metadata and covers from Amazon')
|
description = _('Downloads metadata and covers from Amazon')
|
||||||
|
|
||||||
@ -939,28 +940,25 @@ class Amazon(Source):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def browser(self):
|
def browser(self):
|
||||||
global ua_index
|
br = self._browser
|
||||||
if self.use_search_engine:
|
if br is None:
|
||||||
if self._browser is None:
|
ua = 'Mobile '
|
||||||
|
while not user_agent_is_ok(ua):
|
||||||
ua = random_user_agent(allow_ie=False)
|
ua = random_user_agent(allow_ie=False)
|
||||||
self._browser = br = browser(user_agent=ua)
|
# ua = 'Mozilla/5.0 (Linux; Android 8.0.0; VTR-L29; rv:63.0) Gecko/20100101 Firefox/63.0'
|
||||||
br.set_handle_gzip(True)
|
self._browser = br = browser(user_agent=ua)
|
||||||
|
br.set_handle_gzip(True)
|
||||||
|
if self.use_search_engine:
|
||||||
br.addheaders += [
|
br.addheaders += [
|
||||||
('Accept', accept_header_for_ua(ua)),
|
('Accept', accept_header_for_ua(ua)),
|
||||||
('Upgrade-insecure-requests', '1'),
|
('Upgrade-insecure-requests', '1'),
|
||||||
]
|
]
|
||||||
br = self._browser
|
else:
|
||||||
else:
|
br.addheaders += [
|
||||||
all_uas = all_user_agents()
|
('Accept', accept_header_for_ua(ua)),
|
||||||
ua_index = (ua_index + 1) % len(all_uas)
|
('Upgrade-insecure-requests', '1'),
|
||||||
ua = all_uas[ua_index]
|
('Referer', self.referrer_for_domain()),
|
||||||
self._browser = br = browser(user_agent=ua)
|
]
|
||||||
br.set_handle_gzip(True)
|
|
||||||
br.addheaders += [
|
|
||||||
('Accept', accept_header_for_ua(ua)),
|
|
||||||
('Upgrade-insecure-requests', '1'),
|
|
||||||
('Referer', self.referrer_for_domain()),
|
|
||||||
]
|
|
||||||
return br
|
return br
|
||||||
|
|
||||||
def save_settings(self, *args, **kwargs):
|
def save_settings(self, *args, **kwargs):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user