mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Amazon metadata download: Support for yet another variant of Amazon cover image markup
This commit is contained in:
parent
b91f42ac8d
commit
a623357b0c
@ -557,11 +557,11 @@ class Worker(Thread): # Get details {{{
|
|||||||
def parse_cover(self, root, raw=b""):
|
def parse_cover(self, root, raw=b""):
|
||||||
# Look for the image URL in javascript, using the first image in the
|
# Look for the image URL in javascript, using the first image in the
|
||||||
# image gallery as the cover
|
# image gallery as the cover
|
||||||
|
import json
|
||||||
imgpat = re.compile(r"""'imageGalleryData'\s*:\s*(\[\s*{.+])""")
|
imgpat = re.compile(r"""'imageGalleryData'\s*:\s*(\[\s*{.+])""")
|
||||||
for script in root.xpath('//script'):
|
for script in root.xpath('//script'):
|
||||||
m = imgpat.search(script.text or '')
|
m = imgpat.search(script.text or '')
|
||||||
if m is not None:
|
if m is not None:
|
||||||
import json
|
|
||||||
try:
|
try:
|
||||||
return json.loads(m.group(1))[0]['mainUrl']
|
return json.loads(m.group(1))[0]['mainUrl']
|
||||||
except Exception:
|
except Exception:
|
||||||
@ -590,6 +590,25 @@ class Worker(Thread): # Get details {{{
|
|||||||
imgs = root.xpath('//div[@class="main-image-inner-wrapper"]/img[@src]')
|
imgs = root.xpath('//div[@class="main-image-inner-wrapper"]/img[@src]')
|
||||||
if not imgs:
|
if not imgs:
|
||||||
imgs = root.xpath('//div[@id="main-image-container"]//img[@src]')
|
imgs = root.xpath('//div[@id="main-image-container"]//img[@src]')
|
||||||
|
if not imgs:
|
||||||
|
imgs = root.xpath('//div[@id="mainImageContainer"]//img[@data-a-dynamic-image]')
|
||||||
|
for img in imgs:
|
||||||
|
try:
|
||||||
|
idata = json.loads(img.get('data-a-dynamic-image'))
|
||||||
|
except Exception:
|
||||||
|
imgs = ()
|
||||||
|
else:
|
||||||
|
mwidth = 0
|
||||||
|
try:
|
||||||
|
url = None
|
||||||
|
for iurl, (width, height) in idata.iteritems():
|
||||||
|
if width > mwidth:
|
||||||
|
mwidth = width
|
||||||
|
url = iurl
|
||||||
|
return url
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
for img in imgs:
|
for img in imgs:
|
||||||
src = img.get('src')
|
src = img.get('src')
|
||||||
if 'data:' in src:
|
if 'data:' in src:
|
||||||
@ -972,7 +991,6 @@ class Amazon(Source):
|
|||||||
if udata is not None:
|
if udata is not None:
|
||||||
# Try to directly get details page instead of running a search
|
# Try to directly get details page instead of running a search
|
||||||
domain, idtype, asin, durl = udata
|
domain, idtype, asin, durl = udata
|
||||||
durl = 'http://www.amazon.com/gp/product/' + asin
|
|
||||||
preparsed_root = parse_details_page(durl, log, timeout, br, domain)
|
preparsed_root = parse_details_page(durl, log, timeout, br, domain)
|
||||||
if preparsed_root is not None:
|
if preparsed_root is not None:
|
||||||
qasin = parse_asin(preparsed_root[1], log, durl)
|
qasin = parse_asin(preparsed_root[1], log, durl)
|
||||||
@ -1123,11 +1141,9 @@ if __name__ == '__main__': # tests {{{
|
|||||||
),
|
),
|
||||||
|
|
||||||
( # + in title and uses id="main-image" for cover
|
( # + in title and uses id="main-image" for cover
|
||||||
{'title':'C++ Concurrency in Action'},
|
{'identifiers':{'amazon':'1933988770'}},
|
||||||
[title_test('C++ Concurrency in Action: Practical Multithreading',
|
[title_test('C++ Concurrency in Action: Practical Multithreading', exact=True)]
|
||||||
exact=True),
|
),
|
||||||
]
|
|
||||||
),
|
|
||||||
|
|
||||||
|
|
||||||
( # noscript description
|
( # noscript description
|
||||||
|
Loading…
x
Reference in New Issue
Block a user