Amazon metadata download: Handle cover images loaded via JavaScript on the amazon.de site

This commit is contained in:
Kovid Goyal 2013-03-16 11:03:31 +05:30
parent b347e5091e
commit 225d78d581

View File

@ -302,7 +302,7 @@ class Worker(Thread): # Get details {{{
self.log.exception('Error parsing series for url: %r'%self.url) self.log.exception('Error parsing series for url: %r'%self.url)
try: try:
self.cover_url = self.parse_cover(root) self.cover_url = self.parse_cover(root, raw)
except: except:
self.log.exception('Error parsing cover for url: %r'%self.url) self.log.exception('Error parsing cover for url: %r'%self.url)
mi.has_cover = bool(self.cover_url) mi.has_cover = bool(self.cover_url)
@ -450,12 +450,18 @@ class Worker(Thread): # Get details {{{
ans = (s, i) ans = (s, i)
return ans return ans
def parse_cover(self, root, raw=b""):
def parse_cover(self, root):
imgs = root.xpath('//img[(@id="prodImage" or @id="original-main-image" or @id="main-image") and @src]') imgs = root.xpath('//img[(@id="prodImage" or @id="original-main-image" or @id="main-image") and @src]')
if not imgs:
imgs = root.xpath('//div[@class="main-image-inner-wrapper"]/img[@src]')
if imgs: if imgs:
src = imgs[0].get('src') src = imgs[0].get('src')
if '/no-image-avail' not in src: if 'loading-' in src:
js_img = re.search(br'"largeImage":"(http://[^"]+)",',raw)
if js_img:
src = js_img.group(1).decode('utf-8')
if ('/no-image-avail' not in src and 'loading-' not in src):
self.log('Found image: %s' % src)
parts = src.split('/') parts = src.split('/')
if len(parts) > 3: if len(parts) > 3:
bn = parts[-1] bn = parts[-1]