Amazon metadata download: Handle cover images loaded via JavaScript on the amazon.de site

2025-11-27 00:35:00 -05:00 · 2013-03-16 11:03:31 +05:30 · 2013-03-16 11:03:31 +05:30 · 225d78d581
commit 225d78d581
parent b347e5091e
1 changed files with 10 additions and 4 deletions
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -302,7 +302,7 @@ class Worker(Thread): # Get details {{{
            self.log.exception('Error parsing series for url: %r'%self.url)

        try:
-            self.cover_url = self.parse_cover(root)
+            self.cover_url = self.parse_cover(root, raw)
        except:
            self.log.exception('Error parsing cover for url: %r'%self.url)
        mi.has_cover = bool(self.cover_url)
@@ -450,12 +450,18 @@ class Worker(Thread): # Get details {{{
                    ans = (s, i)
        return ans

-
-    def parse_cover(self, root):
+    def parse_cover(self, root, raw=b""):
        imgs = root.xpath('//img[(@id="prodImage" or @id="original-main-image" or @id="main-image") and @src]')
+        if not imgs:
+            imgs = root.xpath('//div[@class="main-image-inner-wrapper"]/img[@src]')
        if imgs:
            src = imgs[0].get('src')
-            if '/no-image-avail' not in src:
+            if 'loading-' in src:
+                js_img = re.search(br'"largeImage":"(http://[^"]+)",',raw)
+                if js_img:
+                    src = js_img.group(1).decode('utf-8')
+            if ('/no-image-avail' not in src and 'loading-' not in src):
+                self.log('Found image: %s' % src)
                parts = src.split('/')
                if len(parts) > 3:
                    bn = parts[-1]