Deal with Amazon changing their websites to use (randomly?) two different HTML constructs around the format text

2025-07-09 03:04:10 -04:00 · 2015-03-08 12:44:15 +01:00 · 2015-03-08 12:44:15 +01:00 · 993b9f6bcb
commit 993b9f6bcb
parent 0ca49a65f1
5 changed files with 46 additions and 20 deletions
--- a/src/calibre/gui2/store/stores/amazon_de_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_de_plugin.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 6 # Needed for dynamic plugin loading
+store_version = 7 # Needed for dynamic plugin loading
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
@@ -59,6 +59,7 @@ class AmazonDEKindleStore(StorePlugin):
             allText = f.read()
             doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
+            format_xpath2 = ''
            if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
                #print('grid form')
                data_xpath = '//div[contains(@class, "prod")]'
@@ -89,8 +90,8 @@ class AmazonDEKindleStore(StorePlugin):
            elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
                #print('list form')
                data_xpath = '//li[@class="s-result-item"]'
-                format_xpath = (
+                format_xpath = './/a[contains(@class, "a-size-small")]/text()'
-                        './/h3[contains(@class, "s-inline")]/text()')
+                format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
                asin_xpath = '@data-asin'
                cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
                title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
@@ -115,7 +116,11 @@ class AmazonDEKindleStore(StorePlugin):
                 # if it isn't.
                 format_ = ''.join(data.xpath(format_xpath))
                 if 'kindle' not in format_.lower():
-                    continue
+                    if format_xpath2:
+                        format_ = ''.join(data.xpath(format_xpath2))
+                        if 'kindle' not in format_.lower():
+                            # print(etree.tostring(data, pretty_print=True))
+                            continue
                # We must have an asin otherwise we can't easily reference the
                # book later.
--- a/src/calibre/gui2/store/stores/amazon_es_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_es_plugin.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 6 # Needed for dynamic plugin loading
+store_version = 7 # Needed for dynamic plugin loading
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
@@ -58,6 +58,7 @@ class AmazonESKindleStore(StorePlugin):
             allText = f.read()
             doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
+            format_xpath2 = ''
            if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
                #print('grid form')
                data_xpath = '//div[contains(@class, "prod")]'
@@ -88,8 +89,8 @@ class AmazonESKindleStore(StorePlugin):
            elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
                #print('list form')
                data_xpath = '//li[@class="s-result-item"]'
-                format_xpath = (
+                format_xpath = './/a[contains(@class, "a-size-small")]/text()'
-                        './/h3[contains(@class, "s-inline")]/text()')
+                format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
                asin_xpath = '@data-asin'
                cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
                title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
@@ -114,7 +115,11 @@ class AmazonESKindleStore(StorePlugin):
                 # if it isn't.
                 format_ = ''.join(data.xpath(format_xpath))
                 if 'kindle' not in format_.lower():
-                    continue
+                    if format_xpath2:
+                        format_ = ''.join(data.xpath(format_xpath2))
+                        if 'kindle' not in format_.lower():
+                            # print(etree.tostring(data, pretty_print=True))
+                            continue
                # We must have an asin otherwise we can't easily reference the
                # book later.
--- a/src/calibre/gui2/store/stores/amazon_fr_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_fr_plugin.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 6 # Needed for dynamic plugin loading
+store_version = 7 # Needed for dynamic plugin loading
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
@@ -55,6 +55,7 @@ class AmazonFRKindleStore(StorePlugin):
             allText = f.read()
             doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
+            format_xpath2 = ''
            if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
                #print('grid form')
                data_xpath = '//div[contains(@class, "prod")]'
@@ -85,8 +86,8 @@ class AmazonFRKindleStore(StorePlugin):
            elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
                #print('list form')
                data_xpath = '//li[@class="s-result-item"]'
-                format_xpath = (
+                format_xpath = './/a[contains(@class, "a-size-small")]/text()'
-                        './/h3[contains(@class, "s-inline")]/text()')
+                format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
                asin_xpath = '@data-asin'
                cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
                title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
@@ -111,7 +112,11 @@ class AmazonFRKindleStore(StorePlugin):
                 # if it isn't.
                 format_ = ''.join(data.xpath(format_xpath))
                 if 'kindle' not in format_.lower():
-                    continue
+                    if format_xpath2:
+                        format_ = ''.join(data.xpath(format_xpath2))
+                        if 'kindle' not in format_.lower():
+                            # print(etree.tostring(data, pretty_print=True))
+                            continue
                # We must have an asin otherwise we can't easily reference the
                # book later.
--- a/src/calibre/gui2/store/stores/amazon_it_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_it_plugin.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 6 # Needed for dynamic plugin loading
+store_version = 7 # Needed for dynamic plugin loading
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
@@ -58,6 +58,7 @@ class AmazonITKindleStore(StorePlugin):
             allText = f.read()
             doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
+            format_xpath2 = ''
            if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
                #print('grid form')
                data_xpath = '//div[contains(@class, "prod")]'
@@ -88,8 +89,8 @@ class AmazonITKindleStore(StorePlugin):
            elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
                #print('list form')
                data_xpath = '//li[@class="s-result-item"]'
-                format_xpath = (
+                format_xpath = './/a[contains(@class, "a-size-small")]/text()'
-                        './/h3[contains(@class, "s-inline")]/text()')
+                format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
                asin_xpath = '@data-asin'
                cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
                title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
@@ -114,7 +115,11 @@ class AmazonITKindleStore(StorePlugin):
                 # if it isn't.
                 format_ = ''.join(data.xpath(format_xpath))
                 if 'kindle' not in format_.lower():
-                    continue
+                    if format_xpath2:
+                        format_ = ''.join(data.xpath(format_xpath2))
+                        if 'kindle' not in format_.lower():
+                            # print(etree.tostring(data, pretty_print=True))
+                            continue
                # We must have an asin otherwise we can't easily reference the
                # book later.
--- a/src/calibre/gui2/store/stores/amazon_uk_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_uk_plugin.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 6 # Needed for dynamic plugin loading
+store_version = 7 # Needed for dynamic plugin loading
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
@@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
 import re
 from contextlib import closing
 from lxml import html
+# from lxml import html
 from PyQt5.Qt import QUrl
@@ -62,6 +63,7 @@ class AmazonUKKindleStore(StorePlugin):
             allText = f.read()
             doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
+            format_xpath2 = ''
            if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
                #print('grid form')
                data_xpath = '//div[contains(@class, "prod")]'
@@ -92,8 +94,8 @@ class AmazonUKKindleStore(StorePlugin):
            elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
                #print('list form')
                data_xpath = '//li[@class="s-result-item"]'
-                format_xpath = (
+                format_xpath = './/a[contains(@class, "a-size-small")]/text()'
-                        './/h3[contains(@class, "s-inline")]/text()')
+                format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
                asin_xpath = '@data-asin'
                cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
                title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
@@ -118,7 +120,11 @@ class AmazonUKKindleStore(StorePlugin):
                 # if it isn't.
                 format_ = ''.join(data.xpath(format_xpath))
                 if 'kindle' not in format_.lower():
-                    continue
+                    if format_xpath2:
+                        format_ = ''.join(data.xpath(format_xpath2))
+                        if 'kindle' not in format_.lower():
+                            # print(etree.tostring(data, pretty_print=True))
+                            continue
                # We must have an asin otherwise we can't easily reference the
                # book later.