From 993b9f6bcb1024aaab7ec5c795ae43d8b88ad8d5 Mon Sep 17 00:00:00 2001 From: Charles Haley Date: Sun, 8 Mar 2015 12:44:15 +0100 Subject: [PATCH] Deal with Amazon changing their websites to use (randomly?) two different html constructs around the format text --- src/calibre/gui2/store/stores/amazon_de_plugin.py | 13 +++++++++---- src/calibre/gui2/store/stores/amazon_es_plugin.py | 13 +++++++++---- src/calibre/gui2/store/stores/amazon_fr_plugin.py | 13 +++++++++---- src/calibre/gui2/store/stores/amazon_it_plugin.py | 13 +++++++++---- src/calibre/gui2/store/stores/amazon_uk_plugin.py | 14 ++++++++++---- 5 files changed, 46 insertions(+), 20 deletions(-) diff --git a/src/calibre/gui2/store/stores/amazon_de_plugin.py b/src/calibre/gui2/store/stores/amazon_de_plugin.py index 898746dfd2..128410a9d4 100644 --- a/src/calibre/gui2/store/stores/amazon_de_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_de_plugin.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 6 # Needed for dynamic plugin loading +store_version = 7 # Needed for dynamic plugin loading __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' @@ -59,6 +59,7 @@ class AmazonDEKindleStore(StorePlugin): allText = f.read() doc = html.fromstring(allText)#.decode('latin-1', 'replace')) + format_xpath2 = '' if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'): #print('grid form') data_xpath = '//div[contains(@class, "prod")]' @@ -89,8 +90,8 @@ class AmazonDEKindleStore(StorePlugin): elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'): #print('list form') data_xpath = '//li[@class="s-result-item"]' - format_xpath = ( - './/h3[contains(@class, "s-inline")]/text()') + format_xpath = './/a[contains(@class, "a-size-small")]/text()' + format_xpath2 = './/h3[contains(@class, "s-inline")]/text()' asin_xpath = '@data-asin' cover_xpath = './/img[contains(@class, "cfMarker")]/@src' title_xpath = './/h2[contains(@class, "s-access-title")]/text()' @@ -115,7 +116,11 @@ class AmazonDEKindleStore(StorePlugin): # if it isn't. format_ = ''.join(data.xpath(format_xpath)) if 'kindle' not in format_.lower(): - continue + if format_xpath2: + format_ = ''.join(data.xpath(format_xpath2)) + if 'kindle' not in format_.lower(): + # print(etree.tostring(data, pretty_print=True)) + continue # We must have an asin otherwise we can't easily reference the # book later. diff --git a/src/calibre/gui2/store/stores/amazon_es_plugin.py b/src/calibre/gui2/store/stores/amazon_es_plugin.py index f094b34f48..05d20368c0 100644 --- a/src/calibre/gui2/store/stores/amazon_es_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_es_plugin.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 6 # Needed for dynamic plugin loading +store_version = 7 # Needed for dynamic plugin loading __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' @@ -58,6 +58,7 @@ class AmazonESKindleStore(StorePlugin): allText = f.read() doc = html.fromstring(allText)#.decode('latin-1', 'replace')) + format_xpath2 = '' if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'): #print('grid form') data_xpath = '//div[contains(@class, "prod")]' @@ -88,8 +89,8 @@ class AmazonESKindleStore(StorePlugin): elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'): #print('list form') data_xpath = '//li[@class="s-result-item"]' - format_xpath = ( - './/h3[contains(@class, "s-inline")]/text()') + format_xpath = './/a[contains(@class, "a-size-small")]/text()' + format_xpath2 = './/h3[contains(@class, "s-inline")]/text()' asin_xpath = '@data-asin' cover_xpath = './/img[contains(@class, "cfMarker")]/@src' title_xpath = './/h2[contains(@class, "s-access-title")]/text()' @@ -114,7 +115,11 @@ class AmazonESKindleStore(StorePlugin): # if it isn't. format_ = ''.join(data.xpath(format_xpath)) if 'kindle' not in format_.lower(): - continue + if format_xpath2: + format_ = ''.join(data.xpath(format_xpath2)) + if 'kindle' not in format_.lower(): + # print(etree.tostring(data, pretty_print=True)) + continue # We must have an asin otherwise we can't easily reference the # book later. diff --git a/src/calibre/gui2/store/stores/amazon_fr_plugin.py b/src/calibre/gui2/store/stores/amazon_fr_plugin.py index f953ab4802..17a2e4e7bb 100644 --- a/src/calibre/gui2/store/stores/amazon_fr_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_fr_plugin.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 6 # Needed for dynamic plugin loading +store_version = 7 # Needed for dynamic plugin loading __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' @@ -55,6 +55,7 @@ class AmazonFRKindleStore(StorePlugin): allText = f.read() doc = html.fromstring(allText)#.decode('latin-1', 'replace')) + format_xpath2 = '' if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'): #print('grid form') data_xpath = '//div[contains(@class, "prod")]' @@ -85,8 +86,8 @@ class AmazonFRKindleStore(StorePlugin): elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'): #print('list form') data_xpath = '//li[@class="s-result-item"]' - format_xpath = ( - './/h3[contains(@class, "s-inline")]/text()') + format_xpath = './/a[contains(@class, "a-size-small")]/text()' + format_xpath2 = './/h3[contains(@class, "s-inline")]/text()' asin_xpath = '@data-asin' cover_xpath = './/img[contains(@class, "cfMarker")]/@src' title_xpath = './/h2[contains(@class, "s-access-title")]/text()' @@ -111,7 +112,11 @@ class AmazonFRKindleStore(StorePlugin): # if it isn't. format_ = ''.join(data.xpath(format_xpath)) if 'kindle' not in format_.lower(): - continue + if format_xpath2: + format_ = ''.join(data.xpath(format_xpath2)) + if 'kindle' not in format_.lower(): + # print(etree.tostring(data, pretty_print=True)) + continue # We must have an asin otherwise we can't easily reference the # book later. diff --git a/src/calibre/gui2/store/stores/amazon_it_plugin.py b/src/calibre/gui2/store/stores/amazon_it_plugin.py index bcbbd4c384..382bb74714 100644 --- a/src/calibre/gui2/store/stores/amazon_it_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_it_plugin.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 6 # Needed for dynamic plugin loading +store_version = 7 # Needed for dynamic plugin loading __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' @@ -58,6 +58,7 @@ class AmazonITKindleStore(StorePlugin): allText = f.read() doc = html.fromstring(allText)#.decode('latin-1', 'replace')) + format_xpath2 = '' if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'): #print('grid form') data_xpath = '//div[contains(@class, "prod")]' @@ -88,8 +89,8 @@ class AmazonITKindleStore(StorePlugin): elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'): #print('list form') data_xpath = '//li[@class="s-result-item"]' - format_xpath = ( - './/h3[contains(@class, "s-inline")]/text()') + format_xpath = './/a[contains(@class, "a-size-small")]/text()' + format_xpath2 = './/h3[contains(@class, "s-inline")]/text()' asin_xpath = '@data-asin' cover_xpath = './/img[contains(@class, "cfMarker")]/@src' title_xpath = './/h2[contains(@class, "s-access-title")]/text()' @@ -114,7 +115,11 @@ class AmazonITKindleStore(StorePlugin): # if it isn't. format_ = ''.join(data.xpath(format_xpath)) if 'kindle' not in format_.lower(): - continue + if format_xpath2: + format_ = ''.join(data.xpath(format_xpath2)) + if 'kindle' not in format_.lower(): + # print(etree.tostring(data, pretty_print=True)) + continue # We must have an asin otherwise we can't easily reference the # book later. diff --git a/src/calibre/gui2/store/stores/amazon_uk_plugin.py b/src/calibre/gui2/store/stores/amazon_uk_plugin.py index 460c8973c1..ad78bb7cc9 100644 --- a/src/calibre/gui2/store/stores/amazon_uk_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_uk_plugin.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 6 # Needed for dynamic plugin loading +store_version = 7 # Needed for dynamic plugin loading __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' @@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en' import re from contextlib import closing from lxml import html +# from lxml import html from PyQt5.Qt import QUrl @@ -62,6 +63,7 @@ class AmazonUKKindleStore(StorePlugin): allText = f.read() doc = html.fromstring(allText)#.decode('latin-1', 'replace')) + format_xpath2 = '' if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'): #print('grid form') data_xpath = '//div[contains(@class, "prod")]' @@ -92,8 +94,8 @@ class AmazonUKKindleStore(StorePlugin): elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'): #print('list form') data_xpath = '//li[@class="s-result-item"]' - format_xpath = ( - './/h3[contains(@class, "s-inline")]/text()') + format_xpath = './/a[contains(@class, "a-size-small")]/text()' + format_xpath2 = './/h3[contains(@class, "s-inline")]/text()' asin_xpath = '@data-asin' cover_xpath = './/img[contains(@class, "cfMarker")]/@src' title_xpath = './/h2[contains(@class, "s-access-title")]/text()' @@ -118,7 +120,11 @@ class AmazonUKKindleStore(StorePlugin): # if it isn't. format_ = ''.join(data.xpath(format_xpath)) if 'kindle' not in format_.lower(): - continue + if format_xpath2: + format_ = ''.join(data.xpath(format_xpath2)) + if 'kindle' not in format_.lower(): + # print(etree.tostring(data, pretty_print=True)) + continue # We must have an asin otherwise we can't easily reference the # book later.