Get Books: Update the amazon, waterstones and libri.de plugins to account for website changes

2025-11-13 10:06:59 -05:00 · 2013-01-16 08:55:23 +05:30 · 2013-01-16 08:55:23 +05:30 · 76582f2fe3
commit 76582f2fe3
parent 190af68ca6 21700463ce
8 changed files with 402 additions and 46 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -1471,9 +1471,9 @@ class StoreLegimiStore(StoreBase):
    affiliate = True

 class StoreLibreDEStore(StoreBase):
-    name = 'Libri DE'
+    name = 'ebook.de'
    author = 'Charles Haley'
-    description = u'Sicher Bücher, Hörbücher und Downloads online bestellen.'
+    description = u'All Ihre Bücher immer dabei. Suchen, finden, kaufen: so einfach wie nie. ebook.de war libre.de'
    actual_plugin = 'calibre.gui2.store.stores.libri_de_plugin:LibreDEStore'

    headquarters = 'DE'
--- a/src/calibre/gui2/store/stores/amazon_de_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_de_plugin.py
@ -7,9 +7,100 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

-from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
+from contextlib import closing
+from lxml import html

-class AmazonDEKindleStore(AmazonUKKindleStore):
+from PyQt4.Qt import QUrl
+
+from calibre.gui2.store import StorePlugin
+from calibre import browser
+from calibre.gui2 import open_url
+from calibre.gui2.store.search_result import SearchResult
+
+
+
+# This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
+# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
+# when modified.
+
+class AmazonEUBase(StorePlugin):
+    '''
+    For comments on the implementation, please see amazon_plugin.py
+    '''
+
+    def open(self, parent=None, detail_item=None, external=False):
+
+        store_link = self.store_link % self.aff_id
+        if detail_item:
+            self.aff_id['asin'] = detail_item
+            store_link = self.store_link_details % self.aff_id
+        open_url(QUrl(store_link))
+
+    def search(self, query, max_results=10, timeout=60):
+        url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
+        br = browser()
+
+        counter = max_results
+        with closing(br.open(url, timeout=timeout)) as f:
+            doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
+
+            data_xpath = '//div[contains(@class, "prod")]'
+            format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
+            asin_xpath = '@name'
+            cover_xpath = './/img[@class="productImage"]/@src'
+            title_xpath = './/h3[@class="newaps"]/a//text()'
+            author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()'
+            price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
+
+            for data in doc.xpath(data_xpath):
+                if counter <= 0:
+                    break
+
+                # Even though we are searching digital-text only, Amazon will still
+                # put in results for non-Kindle books (author pages). So we need
+                # to explicitly check if the item is a Kindle book and ignore it
+                # if it isn't.
+                format_ = ''.join(data.xpath(format_xpath))
+                if 'kindle' not in format_.lower():
+                    continue
+
+                # We must have an asin otherwise we can't easily reference the
+                # book later.
+                asin = data.xpath(asin_xpath)
+                if asin:
+                    asin = asin[0]
+                else:
+                    continue
+
+                cover_url = ''.join(data.xpath(cover_xpath))
+
+                title = ''.join(data.xpath(title_xpath))
+                author = ''.join(data.xpath(author_xpath))
+                try:
+                    if self.author_article:
+                        author = author.split(self.author_article, 1)[1].split(" (")[0]
+                except:
+                    pass
+
+                price = ''.join(data.xpath(price_xpath))
+
+                counter -= 1
+
+                s = SearchResult()
+                s.cover_url = cover_url.strip()
+                s.title = title.strip()
+                s.author = author.strip()
+                s.price = price.strip()
+                s.detail_item = asin.strip()
+                s.drm = SearchResult.DRM_UNKNOWN
+                s.formats = 'Kindle'
+
+                yield s
+
+    def get_details(self, search_result, timeout):
+        pass
+
+class AmazonDEKindleStore(AmazonEUBase):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''
--- a/src/calibre/gui2/store/stores/amazon_es_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_es_plugin.py
@ -7,9 +7,99 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

-from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
+from contextlib import closing
+from lxml import html

-class AmazonESKindleStore(AmazonUKKindleStore):
+from PyQt4.Qt import QUrl
+
+from calibre.gui2.store import StorePlugin
+from calibre import browser
+from calibre.gui2 import open_url
+from calibre.gui2.store.search_result import SearchResult
+
+
+# This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
+# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
+# when modified.
+
+class AmazonEUBase(StorePlugin):
+    '''
+    For comments on the implementation, please see amazon_plugin.py
+    '''
+
+    def open(self, parent=None, detail_item=None, external=False):
+
+        store_link = self.store_link % self.aff_id
+        if detail_item:
+            self.aff_id['asin'] = detail_item
+            store_link = self.store_link_details % self.aff_id
+        open_url(QUrl(store_link))
+
+    def search(self, query, max_results=10, timeout=60):
+        url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
+        br = browser()
+
+        counter = max_results
+        with closing(br.open(url, timeout=timeout)) as f:
+            doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
+
+            data_xpath = '//div[contains(@class, "prod")]'
+            format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
+            asin_xpath = '@name'
+            cover_xpath = './/img[@class="productImage"]/@src'
+            title_xpath = './/h3[@class="newaps"]/a//text()'
+            author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()'
+            price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
+
+            for data in doc.xpath(data_xpath):
+                if counter <= 0:
+                    break
+
+                # Even though we are searching digital-text only, Amazon will still
+                # put in results for non-Kindle books (author pages). So we need
+                # to explicitly check if the item is a Kindle book and ignore it
+                # if it isn't.
+                format_ = ''.join(data.xpath(format_xpath))
+                if 'kindle' not in format_.lower():
+                    continue
+
+                # We must have an asin otherwise we can't easily reference the
+                # book later.
+                asin = data.xpath(asin_xpath)
+                if asin:
+                    asin = asin[0]
+                else:
+                    continue
+
+                cover_url = ''.join(data.xpath(cover_xpath))
+
+                title = ''.join(data.xpath(title_xpath))
+                author = ''.join(data.xpath(author_xpath))
+                try:
+                    if self.author_article:
+                        author = author.split(self.author_article, 1)[1].split(" (")[0]
+                except:
+                    pass
+
+                price = ''.join(data.xpath(price_xpath))
+
+                counter -= 1
+
+                s = SearchResult()
+                s.cover_url = cover_url.strip()
+                s.title = title.strip()
+                s.author = author.strip()
+                s.price = price.strip()
+                s.detail_item = asin.strip()
+                s.drm = SearchResult.DRM_UNKNOWN
+                s.formats = 'Kindle'
+
+                yield s
+
+    def get_details(self, search_result, timeout):
+        pass
+
+class AmazonESKindleStore(AmazonEUBase):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''
--- a/src/calibre/gui2/store/stores/amazon_fr_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_fr_plugin.py
@ -8,9 +8,100 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'


-from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
+from contextlib import closing
+from lxml import html

-class AmazonFRKindleStore(AmazonUKKindleStore):
+from PyQt4.Qt import QUrl
+
+from calibre.gui2.store import StorePlugin
+from calibre import browser
+from calibre.gui2 import open_url
+from calibre.gui2.store.search_result import SearchResult
+
+
+
+# This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
+# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
+# when modified.
+
+class AmazonEUBase(StorePlugin):
+    '''
+    For comments on the implementation, please see amazon_plugin.py
+    '''
+
+    def open(self, parent=None, detail_item=None, external=False):
+
+        store_link = self.store_link % self.aff_id
+        if detail_item:
+            self.aff_id['asin'] = detail_item
+            store_link = self.store_link_details % self.aff_id
+        open_url(QUrl(store_link))
+
+    def search(self, query, max_results=10, timeout=60):
+        url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
+        br = browser()
+
+        counter = max_results
+        with closing(br.open(url, timeout=timeout)) as f:
+            doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
+
+            data_xpath = '//div[contains(@class, "prod")]'
+            format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
+            asin_xpath = '@name'
+            cover_xpath = './/img[@class="productImage"]/@src'
+            title_xpath = './/h3[@class="newaps"]/a//text()'
+            author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()'
+            price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
+
+            for data in doc.xpath(data_xpath):
+                if counter <= 0:
+                    break
+
+                # Even though we are searching digital-text only, Amazon will still
+                # put in results for non-Kindle books (author pages). So we need
+                # to explicitly check if the item is a Kindle book and ignore it
+                # if it isn't.
+                format_ = ''.join(data.xpath(format_xpath))
+                if 'kindle' not in format_.lower():
+                    continue
+
+                # We must have an asin otherwise we can't easily reference the
+                # book later.
+                asin = data.xpath(asin_xpath)
+                if asin:
+                    asin = asin[0]
+                else:
+                    continue
+
+                cover_url = ''.join(data.xpath(cover_xpath))
+
+                title = ''.join(data.xpath(title_xpath))
+                author = ''.join(data.xpath(author_xpath))
+                try:
+                    if self.author_article:
+                        author = author.split(self.author_article, 1)[1].split(" (")[0]
+                except:
+                    pass
+
+                price = ''.join(data.xpath(price_xpath))
+
+                counter -= 1
+
+                s = SearchResult()
+                s.cover_url = cover_url.strip()
+                s.title = title.strip()
+                s.author = author.strip()
+                s.price = price.strip()
+                s.detail_item = asin.strip()
+                s.drm = SearchResult.DRM_UNKNOWN
+                s.formats = 'Kindle'
+
+                yield s
+
+    def get_details(self, search_result, timeout):
+        pass
+
+class AmazonFRKindleStore(AmazonEUBase):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''
--- a/src/calibre/gui2/store/stores/amazon_it_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_it_plugin.py
@ -7,9 +7,100 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

-from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
+from contextlib import closing
+from lxml import html

-class AmazonITKindleStore(AmazonUKKindleStore):
+from PyQt4.Qt import QUrl
+
+from calibre.gui2.store import StorePlugin
+from calibre import browser
+from calibre.gui2 import open_url
+from calibre.gui2.store.search_result import SearchResult
+
+
+# This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
+# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
+# when modified.
+
+class AmazonEUBase(StorePlugin):
+    '''
+    For comments on the implementation, please see amazon_plugin.py
+    '''
+
+    def open(self, parent=None, detail_item=None, external=False):
+
+        store_link = self.store_link % self.aff_id
+        if detail_item:
+            self.aff_id['asin'] = detail_item
+            store_link = self.store_link_details % self.aff_id
+        open_url(QUrl(store_link))
+
+    def search(self, query, max_results=10, timeout=60):
+        url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
+        br = browser()
+
+        counter = max_results
+        with closing(br.open(url, timeout=timeout)) as f:
+            doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
+
+            data_xpath = '//div[contains(@class, "prod")]'
+            format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
+            asin_xpath = '@name'
+            cover_xpath = './/img[@class="productImage"]/@src'
+            title_xpath = './/h3[@class="newaps"]/a//text()'
+            author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()'
+            price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
+
+            for data in doc.xpath(data_xpath):
+                if counter <= 0:
+                    break
+
+                # Even though we are searching digital-text only, Amazon will still
+                # put in results for non-Kindle books (author pages). So we need
+                # to explicitly check if the item is a Kindle book and ignore it
+                # if it isn't.
+                format_ = ''.join(data.xpath(format_xpath))
+                if 'kindle' not in format_.lower():
+                    continue
+
+                # We must have an asin otherwise we can't easily reference the
+                # book later.
+                asin = data.xpath(asin_xpath)
+                if asin:
+                    asin = asin[0]
+                else:
+                    continue
+
+                cover_url = ''.join(data.xpath(cover_xpath))
+
+                title = ''.join(data.xpath(title_xpath))
+                author = ''.join(data.xpath(author_xpath))
+                try:
+                    if self.author_article:
+                        author = author.split(self.author_article, 1)[1].split(" (")[0]
+                except:
+                    pass
+
+                price = ''.join(data.xpath(price_xpath))
+
+                counter -= 1
+
+                s = SearchResult()
+                s.cover_url = cover_url.strip()
+                s.title = title.strip()
+                s.author = author.strip()
+                s.price = price.strip()
+                s.detail_item = asin.strip()
+                s.drm = SearchResult.DRM_UNKNOWN
+                s.formats = 'Kindle'
+
+                yield s
+
+    def get_details(self, search_result, timeout):
+        pass
+
+
+class AmazonITKindleStore(AmazonEUBase):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''
--- a/src/calibre/gui2/store/stores/amazon_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_plugin.py
@ -7,8 +7,6 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

-import random
-import re
 from contextlib import closing

 from lxml import html
@ -131,7 +129,7 @@ class AmazonKindleStore(StorePlugin):

            data_xpath = '//div[contains(@class, "prod")]'
            format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
-            asin_xpath = './/div[@class="image"]/a[1]'
+            asin_xpath = '@name'
            cover_xpath = './/img[@class="productImage"]/@src'
            title_xpath = './/h3[@class="newaps"]/a//text()'
            author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()'
@ -151,15 +149,9 @@ class AmazonKindleStore(StorePlugin):

                # We must have an asin otherwise we can't easily reference the
                # book later.
-                asin_href = None
-                asin_a = data.xpath(asin_xpath)
-                if asin_a:
-                    asin_href = asin_a[0].get('href', '')
-                    m = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href)
-                    if m:
-                        asin = m.group('asin')
-                    else:
-                        continue
+                asin = data.xpath(asin_xpath)
+                if asin:
+                    asin = asin[0]
                else:
                    continue

--- a/src/calibre/gui2/store/stores/amazon_uk_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_uk_plugin.py
@ -7,8 +7,6 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

-import re
-
 from contextlib import closing
 from lxml import html

@ -19,19 +17,12 @@ from calibre.gui2 import open_url
 from calibre.gui2.store import StorePlugin
 from calibre.gui2.store.search_result import SearchResult

-class AmazonUKKindleStore(StorePlugin):
-    aff_id = {'tag': 'calcharles-21'}
-    store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
-                  'location=http://www.amazon.co.uk/Kindle-eBooks/b?'
-                  'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&'
-                  'linkCode=ur2&camp=1634&creative=19450')
-    store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
-                          'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&'
-                          'linkCode=ur2&camp=1634&creative=6738')
-    search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='

-    author_article = 'by '
+# This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
+# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
+# when modified.

+class AmazonEUBase(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''
@ -54,7 +45,7 @@ class AmazonUKKindleStore(StorePlugin):

            data_xpath = '//div[contains(@class, "prod")]'
            format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
-            asin_xpath = './/div[@class="image"]/a[1]'
+            asin_xpath = '@name'
            cover_xpath = './/img[@class="productImage"]/@src'
            title_xpath = './/h3[@class="newaps"]/a//text()'
            author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()'
@ -74,15 +65,9 @@ class AmazonUKKindleStore(StorePlugin):

                # We must have an asin otherwise we can't easily reference the
                # book later.
-                asin_href = None
-                asin_a = data.xpath(asin_xpath)
-                if asin_a:
-                    asin_href = asin_a[0].get('href', '')
-                    m = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href)
-                    if m:
-                        asin = m.group('asin')
-                    else:
-                        continue
+                asin = data.xpath(asin_xpath)
+                if asin:
+                    asin = asin[0]
                else:
                    continue

@ -113,3 +98,17 @@ class AmazonUKKindleStore(StorePlugin):

    def get_details(self, search_result, timeout):
        pass
+
+class AmazonUKKindleStore(AmazonEUBase):
+    aff_id = {'tag': 'calcharles-21'}
+    store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
+                  'location=http://www.amazon.co.uk/Kindle-eBooks/b?'
+                  'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&'
+                  'linkCode=ur2&camp=1634&creative=19450')
+    store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
+                          'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&'
+                          'linkCode=ur2&camp=1634&creative=6738')
+    search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
+
+    author_article = 'by '
+
--- a/src/calibre/gui2/store/stores/waterstones_uk_plugin.py
+++ b/src/calibre/gui2/store/stores/waterstones_uk_plugin.py
@ -41,7 +41,7 @@ class WaterstonesUKStore(BasicStoreConfig, StorePlugin):
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
-        url = 'http://www.waterstones.com/waterstonesweb/advancedSearch.do?buttonClicked=1&format=3757&bookkeywords=' + urllib2.quote(query)
+        url = 'http://www.waterstones.com/waterstonesweb/simpleSearch.do?simpleSearchString=ebook+' + urllib2.quote(query)

        br = browser()

@ -56,6 +56,8 @@ class WaterstonesUKStore(BasicStoreConfig, StorePlugin):
                if not id:
                    continue
                cover_url = ''.join(data.xpath('.//div[@class="image"]/a/img/@src'))
+                if not cover_url.startswith("http"):
+                    cover_url = 'http://www.waterstones.com' + cover_url
                title = ''.join(data.xpath('./div/div/h2/a/text()'))
                author = ', '.join(data.xpath('.//p[@class="byAuthor"]/a/text()'))
                price = ''.join(data.xpath('.//p[@class="price"]/span[@class="priceRed2"]/text()'))