diff --git a/src/calibre/gui2/store/stores/amazon_de_plugin.py b/src/calibre/gui2/store/stores/amazon_de_plugin.py index 71ed8b0491..06bc571494 100644 --- a/src/calibre/gui2/store/stores/amazon_de_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_de_plugin.py @@ -7,6 +7,7 @@ __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' +import re from contextlib import closing from lxml import html @@ -49,7 +50,7 @@ class AmazonEUBase(StorePlugin): asin_xpath = '@name' cover_xpath = './/img[@class="productImage"]/@src' title_xpath = './/h3[@class="newaps"]/a//text()' - author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()' + author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()' price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()' for data in doc.xpath(data_xpath): @@ -57,7 +58,7 @@ class AmazonEUBase(StorePlugin): break # Even though we are searching digital-text only Amazon will still - # put in results for non Kindle books (author pages). Se we need + # put in results for non Kindle books (authors pages). Se we need # to explicitly check if the item is a Kindle book and ignore it # if it isn't. format_ = ''.join(data.xpath(format_xpath)) @@ -75,12 +76,13 @@ class AmazonEUBase(StorePlugin): cover_url = ''.join(data.xpath(cover_xpath)) title = ''.join(data.xpath(title_xpath)) - author = ''.join(data.xpath(author_xpath)) - try: - if self.author_article: - author = author.split(self.author_article, 1)[1].split(" (")[0] - except: - pass + + authors = ''.join(data.xpath(author_xpath)) + authors = re.sub('^' + self.author_article, '', authors) + authors = re.sub(self.and_word, ' & ', authors) + mo = re.match(r'(.*)(\(\d.*)$', authors) + if mo: + authors = mo.group(1).strip() price = ''.join(data.xpath(price_xpath)) @@ -89,7 +91,7 @@ class AmazonEUBase(StorePlugin): s = SearchResult() s.cover_url = cover_url.strip() s.title = title.strip() - s.author = author.strip() + s.author = authors.strip() s.price = price.strip() s.detail_item = asin.strip() s.drm = SearchResult.DRM_UNKNOWN @@ -115,3 +117,5 @@ class AmazonDEKindleStore(AmazonEUBase): search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords=' author_article = 'von ' + + and_word = ' und ' \ No newline at end of file diff --git a/src/calibre/gui2/store/stores/amazon_es_plugin.py b/src/calibre/gui2/store/stores/amazon_es_plugin.py index d613ced2a5..0254b953c4 100644 --- a/src/calibre/gui2/store/stores/amazon_es_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_es_plugin.py @@ -7,6 +7,7 @@ __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' +import re from contextlib import closing from lxml import html @@ -48,7 +49,7 @@ class AmazonEUBase(StorePlugin): asin_xpath = '@name' cover_xpath = './/img[@class="productImage"]/@src' title_xpath = './/h3[@class="newaps"]/a//text()' - author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()' + author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()' price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()' for data in doc.xpath(data_xpath): @@ -56,7 +57,7 @@ class AmazonEUBase(StorePlugin): break # Even though we are searching digital-text only Amazon will still - # put in results for non Kindle books (author pages). Se we need + # put in results for non Kindle books (authors pages). Se we need # to explicitly check if the item is a Kindle book and ignore it # if it isn't. format_ = ''.join(data.xpath(format_xpath)) @@ -74,12 +75,13 @@ class AmazonEUBase(StorePlugin): cover_url = ''.join(data.xpath(cover_xpath)) title = ''.join(data.xpath(title_xpath)) - author = ''.join(data.xpath(author_xpath)) - try: - if self.author_article: - author = author.split(self.author_article, 1)[1].split(" (")[0] - except: - pass + + authors = ''.join(data.xpath(author_xpath)) + authors = re.sub('^' + self.author_article, '', authors) + authors = re.sub(self.and_word, ' & ', authors) + mo = re.match(r'(.*)(\(\d.*)$', authors) + if mo: + authors = mo.group(1).strip() price = ''.join(data.xpath(price_xpath)) @@ -88,7 +90,7 @@ class AmazonEUBase(StorePlugin): s = SearchResult() s.cover_url = cover_url.strip() s.title = title.strip() - s.author = author.strip() + s.author = authors.strip() s.price = price.strip() s.detail_item = asin.strip() s.drm = SearchResult.DRM_UNKNOWN @@ -113,3 +115,5 @@ class AmazonESKindleStore(AmazonEUBase): search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords=' author_article = 'de ' + + and_word = ' y ' \ No newline at end of file diff --git a/src/calibre/gui2/store/stores/amazon_fr_plugin.py b/src/calibre/gui2/store/stores/amazon_fr_plugin.py index 22e5d8ec8e..30f6b6f51e 100644 --- a/src/calibre/gui2/store/stores/amazon_fr_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_fr_plugin.py @@ -7,7 +7,7 @@ __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' - +import re from contextlib import closing from lxml import html @@ -50,7 +50,7 @@ class AmazonEUBase(StorePlugin): asin_xpath = '@name' cover_xpath = './/img[@class="productImage"]/@src' title_xpath = './/h3[@class="newaps"]/a//text()' - author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()' + author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()' price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()' for data in doc.xpath(data_xpath): @@ -58,7 +58,7 @@ class AmazonEUBase(StorePlugin): break # Even though we are searching digital-text only Amazon will still - # put in results for non Kindle books (author pages). Se we need + # put in results for non Kindle books (authors pages). Se we need # to explicitly check if the item is a Kindle book and ignore it # if it isn't. format_ = ''.join(data.xpath(format_xpath)) @@ -76,12 +76,13 @@ class AmazonEUBase(StorePlugin): cover_url = ''.join(data.xpath(cover_xpath)) title = ''.join(data.xpath(title_xpath)) - author = ''.join(data.xpath(author_xpath)) - try: - if self.author_article: - author = author.split(self.author_article, 1)[1].split(" (")[0] - except: - pass + + authors = ''.join(data.xpath(author_xpath)) + authors = re.sub('^' + self.author_article, '', authors) + authors = re.sub(self.and_word, ' & ', authors) + mo = re.match(r'(.*)(\(\d.*)$', authors) + if mo: + authors = mo.group(1).strip() price = ''.join(data.xpath(price_xpath)) @@ -90,7 +91,7 @@ class AmazonEUBase(StorePlugin): s = SearchResult() s.cover_url = cover_url.strip() s.title = title.strip() - s.author = author.strip() + s.author = authors.strip() s.price = price.strip() s.detail_item = asin.strip() s.drm = SearchResult.DRM_UNKNOWN @@ -112,3 +113,5 @@ class AmazonFRKindleStore(AmazonEUBase): search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords=' author_article = 'de ' + + and_word = ' et ' diff --git a/src/calibre/gui2/store/stores/amazon_it_plugin.py b/src/calibre/gui2/store/stores/amazon_it_plugin.py index 14c571e8e1..53028cf192 100644 --- a/src/calibre/gui2/store/stores/amazon_it_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_it_plugin.py @@ -7,6 +7,7 @@ __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' +import re from contextlib import closing from lxml import html @@ -48,7 +49,7 @@ class AmazonEUBase(StorePlugin): asin_xpath = '@name' cover_xpath = './/img[@class="productImage"]/@src' title_xpath = './/h3[@class="newaps"]/a//text()' - author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()' + author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()' price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()' for data in doc.xpath(data_xpath): @@ -56,7 +57,7 @@ class AmazonEUBase(StorePlugin): break # Even though we are searching digital-text only Amazon will still - # put in results for non Kindle books (author pages). Se we need + # put in results for non Kindle books (authors pages). Se we need # to explicitly check if the item is a Kindle book and ignore it # if it isn't. format_ = ''.join(data.xpath(format_xpath)) @@ -74,12 +75,13 @@ class AmazonEUBase(StorePlugin): cover_url = ''.join(data.xpath(cover_xpath)) title = ''.join(data.xpath(title_xpath)) - author = ''.join(data.xpath(author_xpath)) - try: - if self.author_article: - author = author.split(self.author_article, 1)[1].split(" (")[0] - except: - pass + + authors = ''.join(data.xpath(author_xpath)) + authors = re.sub('^' + self.author_article, '', authors) + authors = re.sub(self.and_word, ' & ', authors) + mo = re.match(r'(.*)(\(\d.*)$', authors) + if mo: + authors = mo.group(1).strip() price = ''.join(data.xpath(price_xpath)) @@ -88,7 +90,7 @@ class AmazonEUBase(StorePlugin): s = SearchResult() s.cover_url = cover_url.strip() s.title = title.strip() - s.author = author.strip() + s.author = authors.strip() s.price = price.strip() s.detail_item = asin.strip() s.drm = SearchResult.DRM_UNKNOWN @@ -99,7 +101,6 @@ class AmazonEUBase(StorePlugin): def get_details(self, search_result, timeout): pass - class AmazonITKindleStore(AmazonEUBase): ''' For comments on the implementation, please see amazon_plugin.py @@ -114,3 +115,5 @@ class AmazonITKindleStore(AmazonEUBase): search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords=' author_article = 'di ' + + and_word = ' e ' \ No newline at end of file diff --git a/src/calibre/gui2/store/stores/amazon_uk_plugin.py b/src/calibre/gui2/store/stores/amazon_uk_plugin.py index 0abc19f92e..b5951a533f 100644 --- a/src/calibre/gui2/store/stores/amazon_uk_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_uk_plugin.py @@ -7,6 +7,7 @@ __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' +import re from contextlib import closing from lxml import html @@ -48,7 +49,7 @@ class AmazonEUBase(StorePlugin): asin_xpath = '@name' cover_xpath = './/img[@class="productImage"]/@src' title_xpath = './/h3[@class="newaps"]/a//text()' - author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()' + author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()' price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()' for data in doc.xpath(data_xpath): @@ -56,7 +57,7 @@ class AmazonEUBase(StorePlugin): break # Even though we are searching digital-text only Amazon will still - # put in results for non Kindle books (author pages). Se we need + # put in results for non Kindle books (authors pages). Se we need # to explicitly check if the item is a Kindle book and ignore it # if it isn't. format_ = ''.join(data.xpath(format_xpath)) @@ -74,12 +75,13 @@ class AmazonEUBase(StorePlugin): cover_url = ''.join(data.xpath(cover_xpath)) title = ''.join(data.xpath(title_xpath)) - author = ''.join(data.xpath(author_xpath)) - try: - if self.author_article: - author = author.split(self.author_article, 1)[1].split(" (")[0] - except: - pass + + authors = ''.join(data.xpath(author_xpath)) + authors = re.sub('^' + self.author_article, '', authors) + authors = re.sub(self.and_word, ' & ', authors) + mo = re.match(r'(.*)(\(\d.*)$', authors) + if mo: + authors = mo.group(1).strip() price = ''.join(data.xpath(price_xpath)) @@ -88,7 +90,7 @@ class AmazonEUBase(StorePlugin): s = SearchResult() s.cover_url = cover_url.strip() s.title = title.strip() - s.author = author.strip() + s.author = authors.strip() s.price = price.strip() s.detail_item = asin.strip() s.drm = SearchResult.DRM_UNKNOWN @@ -112,3 +114,5 @@ class AmazonUKKindleStore(AmazonEUBase): author_article = 'by ' + and_word = ' and ' + diff --git a/src/calibre/gui2/store/stores/foyles_uk_plugin.py b/src/calibre/gui2/store/stores/foyles_uk_plugin.py index 7c224f4f70..199b15bc2e 100644 --- a/src/calibre/gui2/store/stores/foyles_uk_plugin.py +++ b/src/calibre/gui2/store/stores/foyles_uk_plugin.py @@ -41,7 +41,7 @@ class FoylesUKStore(BasicStoreConfig, StorePlugin): d.exec_() def search(self, query, max_results=10, timeout=60): - url = 'http://ebooks.foyles.co.uk/search_for-' + urllib2.quote(query) + url = 'http://ebooks.foyles.co.uk/catalog/search/?query=' + urllib2.quote(query) br = browser() @@ -58,7 +58,7 @@ class FoylesUKStore(BasicStoreConfig, StorePlugin): cover_url = ''.join(data.xpath('.//p[@class="doc-cover"]/a/img/@src')) title = ''.join(data.xpath('.//span[@class="title"]/a/text()')) author = ', '.join(data.xpath('.//span[@class="author"]/span[@class="author"]/text()')) - price = ''.join(data.xpath('.//span[@class="price"]/text()')) + price = ''.join(data.xpath('.//span[@itemprop="price"]/text()')) format_ = ''.join(data.xpath('.//p[@class="doc-meta-format"]/span[last()]/text()')) format_, ign, drm = format_.partition(' ') drm = SearchResult.DRM_LOCKED if 'DRM' in drm else SearchResult.DRM_UNLOCKED