From 4ba929734310982ab23bb79055403491a1b09f6b Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Fri, 25 Aug 2023 11:54:24 +0530
Subject: [PATCH] Get books: Update ebooks.com plugin for website changes.
 Fixes #2023046 [Get Books unable to find book from
 ebooks.com](https://bugs.launchpad.net/calibre/+bug/2023046)

---
Note (not part of the commit message): the line structure of this patch was
reconstructed from a copy whose newlines and indentation had been collapsed.
Hunk line counts were checked against the hunk headers and the diffstat;
leading whitespace inside hunks is inferred from Python syntax and should be
confirmed against the upstream commit. Author e-mail addresses were missing
from that copy and have not been re-added.

 .../gui2/store/stores/ebooks_com_plugin.py    | 139 ++++++++++--------
 1 file changed, 74 insertions(+), 65 deletions(-)

diff --git a/src/calibre/gui2/store/stores/ebooks_com_plugin.py b/src/calibre/gui2/store/stores/ebooks_com_plugin.py
index f82ff0c902..9be545df11 100644
--- a/src/calibre/gui2/store/stores/ebooks_com_plugin.py
+++ b/src/calibre/gui2/store/stores/ebooks_com_plugin.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, division, print_function, unicode_literals
 
-store_version = 3  # Needed for dynamic plugin loading
+store_version = 4  # Needed for dynamic plugin loading
 
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember '
@@ -9,13 +9,12 @@ __docformat__ = 'restructuredtext en'
 
 import re
 from contextlib import closing
+
 try:
     from urllib.parse import quote_plus
 except ImportError:
     from urllib import quote_plus
 
-from lxml import html
-
 from qt.core import QUrl
 
 from calibre import browser, url_slash_cleaner
@@ -26,6 +25,66 @@ from calibre.gui2.store.search_result import SearchResult
 from calibre.gui2.store.web_store_dialog import WebStoreDialog
 
 
+def absolutize(url):
+    if url.startswith('/'):
+        url = 'https://www.ebooks.com' + url
+    return url
+
+
+def search_ec(query, max_results=10, timeout=60, write_html_to=''):
+    import json
+    from urllib.parse import parse_qs, urlparse
+    url = 'https://www.ebooks.com/SearchApp/SearchResults.net?term=' + quote_plus(query)
+    br = browser()
+    with closing(br.open(url, timeout=timeout)) as f:
+        raw = f.read()
+    if write_html_to:
+        with open(write_html_to, 'wb') as d:
+            d.write(raw)
+    api = re.search(r'data-endpoint="(/api/search/.+?)"', raw.decode('utf-8')).group(1)
+    counter = max_results
+    url = absolutize(api)
+    cc = parse_qs(urlparse(url).query)['CountryCode'][0]
+    with closing(br.open(url, timeout=timeout)) as f:
+        raw = f.read()
+    if write_html_to:
+        with open(write_html_to + '.json', 'wb') as d:
+            d.write(raw)
+    data = json.loads(raw)
+    for book in data['books']:
+        if counter <= 0:
+            break
+        counter -= 1
+        s = SearchResult()
+        s.cover_url = absolutize(book['image_url'])
+        s.title = book['title']
+        s.author = ' & '.join(x['name'] for x in book['authors'])
+        s.price = book['price']
+        s.detail_item = absolutize(book['book_url'])
+        s.ebooks_com_api_url = 'https://www.ebooks.com/api/book/?bookId={}&countryCode={}'.format(book["id"], cc)
+        s.drm = SearchResult.DRM_UNKNOWN
+        yield s
+
+
+def ec_details(search_result, timeout=30, write_data_to=''):
+    import json
+    br = browser()
+    with closing(br.open(search_result.ebooks_com_api_url, timeout=timeout)) as f:
+        raw = f.read()
+    if write_data_to:
+        with open(write_data_to, 'wb') as d:
+            d.write(raw)
+    data = json.loads(raw)
+    if 'drm' in data and 'drm_free' in data['drm']:
+        search_result.drm = SearchResult.DRM_UNLOCKED if data['drm']['drm_free'] else SearchResult.DRM_LOCKED
+    fmts = []
+    for x in data['information']['formats']:
+        x = x.split()[0]
+        fmts.append(x)
+    if fmts:
+        search_result.formats = ', '.join(fmts).upper()
+
+
 class EbookscomStore(BasicStoreConfig, StorePlugin):
 
     def open(self, parent=None, detail_item=None, external=False):
@@ -47,68 +106,18 @@ class EbookscomStore(BasicStoreConfig, StorePlugin):
             d.exec()
 
     def search(self, query, max_results=10, timeout=60):
-        url = 'http://www.ebooks.com/SearchApp/SearchResults.net?term=' + quote_plus(query)
-
-        br = browser()
-
-        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read())
-            for data in doc.xpath('//div[@id="results"]//li'):
-                if counter <= 0:
-                    break
-
-                id = ''.join(data.xpath('.//a[1]/@href'))
-                mo = re.search(r'\d+', id)
-                if not mo:
-                    continue
-                id = mo.group()
-
-                cover_url = ''.join(data.xpath('.//div[contains(@class, "img")]//img/@src'))
-
-                title = ''.join(data.xpath(
-                    'descendant::span[@class="book-title"]/a/text()')).strip()
-                author = ', '.join(data.xpath(
-                    'descendant::span[@class="author"]/a/text()')).strip()
-                if not title or not author:
-                    continue
-
-                price = ''.join(data.xpath(
-                    './/span[starts-with(text(), "US$") or'
-                    ' starts-with(text(), "€") or starts-with(text(), "CA$") or'
-                    ' starts-with(text(), "AU$") or starts-with(text(), "£")]/text()')).strip()
-
-                counter -= 1
-
-                s = SearchResult()
-                s.cover_url = cover_url
-                s.title = title.strip()
-                s.author = author.strip()
-                s.price = price.strip()
-                s.detail_item = '?url=http://www.ebooks.com/cj.asp?IID=' + id.strip() + '&cjsku=' + id.strip()
-
-                yield s
+        yield from search_ec(query, max_results, timeout)
 
     def get_details(self, search_result, timeout):
-        url = 'http://www.ebooks.com/ebooks/book_display.asp?IID='
-
-        mo = re.search(r'\?IID=(?P<id>\d+)', search_result.detail_item)
-        if mo:
-            id = mo.group('id')
-        if not id:
-            return
-
-        br = browser()
-        with closing(br.open(url + id, timeout=timeout)) as nf:
-            pdoc = html.fromstring(nf.read())
-
-            search_result.drm = SearchResult.DRM_UNLOCKED
-            permissions = ' '.join(pdoc.xpath('//div[@class="permissions-items"]//text()'))
-            if 'off' in permissions:
-                search_result.drm = SearchResult.DRM_LOCKED
-
-            fdata = pdoc.xpath('//div[contains(@class, "more-links") and contains(@class, "more-links-info")]/div//span/text()')
-            if len(fdata) > 1:
-                search_result.formats = ', '.join(fdata[1:])
-
+        ec_details(search_result, timeout)
         return True
+
+
+if __name__ == '__main__':
+    import sys
+    results = tuple(search_ec(' '.join(sys.argv[1:]), write_html_to='/t/ec.html'))
+    for result in results:
+        print(result)
+    ec_details(results[0], write_data_to='/t/ecd.json')
+    print('-'*80)
+    print(results[0])