Get books: Update ebooks.com plugin for website changes. Fixes #2023046 [Get Books unable to find book from ebooks.com](https://bugs.launchpad.net/calibre/+bug/2023046)

Kovid Goyal 2023-08-25 11:54:24 +05:30
parent c392095ef2
commit 4ba9297343


@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
store_version = 3 # Needed for dynamic plugin loading
store_version = 4 # Needed for dynamic plugin loading
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@@ -9,13 +9,12 @@ __docformat__ = 'restructuredtext en'
import re
from contextlib import closing
try:
    from urllib.parse import quote_plus
except ImportError:
    from urllib import quote_plus
from lxml import html
from qt.core import QUrl
from calibre import browser, url_slash_cleaner
@@ -26,6 +25,66 @@ from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
def absolutize(url):
    if url.startswith('/'):
        url = 'https://www.ebooks.com' + url
    return url

def search_ec(query, max_results=10, timeout=60, write_html_to=''):
    import json
    from urllib.parse import parse_qs, urlparse
    # The search results page embeds the URL of the JSON search API in a
    # data-endpoint attribute; scrape that out and query the API instead of
    # parsing the HTML results directly.
    url = 'https://www.ebooks.com/SearchApp/SearchResults.net?term=' + quote_plus(query)
    br = browser()
    with closing(br.open(url, timeout=timeout)) as f:
        raw = f.read()
        if write_html_to:
            with open(write_html_to, 'wb') as d:
                d.write(raw)
    api = re.search(r'data-endpoint="(/api/search/.+?)"', raw.decode('utf-8')).group(1)
    counter = max_results
    url = absolutize(api)
    cc = parse_qs(urlparse(url).query)['CountryCode'][0]
    with closing(br.open(url, timeout=timeout)) as f:
        raw = f.read()
        if write_html_to:
            with open(write_html_to + '.json', 'wb') as d:
                d.write(raw)
    data = json.loads(raw)
    for book in data['books']:
        if counter <= 0:
            break
        counter -= 1
        s = SearchResult()
        s.cover_url = absolutize(book['image_url'])
        s.title = book['title']
        s.author = ' & '.join(x['name'] for x in book['authors'])
        s.price = book['price']
        s.detail_item = absolutize(book['book_url'])
        s.ebooks_com_api_url = 'https://www.ebooks.com/api/book/?bookId={}&countryCode={}'.format(book["id"], cc)
        s.drm = SearchResult.DRM_UNKNOWN
        yield s

def ec_details(search_result, timeout=30, write_data_to=''):
    import json
    # The per-book JSON API reports DRM status and the available formats.
    br = browser()
    with closing(br.open(search_result.ebooks_com_api_url, timeout=timeout)) as f:
        raw = f.read()
        if write_data_to:
            with open(write_data_to, 'wb') as d:
                d.write(raw)
    data = json.loads(raw)
    if 'drm' in data and 'drm_free' in data['drm']:
        search_result.drm = SearchResult.DRM_UNLOCKED if data['drm']['drm_free'] else SearchResult.DRM_LOCKED
    fmts = []
    for x in data['information']['formats']:
        x = x.split()[0]
        fmts.append(x)
    if fmts:
        search_result.formats = ', '.join(fmts).upper()

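The JSON shapes these two helpers rely on are only implicit in the field accesses above. As a minimal sketch, the payloads look roughly like this; the key names come from the code, while the concrete values are invented for illustration and are not real API responses:

```python
# Hypothetical payloads, shaped purely after the fields read above.
search_payload = {
    'books': [{
        'id': 12345678,
        'title': 'Example Title',
        'authors': [{'name': 'Jane Doe'}],
        'price': 'US$9.99',
        'image_url': '/cover/12345678.jpg',
        'book_url': '/en-us/book/12345678/example-title/jane-doe/',
    }],
}
book_payload = {
    'drm': {'drm_free': False},
    'information': {'formats': ['EPUB (reflowable)', 'PDF']},
}

# The same reductions ec_details applies: DRM flag, first word of each format.
drm_locked = not book_payload['drm']['drm_free']
formats = ', '.join(x.split()[0] for x in book_payload['information']['formats']).upper()
print(drm_locked, formats)  # True EPUB, PDF
```
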
class EbookscomStore(BasicStoreConfig, StorePlugin):

    def open(self, parent=None, detail_item=None, external=False):
@@ -47,68 +106,18 @@ class EbookscomStore(BasicStoreConfig, StorePlugin):
            d.exec()
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.ebooks.com/SearchApp/SearchResults.net?term=' + quote_plus(query)
        br = browser()
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@id="results"]//li'):
                if counter <= 0:
                    break
                id = ''.join(data.xpath('.//a[1]/@href'))
                mo = re.search(r'\d+', id)
                if not mo:
                    continue
                id = mo.group()
                cover_url = ''.join(data.xpath('.//div[contains(@class, "img")]//img/@src'))
                title = ''.join(data.xpath(
                    'descendant::span[@class="book-title"]/a/text()')).strip()
                author = ', '.join(data.xpath(
                    'descendant::span[@class="author"]/a/text()')).strip()
                if not title or not author:
                    continue
                price = ''.join(data.xpath(
                    './/span[starts-with(text(), "US$") or'
                    ' starts-with(text(), "€") or starts-with(text(), "CA$") or'
                    ' starts-with(text(), "AU$") or starts-with(text(), "£")]/text()')).strip()
                counter -= 1
                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = '?url=http://www.ebooks.com/cj.asp?IID=' + id.strip() + '&cjsku=' + id.strip()
                yield s
        yield from search_ec(query, max_results, timeout)
    def get_details(self, search_result, timeout):
        url = 'http://www.ebooks.com/ebooks/book_display.asp?IID='
        mo = re.search(r'\?IID=(?P<id>\d+)', search_result.detail_item)
        if mo:
            id = mo.group('id')
            if not id:
                return
            br = browser()
            with closing(br.open(url + id, timeout=timeout)) as nf:
                pdoc = html.fromstring(nf.read())
                search_result.drm = SearchResult.DRM_UNLOCKED
                permissions = ' '.join(pdoc.xpath('//div[@class="permissions-items"]//text()'))
                if 'off' in permissions:
                    search_result.drm = SearchResult.DRM_LOCKED
                fdata = pdoc.xpath('//div[contains(@class, "more-links") and contains(@class, "more-links-info")]/div//span/text()')
                if len(fdata) > 1:
                    search_result.formats = ', '.join(fdata[1:])
        ec_details(search_result, timeout)
        return True

if __name__ == '__main__':
    import sys
    results = tuple(search_ec(' '.join(sys.argv[1:]), write_html_to='/t/ec.html'))
    for result in results:
        print(result)
    ec_details(results[0], write_data_to='/t/ecd.json')
    print('-'*80)
    print(results[0])
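
For quick manual checks outside a calibre environment, the same two-request flow can be sketched with just the standard library. The URLs, the data-endpoint regex and the 'books' key are taken from search_ec above; the User-Agent header, the sample query and the assumption that a plain GET is accepted are illustrative guesses, not part of the plugin:

```python
# Rough standalone sketch of the flow in search_ec, stdlib only.
# Assumes the search page still embeds data-endpoint="/api/search/..." and that
# the endpoint returns JSON with a top-level "books" list.
import json
import re
from urllib.parse import quote_plus
from urllib.request import Request, urlopen


def fetch(url, timeout=60):
    # The real plugin goes through calibre's browser(); a plain urlopen with a
    # browser-like User-Agent is assumed to be enough for a manual check.
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    with urlopen(req, timeout=timeout) as f:
        return f.read()


if __name__ == '__main__':
    page = fetch('https://www.ebooks.com/SearchApp/SearchResults.net?term=' + quote_plus('dune'))
    api_path = re.search(r'data-endpoint="(/api/search/.+?)"', page.decode('utf-8')).group(1)
    data = json.loads(fetch('https://www.ebooks.com' + api_path))
    for book in data['books'][:5]:
        print(book['title'], '-', book['price'])
```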