Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-31 14:33:54 -04:00)
Get books: Update ebooks.com plugin for website changes. Fixes #2023046 [Get Books unable to find book from ebooks.com](https://bugs.launchpad.net/calibre/+bug/2023046)
parent c392095ef2
commit 4ba9297343
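
For context: the updated plugin no longer scrapes the HTML result list. It fetches the ordinary search page only to discover the JSON search endpoint that ebooks.com now embeds in a data-endpoint attribute, then reads titles, authors and prices from that endpoint's JSON reply. Below is a minimal standalone sketch of that flow using only the Python standard library; the URLs, the data-endpoint regex and the JSON field names are taken from the patch that follows, while the fetch() helper and its User-Agent header are purely illustrative (the real plugin uses calibre's browser()).

import json
import re
from urllib.parse import quote_plus
from urllib.request import Request, urlopen


def fetch(url, timeout=60):
    # Illustrative stand-in for calibre's browser(); just a plain HTTP GET.
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    with urlopen(req, timeout=timeout) as f:
        return f.read()


def search(query, max_results=10):
    base = 'https://www.ebooks.com'
    # Step 1: fetch the normal search results page.
    raw = fetch(base + '/SearchApp/SearchResults.net?term=' + quote_plus(query)).decode('utf-8')
    # Step 2: the page embeds the real search API in a data-endpoint attribute.
    endpoint = re.search(r'data-endpoint="(/api/search/.+?)"', raw).group(1)
    # Step 3: call that endpoint and walk the "books" array of the JSON reply.
    data = json.loads(fetch(base + endpoint))
    for book in data['books'][:max_results]:
        yield book['title'], ' & '.join(a['name'] for a in book['authors']), book['price']


if __name__ == '__main__':
    for title, authors, price in search('dune'):
        print(title, '|', authors, '|', price)
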
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, division, print_function, unicode_literals

-store_version = 3  # Needed for dynamic plugin loading
+store_version = 4  # Needed for dynamic plugin loading

 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
@@ -9,13 +9,12 @@ __docformat__ = 'restructuredtext en'

 import re
 from contextlib import closing

 try:
     from urllib.parse import quote_plus
 except ImportError:
     from urllib import quote_plus

-from lxml import html
 from qt.core import QUrl

 from calibre import browser, url_slash_cleaner
@@ -26,6 +25,66 @@ from calibre.gui2.store.search_result import SearchResult
 from calibre.gui2.store.web_store_dialog import WebStoreDialog


+def absolutize(url):
+    if url.startswith('/'):
+        url = 'https://www.ebooks.com' + url
+    return url
+
+
+def search_ec(query, max_results=10, timeout=60, write_html_to=''):
+    import json
+    from urllib.parse import parse_qs, urlparse
+    url = 'https://www.ebooks.com/SearchApp/SearchResults.net?term=' + quote_plus(query)
+    br = browser()
+    with closing(br.open(url, timeout=timeout)) as f:
+        raw = f.read()
+    if write_html_to:
+        with open(write_html_to, 'wb') as d:
+            d.write(raw)
+    api = re.search(r'data-endpoint="(/api/search/.+?)"', raw.decode('utf-8')).group(1)
+    counter = max_results
+    url = absolutize(api)
+    cc = parse_qs(urlparse(url).query)['CountryCode'][0]
+    with closing(br.open(url, timeout=timeout)) as f:
+        raw = f.read()
+    if write_html_to:
+        with open(write_html_to + '.json', 'wb') as d:
+            d.write(raw)
+    data = json.loads(raw)
+    for book in data['books']:
+        if counter <= 0:
+            break
+        counter -= 1
+        s = SearchResult()
+        s.cover_url = absolutize(book['image_url'])
+        s.title = book['title']
+        s.author = ' & '.join(x['name'] for x in book['authors'])
+        s.price = book['price']
+        s.detail_item = absolutize(book['book_url'])
+        s.ebooks_com_api_url = 'https://www.ebooks.com/api/book/?bookId={}&countryCode={}'.format(book["id"], cc)
+        s.drm = SearchResult.DRM_UNKNOWN
+        yield s
+
+
+def ec_details(search_result, timeout=30, write_data_to=''):
+    import json
+    br = browser()
+    with closing(br.open(search_result.ebooks_com_api_url, timeout=timeout)) as f:
+        raw = f.read()
+    if write_data_to:
+        with open(write_data_to, 'wb') as d:
+            d.write(raw)
+    data = json.loads(raw)
+    if 'drm' in data and 'drm_free' in data['drm']:
+        search_result.drm = SearchResult.DRM_UNLOCKED if data['drm']['drm_free'] else SearchResult.DRM_LOCKED
+    fmts = []
+    for x in data['information']['formats']:
+        x = x.split()[0]
+        fmts.append(x)
+    if fmts:
+        search_result.formats = ', '.join(fmts).upper()
+
+
 class EbookscomStore(BasicStoreConfig, StorePlugin):

     def open(self, parent=None, detail_item=None, external=False):
@@ -47,68 +106,18 @@ class EbookscomStore(BasicStoreConfig, StorePlugin):
             d.exec()

     def search(self, query, max_results=10, timeout=60):
-        url = 'http://www.ebooks.com/SearchApp/SearchResults.net?term=' + quote_plus(query)
-
-        br = browser()
-
-        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read())
-            for data in doc.xpath('//div[@id="results"]//li'):
-                if counter <= 0:
-                    break
-
-                id = ''.join(data.xpath('.//a[1]/@href'))
-                mo = re.search(r'\d+', id)
-                if not mo:
-                    continue
-                id = mo.group()
-
-                cover_url = ''.join(data.xpath('.//div[contains(@class, "img")]//img/@src'))
-
-                title = ''.join(data.xpath(
-                    'descendant::span[@class="book-title"]/a/text()')).strip()
-                author = ', '.join(data.xpath(
-                    'descendant::span[@class="author"]/a/text()')).strip()
-                if not title or not author:
-                    continue
-
-                price = ''.join(data.xpath(
-                    './/span[starts-with(text(), "US$") or'
-                    ' starts-with(text(), "€") or starts-with(text(), "CA$") or'
-                    ' starts-with(text(), "AU$") or starts-with(text(), "£")]/text()')).strip()
-
-                counter -= 1
-
-                s = SearchResult()
-                s.cover_url = cover_url
-                s.title = title.strip()
-                s.author = author.strip()
-                s.price = price.strip()
-                s.detail_item = '?url=http://www.ebooks.com/cj.asp?IID=' + id.strip() + '&cjsku=' + id.strip()
-
-                yield s
+        yield from search_ec(query, max_results, timeout)

     def get_details(self, search_result, timeout):
-        url = 'http://www.ebooks.com/ebooks/book_display.asp?IID='
-
-        mo = re.search(r'\?IID=(?P<id>\d+)', search_result.detail_item)
-        if mo:
-            id = mo.group('id')
-            if not id:
-                return
-
-            br = browser()
-            with closing(br.open(url + id, timeout=timeout)) as nf:
-                pdoc = html.fromstring(nf.read())
-
-                search_result.drm = SearchResult.DRM_UNLOCKED
-                permissions = ' '.join(pdoc.xpath('//div[@class="permissions-items"]//text()'))
-                if 'off' in permissions:
-                    search_result.drm = SearchResult.DRM_LOCKED
-
-                fdata = pdoc.xpath('//div[contains(@class, "more-links") and contains(@class, "more-links-info")]/div//span/text()')
-                if len(fdata) > 1:
-                    search_result.formats = ', '.join(fdata[1:])
-
+        ec_details(search_result, timeout)
         return True
+
+
+if __name__ == '__main__':
+    import sys
+    results = tuple(search_ec(' '.join(sys.argv[1:]), write_html_to='/t/ec.html'))
+    for result in results:
+        print(result)
+    ec_details(results[0], write_data_to='/t/ecd.json')
+    print('-'*80)
+    print(results[0])
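
Because search_ec() and ec_details() are added as plain module-level functions, and the patch ends with the small if __name__ == '__main__' harness above (which dumps the raw server responses to the hardcoded /t/ec.html and /t/ecd.json debug files), the new code can also be exercised outside the Get Books GUI. A hypothetical snippet, assuming a calibre development environment in which the plugin module is importable under the path guessed below:

from calibre.gui2.store.stores.ebooks_com_plugin import ec_details, search_ec

# The import path above is an assumption; adjust it to wherever the plugin module lives.
results = list(search_ec('dune', max_results=3))
for r in results:
    print(r.title, '|', r.author, '|', r.price)
ec_details(results[0])  # fills in drm and formats via the per-book API
print(results[0].formats, results[0].drm)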