From c98ad9a233fbe2ed61e2610f01aec7e6e3199e9e Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 3 Apr 2022 08:22:11 +0530
Subject: [PATCH] Implement live loading of common amazon code

---
 src/calibre/gui2/store/amazon_base.py   | 114 +++++++-----------------
 src/calibre/gui2/store/amazon_live.py   | 105 ++++++++++++++++++++++
 src/calibre/gui2/store/search/search.py |   3 +
 src/calibre/gui2/ui.py                  |   3 +
 src/calibre/live.py                     |   7 ++
 5 files changed, 150 insertions(+), 82 deletions(-)
 create mode 100644 src/calibre/gui2/store/amazon_live.py

diff --git a/src/calibre/gui2/store/amazon_base.py b/src/calibre/gui2/store/amazon_base.py
index a08d3d40b0..281815efb2 100644
--- a/src/calibre/gui2/store/amazon_base.py
+++ b/src/calibre/gui2/store/amazon_base.py
@@ -2,13 +2,37 @@
 # vim:fileencoding=utf-8
 # License: GPL v3 Copyright: 2022, Kovid Goyal
 
-from lxml import etree, html
 from qt.core import QUrl
-from urllib.parse import urlencode
+from threading import Lock
+from time import monotonic
 
 from calibre.gui2 import open_url
-from calibre.gui2.store.search_result import SearchResult
-from calibre.scraper.simple import read_url
+
+
+lock = Lock()
+cached_mod = None
+cached_time = -10000000
+
+
+def live_module():
+    '''Return the live amazon module, loading it via calibre.live and caching it for an hour.'''
+    global cached_time, cached_mod
+    with lock:
+        now = monotonic()
+        if now - cached_time > 3600:
+            cached_mod = None
+        if cached_mod is None:
+            from calibre.live import load_module, Strategy
+            cached_mod = load_module('calibre.gui2.store.amazon_live', strategy=Strategy.fast)
+            # Record when the module was loaded, otherwise the cache always
+            # looks stale and the module is re-loaded on every call
+            cached_time = now
+        return cached_mod
+
+
+def get_method(name):
+    '''Return the attribute called name from the current live module.'''
+    return getattr(live_module(), name)
 
 
 class AmazonStore:
@@ -24,92 +48,18 @@ class AmazonStore:
     DRM_FREE_TEXT = 'Unlimited'
     FIELD_KEYWORDS = 'k'
 
-    def search_amazon(self, query, max_results=10, timeout=60, write_html_to=None):
-        field_keywords = self.FIELD_KEYWORDS
-        uquery = self.SEARCH_BASE_QUERY.copy()
-        uquery[field_keywords] = query
-
-        def asbytes(x):
-            if isinstance(x, type('')):
-                x = x.encode('utf-8')
-            return x
-        uquery = {asbytes(k):asbytes(v) for k, v in uquery.items()}
-        url = self.SEARCH_BASE_URL + '?' + urlencode(uquery)
-
-        counter = max_results
-        raw = read_url(self.scraper_storage, url, timeout=timeout)
-        if write_html_to is not None:
-            with open(write_html_to, 'w') as f:
-                f.write(raw)
-        doc = html.fromstring(raw)
-        for result in doc.xpath('//div[contains(@class, "s-result-list")]//div[@data-index and @data-asin]'):
-            kformat = ''.join(result.xpath('.//a[contains(text(), "{}")]//text()'.format(self.KINDLE_EDITION)))
-            # Even though we are searching digital-text only Amazon will still
-            # put in results for non Kindle books (author pages). So we need
-            # to explicitly check if the item is a Kindle book and ignore it
-            # if it isn't.
-            if 'kindle' not in kformat.lower():
-                continue
-            asin = result.get('data-asin')
-            if not asin:
-                continue
-
-            cover_url = ''.join(result.xpath('.//img/@src'))
-            title = etree.tostring(result.xpath('.//h2')[0], method='text', encoding='unicode')
-            adiv = result.xpath('.//div[contains(@class, "a-color-secondary")]')[0]
-            aparts = etree.tostring(adiv, method='text', encoding='unicode').split()
-            idx = aparts.index(self.BY)
-            author = ' '.join(aparts[idx+1:]).split('|')[0].strip()
-            price = ''
-            for span in result.xpath('.//span[contains(@class, "a-price")]/span[contains(@class, "a-offscreen")]'):
-                q = ''.join(span.xpath('./text()'))
-                if q:
-                    price = q
-                    break
-
-            counter -= 1
-
-            s = SearchResult()
-            s.cover_url = cover_url.strip()
-            s.title = title.strip()
-            s.author = author.strip()
-            s.detail_item = asin.strip()
-            s.price = price.strip()
-            s.formats = 'Kindle'
-
-            yield s
-
-    def get_details_amazon(self, search_result, timeout):
-        url = self.DETAILS_URL + search_result.detail_item
-        raw = read_url(self.scraper_storage, url, timeout=timeout)
-        idata = html.fromstring(raw)
-        return self.parse_details_amazon(idata, search_result)
-
-    def parse_details_amazon(self, idata, search_result):
-        if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' +
-                       self.DRM_SEARCH_TEXT + '")])'):
-            if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' +
-                           self.DRM_FREE_TEXT + '") and contains(b, "' +
-                           self.DRM_SEARCH_TEXT + '")])'):
-                search_result.drm = SearchResult.DRM_UNLOCKED
-            else:
-                search_result.drm = SearchResult.DRM_UNKNOWN
-        else:
-            search_result.drm = SearchResult.DRM_LOCKED
-        return True
-
     def open(self, parent=None, detail_item=None, external=False):
-        store_link = (self.DETAILS_URL + detail_item) if detail_item else self.STORE_LINK
+        store_link = get_method('get_store_link_amazon')(self, detail_item)
         open_url(QUrl(store_link))
 
     def search(self, query, max_results=10, timeout=60):
-        for result in self.search_amazon(query, max_results=max_results, timeout=timeout):
+        for result in get_method('search_amazon')(self, query, max_results=max_results, timeout=timeout):
             yield result
 
     def get_details(self, search_result, timeout):
-        return self.get_details_amazon(search_result, timeout)
+        return get_method('get_details_amazon')(self, search_result, timeout)
 
     def develop_plugin(self):
         import sys
-        for result in self.search_amazon(' '.join(sys.argv[1:]), write_html_to='/t/amazon.html'):
+        for result in get_method('search_amazon')(self, ' '.join(sys.argv[1:]), write_html_to='/t/amazon.html'):
             print(result)
diff --git a/src/calibre/gui2/store/amazon_live.py b/src/calibre/gui2/store/amazon_live.py
new file mode 100644
index 0000000000..848e21eae9
--- /dev/null
+++ b/src/calibre/gui2/store/amazon_live.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+# License: GPL v3 Copyright: 2022, Kovid Goyal
+
+
+from lxml import etree, html
+from urllib.parse import urlencode
+
+from calibre.scraper.simple import read_url
+from calibre.gui2.store.search_result import SearchResult
+
+
+module_version = 1  # needed for live updates
+
+
+def search_amazon(self, query, max_results=10, timeout=60, write_html_to=None):
+    '''
+    Search the Amazon store described by ``self`` for ``query``, yielding a
+    SearchResult for at most ``max_results`` Kindle books. If
+    ``write_html_to`` is not None the raw search page is also saved there.
+    '''
+    field_keywords = self.FIELD_KEYWORDS
+    uquery = self.SEARCH_BASE_QUERY.copy()
+    uquery[field_keywords] = query
+
+    def asbytes(x):
+        if isinstance(x, type('')):
+            x = x.encode('utf-8')
+        return x
+    uquery = {asbytes(k):asbytes(v) for k, v in uquery.items()}
+    url = self.SEARCH_BASE_URL + '?' + urlencode(uquery)
+
+    counter = max_results
+    raw = read_url(self.scraper_storage, url, timeout=timeout)
+    if write_html_to is not None:
+        with open(write_html_to, 'w') as f:
+            f.write(raw)
+    doc = html.fromstring(raw)
+    for result in doc.xpath('//div[contains(@class, "s-result-list")]//div[@data-index and @data-asin]'):
+        # Honour max_results: counter is decremented for every result yielded
+        if counter <= 0:
+            break
+        kformat = ''.join(result.xpath('.//a[contains(text(), "{}")]//text()'.format(self.KINDLE_EDITION)))
+        # Even though we are searching digital-text only Amazon will still
+        # put in results for non Kindle books (author pages). So we need
+        # to explicitly check if the item is a Kindle book and ignore it
+        # if it isn't.
+        if 'kindle' not in kformat.lower():
+            continue
+        asin = result.get('data-asin')
+        if not asin:
+            continue
+
+        cover_url = ''.join(result.xpath('.//img/@src'))
+        title = etree.tostring(result.xpath('.//h2')[0], method='text', encoding='unicode')
+        adiv = result.xpath('.//div[contains(@class, "a-color-secondary")]')[0]
+        aparts = etree.tostring(adiv, method='text', encoding='unicode').split()
+        idx = aparts.index(self.BY)
+        author = ' '.join(aparts[idx+1:]).split('|')[0].strip()
+        price = ''
+        for span in result.xpath('.//span[contains(@class, "a-price")]/span[contains(@class, "a-offscreen")]'):
+            q = ''.join(span.xpath('./text()'))
+            if q:
+                price = q
+                break
+
+        counter -= 1
+
+        s = SearchResult()
+        s.cover_url = cover_url.strip()
+        s.title = title.strip()
+        s.author = author.strip()
+        s.detail_item = asin.strip()
+        s.price = price.strip()
+        s.formats = 'Kindle'
+
+        yield s
+
+
+def parse_details_amazon(self, idata, search_result):
+    '''Set search_result.drm from the DRM text found in the parsed details page idata. Always returns True.'''
+    if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' +
+                   self.DRM_SEARCH_TEXT + '")])'):
+        if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' +
+                       self.DRM_FREE_TEXT + '") and contains(b, "' +
+                       self.DRM_SEARCH_TEXT + '")])'):
+            search_result.drm = SearchResult.DRM_UNLOCKED
+        else:
+            search_result.drm = SearchResult.DRM_UNKNOWN
+    else:
+        search_result.drm = SearchResult.DRM_LOCKED
+    return True
+
+
+def get_details_amazon(self, search_result, timeout):
+    '''Download the details page for search_result and pass it to parse_details_amazon().'''
+    url = self.DETAILS_URL + search_result.detail_item
+    raw = read_url(self.scraper_storage, url, timeout=timeout)
+    idata = html.fromstring(raw)
+    return parse_details_amazon(self, idata, search_result)
+
+
+def get_store_link_amazon(self, detail_item):
+    '''Return the details URL for detail_item, or the store front link if detail_item is empty.'''
+    return (self.DETAILS_URL + detail_item) if detail_item else self.STORE_LINK
diff --git a/src/calibre/gui2/store/search/search.py b/src/calibre/gui2/store/search/search.py
index 822e78393e..b200ba2a0c 100644
--- a/src/calibre/gui2/store/search/search.py
+++ b/src/calibre/gui2/store/search/search.py
@@ -50,6 +50,9 @@ class SearchDialog(QDialog, Ui_Dialog):
         self.results_view.model().cover_pool.set_thread_count(self.cover_thread_count)
         self.results_view.model().details_pool.set_thread_count(self.details_thread_count)
         self.results_view.setCursor(Qt.CursorShape.PointingHandCursor)
+        # needed for live updates of amazon_live.py
+        from calibre.live import start_worker
+        start_worker()
 
         # Check for results and hung threads.
         self.checker = QTimer()
diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py
index a0b90fe8cb..dd99108402 100644
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@@ -1187,9 +1187,12 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
         wait_for_cleanup = cleanup_overseers()
         from calibre.db.delete_service import shutdown
         shutdown()
+        from calibre.live import async_stop_worker
+        wait_for_stop = async_stop_worker()
         time.sleep(2)
         self.istores.join()
         wait_for_cleanup()
+        wait_for_stop()
         return True
 
     def run_wizard(self, *args):
diff --git a/src/calibre/live.py b/src/calibre/live.py
index 9dd6c6510b..02aa05586e 100644
--- a/src/calibre/live.py
+++ b/src/calibre/live.py
@@ -58,6 +58,13 @@ def stop_worker(timeout=2*DEFAULT_TIMEOUT):
             w.join(timeout)
 
 
+def async_stop_worker():
+    '''Run stop_worker() in a daemon thread and return that thread's join method so callers can wait for it.'''
+    t = Thread(name='StopLiveDownloadWorker', target=stop_worker, daemon=True)
+    t.start()
+    return t.join
+
+
 def report_failure(full_name):
     print(f'Failed to download live module {full_name}', file=sys.stderr)
     import traceback