Implement live loading of common amazon code

Kovid Goyal 2022-04-03 08:22:11 +05:30
parent 61b24aef56
commit c98ad9a233
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
5 changed files with 133 additions and 82 deletions


@@ -2,13 +2,32 @@
 # vim:fileencoding=utf-8
 # License: GPL v3 Copyright: 2022, Kovid Goyal <kovid at kovidgoyal.net>

-from lxml import etree, html
 from qt.core import QUrl
-from urllib.parse import urlencode
+from threading import Lock
+from time import monotonic

 from calibre.gui2 import open_url
-from calibre.gui2.store.search_result import SearchResult
-from calibre.scraper.simple import read_url
+
+lock = Lock()
+cached_mod = None
+cached_time = -10000000
+
+
+def live_module():
+    global cached_time, cached_mod
+    with lock:
+        now = monotonic()
+        if now - cached_time > 3600:
+            cached_mod = None
+        if cached_mod is None:
+            from calibre.live import load_module, Strategy
+            cached_mod = load_module('calibre.gui2.store.amazon_live', strategy=Strategy.fast)
+            cached_time = now  # record the refresh time so the module is re-checked at most hourly
+    return cached_mod
+
+
+def get_method(name):
+    return getattr(live_module(), name)


 class AmazonStore:
@@ -24,92 +43,18 @@ class AmazonStore:
     DRM_FREE_TEXT = 'Unlimited'
     FIELD_KEYWORDS = 'k'

-    def search_amazon(self, query, max_results=10, timeout=60, write_html_to=None):
-        field_keywords = self.FIELD_KEYWORDS
-        uquery = self.SEARCH_BASE_QUERY.copy()
-        uquery[field_keywords] = query
-
-        def asbytes(x):
-            if isinstance(x, type('')):
-                x = x.encode('utf-8')
-            return x
-        uquery = {asbytes(k):asbytes(v) for k, v in uquery.items()}
-        url = self.SEARCH_BASE_URL + '?' + urlencode(uquery)
-
-        counter = max_results
-        raw = read_url(self.scraper_storage, url, timeout=timeout)
-        if write_html_to is not None:
-            with open(write_html_to, 'w') as f:
-                f.write(raw)
-        doc = html.fromstring(raw)
-        for result in doc.xpath('//div[contains(@class, "s-result-list")]//div[@data-index and @data-asin]'):
-            kformat = ''.join(result.xpath('.//a[contains(text(), "{}")]//text()'.format(self.KINDLE_EDITION)))
-            # Even though we are searching digital-text only Amazon will still
-            # put in results for non Kindle books (author pages). So we need
-            # to explicitly check if the item is a Kindle book and ignore it
-            # if it isn't.
-            if 'kindle' not in kformat.lower():
-                continue
-            asin = result.get('data-asin')
-            if not asin:
-                continue
-            cover_url = ''.join(result.xpath('.//img/@src'))
-            title = etree.tostring(result.xpath('.//h2')[0], method='text', encoding='unicode')
-            adiv = result.xpath('.//div[contains(@class, "a-color-secondary")]')[0]
-            aparts = etree.tostring(adiv, method='text', encoding='unicode').split()
-            idx = aparts.index(self.BY)
-            author = ' '.join(aparts[idx+1:]).split('|')[0].strip()
-            price = ''
-            for span in result.xpath('.//span[contains(@class, "a-price")]/span[contains(@class, "a-offscreen")]'):
-                q = ''.join(span.xpath('./text()'))
-                if q:
-                    price = q
-                    break
-            counter -= 1
-
-            s = SearchResult()
-            s.cover_url = cover_url.strip()
-            s.title = title.strip()
-            s.author = author.strip()
-            s.detail_item = asin.strip()
-            s.price = price.strip()
-            s.formats = 'Kindle'
-
-            yield s
-
-    def get_details_amazon(self, search_result, timeout):
-        url = self.DETAILS_URL + search_result.detail_item
-        raw = read_url(self.scraper_storage, url, timeout=timeout)
-        idata = html.fromstring(raw)
-        return self.parse_details_amazon(idata, search_result)
-
-    def parse_details_amazon(self, idata, search_result):
-        if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' +
-                       self.DRM_SEARCH_TEXT + '")])'):
-            if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' +
-                           self.DRM_FREE_TEXT + '") and contains(b, "' +
-                           self.DRM_SEARCH_TEXT + '")])'):
-                search_result.drm = SearchResult.DRM_UNLOCKED
-            else:
-                search_result.drm = SearchResult.DRM_UNKNOWN
-        else:
-            search_result.drm = SearchResult.DRM_LOCKED
-        return True
-
     def open(self, parent=None, detail_item=None, external=False):
-        store_link = (self.DETAILS_URL + detail_item) if detail_item else self.STORE_LINK
+        store_link = get_method('get_store_link_amazon')(self, detail_item)
         open_url(QUrl(store_link))

     def search(self, query, max_results=10, timeout=60):
-        for result in self.search_amazon(query, max_results=max_results, timeout=timeout):
+        for result in get_method('search_amazon')(self, query, max_results=max_results, timeout=timeout):
             yield result

     def get_details(self, search_result, timeout):
-        return self.get_details_amazon(search_result, timeout)
+        return get_method('get_details_amazon')(self, search_result, timeout)

     def develop_plugin(self):
         import sys
-        for result in self.search_amazon(' '.join(sys.argv[1:]), write_html_to='/t/amazon.html'):
+        for result in get_method('search_amazon')(self, ' '.join(sys.argv[1:]), write_html_to='/t/amazon.html'):
             print(result)
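For context, the shared code above only reads site-specific class attributes (and scraper_storage) off self, so a concrete store plugin just declares those constants and inherits everything else; all real work is dispatched through get_method() into the hourly-refreshed amazon_live module. A minimal sketch of such a subclass, where the attribute names come from the code above but the values, the MyAmazonStore name and the import path of the base class are assumptions:

# Illustrative sketch, not part of this commit: attribute names are taken
# from the shared code above, values and the class name are made up.
from calibre.gui2.store.amazon_base import AmazonStore  # assumed import path for the base class


class MyAmazonStore(AmazonStore):
    SEARCH_BASE_URL = 'https://www.amazon.com/s/'         # hypothetical
    SEARCH_BASE_QUERY = {'i': 'digital-text'}             # hypothetical
    FIELD_KEYWORDS = 'k'
    DETAILS_URL = 'https://www.amazon.com/dp/'            # hypothetical
    STORE_LINK = 'https://www.amazon.com/Kindle-eBooks'   # hypothetical
    BY = 'by'
    KINDLE_EDITION = 'Kindle Edition'
    DRM_SEARCH_TEXT = 'Simultaneous Device Usage'         # hypothetical
    DRM_FREE_TEXT = 'Unlimited'
    scraper_storage = []  # handed through to read_url() by the live module


# Every entry point now routes through the live module:
store = MyAmazonStore()
for result in store.search('dune', max_results=3):
    print(result.title, result.author, result.price)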


@@ -0,0 +1,94 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2022, Kovid Goyal <kovid at kovidgoyal.net>

from lxml import etree, html
from urllib.parse import urlencode

from calibre.scraper.simple import read_url
from calibre.gui2.store.search_result import SearchResult

module_version = 1  # needed for live updates


def search_amazon(self, query, max_results=10, timeout=60, write_html_to=None):
    field_keywords = self.FIELD_KEYWORDS
    uquery = self.SEARCH_BASE_QUERY.copy()
    uquery[field_keywords] = query

    def asbytes(x):
        if isinstance(x, type('')):
            x = x.encode('utf-8')
        return x
    uquery = {asbytes(k):asbytes(v) for k, v in uquery.items()}
    url = self.SEARCH_BASE_URL + '?' + urlencode(uquery)

    counter = max_results
    raw = read_url(self.scraper_storage, url, timeout=timeout)
    if write_html_to is not None:
        with open(write_html_to, 'w') as f:
            f.write(raw)
    doc = html.fromstring(raw)
    for result in doc.xpath('//div[contains(@class, "s-result-list")]//div[@data-index and @data-asin]'):
        kformat = ''.join(result.xpath('.//a[contains(text(), "{}")]//text()'.format(self.KINDLE_EDITION)))
        # Even though we are searching digital-text only Amazon will still
        # put in results for non Kindle books (author pages). So we need
        # to explicitly check if the item is a Kindle book and ignore it
        # if it isn't.
        if 'kindle' not in kformat.lower():
            continue
        asin = result.get('data-asin')
        if not asin:
            continue
        cover_url = ''.join(result.xpath('.//img/@src'))
        title = etree.tostring(result.xpath('.//h2')[0], method='text', encoding='unicode')
        adiv = result.xpath('.//div[contains(@class, "a-color-secondary")]')[0]
        aparts = etree.tostring(adiv, method='text', encoding='unicode').split()
        idx = aparts.index(self.BY)
        author = ' '.join(aparts[idx+1:]).split('|')[0].strip()
        price = ''
        for span in result.xpath('.//span[contains(@class, "a-price")]/span[contains(@class, "a-offscreen")]'):
            q = ''.join(span.xpath('./text()'))
            if q:
                price = q
                break
        counter -= 1

        s = SearchResult()
        s.cover_url = cover_url.strip()
        s.title = title.strip()
        s.author = author.strip()
        s.detail_item = asin.strip()
        s.price = price.strip()
        s.formats = 'Kindle'

        yield s


def parse_details_amazon(self, idata, search_result):
    if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' +
                   self.DRM_SEARCH_TEXT + '")])'):
        if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' +
                       self.DRM_FREE_TEXT + '") and contains(b, "' +
                       self.DRM_SEARCH_TEXT + '")])'):
            search_result.drm = SearchResult.DRM_UNLOCKED
        else:
            search_result.drm = SearchResult.DRM_UNKNOWN
    else:
        search_result.drm = SearchResult.DRM_LOCKED
    return True


def get_details_amazon(self, search_result, timeout):
    url = self.DETAILS_URL + search_result.detail_item
    raw = read_url(self.scraper_storage, url, timeout=timeout)
    idata = html.fromstring(raw)
    return parse_details_amazon(self, idata, search_result)


def get_store_link_amazon(self, detail_item):
    return (self.DETAILS_URL + detail_item) if detail_item else self.STORE_LINK
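Note that these are deliberately plain module-level functions taking the store instance as an explicit first argument, so get_method(name)(self, ...) in the base class works against whichever copy of this module calibre.live.load_module() returns, and bumping module_version is presumably what lets a freshly downloaded copy supersede the bundled one. A minimal sketch of that dispatch contract, with store standing for any concrete AmazonStore subclass instance and the import path assumed:

# Illustrative only: how the base class reaches into this module.
from calibre.gui2.store.amazon_base import get_method  # assumed import path

def drm_status(store, search_result, timeout=60):
    # get_details_amazon() is resolved from the currently cached live module
    # and called with the store instance as its explicit first argument.
    get_method('get_details_amazon')(store, search_result, timeout)
    return search_result.drm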


@@ -50,6 +50,9 @@ class SearchDialog(QDialog, Ui_Dialog):
         self.results_view.model().cover_pool.set_thread_count(self.cover_thread_count)
         self.results_view.model().details_pool.set_thread_count(self.details_thread_count)
         self.results_view.setCursor(Qt.CursorShape.PointingHandCursor)
+        # needed for live updates of amazon_live.py
+        from calibre.live import start_worker
+        start_worker()

         # Check for results and hung threads.
         self.checker = QTimer()


@@ -1187,9 +1187,12 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
         wait_for_cleanup = cleanup_overseers()
         from calibre.db.delete_service import shutdown
         shutdown()
+        from calibre.live import async_stop_worker
+        wait_for_stop = async_stop_worker()
         time.sleep(2)
         self.istores.join()
         wait_for_cleanup()
+        wait_for_stop()
         return True

     def run_wizard(self, *args):


@@ -58,6 +58,12 @@ def stop_worker(timeout=2*DEFAULT_TIMEOUT):
         w.join(timeout)


+def async_stop_worker():
+    t = Thread(name='StopLiveDownloadWorker', target=stop_worker, daemon=True)
+    t.start()
+    return t.join
+
+
 def report_failure(full_name):
     print(f'Failed to download live module {full_name}', file=sys.stderr)
     import traceback
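The async_stop_worker() helper above is a small but useful pattern: it kicks the blocking stop_worker() off on a daemon thread and hands the caller the thread's join method, so Main.cleanup() in the previous hunk can overlap the live-worker shutdown with the rest of its teardown and block only at the very end, with the wait still bounded by stop_worker()'s own join timeout. A standalone sketch of the pattern (illustrative, not calibre code):

import time
from threading import Thread

def slow_cleanup():
    time.sleep(1)  # stands in for stop_worker()

def async_cleanup():
    # Start the blocking cleanup in the background and return a callable
    # that waits for it, mirroring async_stop_worker() above.
    t = Thread(target=slow_cleanup, daemon=True)
    t.start()
    return t.join

wait_for_stop = async_cleanup()
# ... other shutdown work happens here ...
wait_for_stop()  # block only once everything else is done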