GetBooks; Update Google Books plugin for website changes

2025-07-09 03:04:10 -04:00 · 2017-03-07 09:34:58 +05:30 · 2017-03-07 09:34:58 +05:30 · f086a48a4a
commit f086a48a4a
parent c4a949bfd7
1 changed files with 54 additions and 35 deletions
--- a/src/calibre/gui2/store/stores/google_books_plugin.py
+++ b/src/calibre/gui2/store/stores/google_books_plugin.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 3  # Needed for dynamic plugin loading
+store_version = 4  # Needed for dynamic plugin loading
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
@@ -11,9 +11,9 @@ import urllib
 from contextlib import closing
 from lxml import html
 from PyQt5.Qt import QUrl
 import html5lib
 from calibre import browser, url_slash_cleaner
 from calibre.gui2 import open_url
 from calibre.gui2.store import StorePlugin
@@ -22,6 +22,49 @@ from calibre.gui2.store.search_result import SearchResult
 from calibre.gui2.store.web_store_dialog import WebStoreDialog
 def parse_html(raw):
    '''Parse the HTML in ``raw`` with html5lib using the lxml tree builder,
    with HTML element namespacing turned off, and return the result.'''
    parser_options = {'treebuilder': 'lxml', 'namespaceHTMLElements': False}
    return html5lib.parse(raw, **parser_options)
 def search_google(query, max_results=10, timeout=60, write_html_to=None):
    '''
    Search Google Books for ``query`` and yield one SearchResult per usable hit.

    :param query: Search terms. Text that is not already a byte string is
        encoded as UTF-8 before being URL-quoted.
    :param max_results: Maximum number of results to yield.
    :param timeout: Timeout passed to the browser when opening the search URL.
    :param write_html_to: Optional file path. When not None, the parsed result
        page is serialized as UTF-8 and written to that path.

    Hits that have no link or no author text are skipped and do not count
    against ``max_results``. Each yielded result has its title, author,
    detail_item (the hit's link) and drm (always DRM_UNKNOWN) filled in.
    '''
    if not isinstance(query, bytes):
        # urllib.quote_plus() on Python 2 raises KeyError for non-ASCII
        # unicode input, so quote the UTF-8 encoded bytes instead.
        query = query.encode('utf-8')
    url = 'https://www.google.com/search?tbm=bks&q=' + urllib.quote_plus(query)
    br = browser()
    # Read the whole page up front so the HTTP response is closed before any
    # result is yielded, instead of staying open while the caller iterates.
    with closing(br.open(url, timeout=timeout)) as f:
        raw = f.read()
    doc = parse_html(raw)
    if write_html_to is not None:
        praw = html.tostring(doc, encoding='utf-8')
        # The context manager guarantees the dump file is flushed and closed.
        with open(write_html_to, 'wb') as dump:
            dump.write(praw)
    counter = max_results
    for data in doc.xpath('//div[@id="rso"]//div[@class="g"]'):
        if counter <= 0:
            break
        href = ''.join(data.xpath('.//h3/a/@href'))
        if not href:
            continue
        title = ''.join(data.xpath('.//h3/a//text()'))
        authors = data.xpath('descendant::div[@class="s"]//a[@class="fl" and @href]//text()')
        # Drop trailing link texts that are not author names (presumably the
        # result's Preview / Read / More editions links -- verify against a
        # live result page).
        while authors and authors[-1].strip().lower() in ('preview', 'read', 'more editions'):
            authors = authors[:-1]
        if not authors:
            continue
        counter -= 1
        s = SearchResult()
        s.title = title.strip()
        s.author = ' & '.join(authors).strip()
        s.detail_item = href.strip()
        s.drm = SearchResult.DRM_UNKNOWN
        yield s
 class GoogleBooksStore(BasicStoreConfig, StorePlugin):
    def open(self, parent=None, detail_item=None, external=False):
@@ -35,43 +78,13 @@ class GoogleBooksStore(BasicStoreConfig, StorePlugin):
            d.exec_()
    def search(self, query, max_results=10, timeout=60):
-        url = 'https://www.google.com/search?tbm=bks&q=' + urllib.quote_plus(query)
+        for result in search_google(query, max_results=max_results, timeout=timeout):
-
+            yield result
        br = browser()
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//ol/li'):
                if counter <= 0:
                    break
                id = ''.join(data.xpath('.//h3/a/@href'))
                if not id:
                    continue
                title = ''.join(data.xpath('.//h3/a//text()'))
                authors = data.xpath('.//span[contains(@class, "f")]//a//text()')
                while authors and authors[-1].strip().lower() in ('preview', 'read', 'more editions'):
                    authors = authors[:-1]
                if not authors:
                    continue
                author = ', '.join(authors)
                counter -= 1
                s = SearchResult()
                s.title = title.strip()
                s.author = author.strip()
                s.detail_item = id.strip()
                s.drm = SearchResult.DRM_UNKNOWN
                yield s
    def get_details(self, search_result, timeout):
        br = browser()
        with closing(br.open(search_result.detail_item, timeout=timeout)) as nf:
-            doc = html.fromstring(nf.read())
+            doc = parse_html(nf.read())
            search_result.cover_url = ''.join(doc.xpath('//div[@class="sidebarcover"]//img/@src'))
@@ -90,3 +103,9 @@ class GoogleBooksStore(BasicStoreConfig, StorePlugin):
                search_result.formats = _('Unknown')
        return True
 if __name__ == '__main__':
    # Command-line entry point: join the arguments into one query, run the
    # search, dump the parsed result page to /t/google.html and print each hit.
    import sys
    cli_query = ' '.join(sys.argv[1:])
    for hit in search_google(cli_query, write_html_to='/t/google.html'):
        print(hit)