From f086a48a4a105a2e46b9a4c3786a0d7b0f53448e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 7 Mar 2017 09:34:58 +0530
Subject: [PATCH] GetBooks; Update Google Books plugin for website changes

---
 .../gui2/store/stores/google_books_plugin.py  | 89 +++++++++++--------
 1 file changed, 54 insertions(+), 35 deletions(-)

diff --git a/src/calibre/gui2/store/stores/google_books_plugin.py b/src/calibre/gui2/store/stores/google_books_plugin.py
index c16fca163f..d225a84e8e 100644
--- a/src/calibre/gui2/store/stores/google_books_plugin.py
+++ b/src/calibre/gui2/store/stores/google_books_plugin.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 3  # Needed for dynamic plugin loading
+store_version = 4  # Needed for dynamic plugin loading
 
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
@@ -11,9 +11,9 @@ import urllib
 from contextlib import closing
 
 from lxml import html
-
 from PyQt5.Qt import QUrl
 
+import html5lib
 from calibre import browser, url_slash_cleaner
 from calibre.gui2 import open_url
 from calibre.gui2.store import StorePlugin
@@ -22,6 +22,49 @@ from calibre.gui2.store.search_result import SearchResult
 from calibre.gui2.store.web_store_dialog import WebStoreDialog
 
 
+def parse_html(raw):  # parse with html5lib into an lxml tree; elements are left un-namespaced so unprefixed XPath applies
+    return html5lib.parse(raw, namespaceHTMLElements=False, treebuilder='lxml')
+
+
+def search_google(query, max_results=10, timeout=60, write_html_to=None):
+    ''' Yield up to max_results SearchResult objects (title, author and result
+    link as detail_item) for query from Google's book search. If write_html_to
+    is not None, the parsed results page is saved there as UTF-8 for debugging. '''
+    url = 'https://www.google.com/search?tbm=bks&q=' + urllib.quote_plus(query)
+    br = browser()
+    counter = max_results
+    with closing(br.open(url, timeout=timeout)) as f:
+        raw = f.read()
+        doc = parse_html(raw)
+        if write_html_to is not None:
+            praw = html.tostring(doc, encoding='utf-8')
+            # Close the dump file deterministically instead of relying on garbage collection
+            with open(write_html_to, 'wb') as dump:
+                dump.write(praw)
+        for data in doc.xpath('//div[@id="rso"]//div[@class="g"]'):
+            if counter <= 0:
+                break
+            href = ''.join(data.xpath('.//h3/a/@href'))
+            if not href:
+                continue
+            title = ''.join(data.xpath('.//h3/a//text()'))
+            authors = data.xpath('descendant::div[@class="s"]//a[@class="fl" and @href]//text()')
+            # Drop trailing link texts such as 'Preview' that are not author names
+            while authors and authors[-1].strip().lower() in ('preview', 'read', 'more editions'):
+                authors = authors[:-1]
+            if not authors:
+                continue
+            author = ' & '.join(authors)
+            counter -= 1
+
+            s = SearchResult()
+            s.title = title.strip()
+            s.author = author.strip()
+            s.detail_item = href.strip()
+            s.drm = SearchResult.DRM_UNKNOWN
+            yield s
+
+
 class GoogleBooksStore(BasicStoreConfig, StorePlugin):
 
     def open(self, parent=None, detail_item=None, external=False):
@@ -35,43 +78,13 @@ class GoogleBooksStore(BasicStoreConfig, StorePlugin):
             d.exec_()
 
     def search(self, query, max_results=10, timeout=60):
-        url = 'https://www.google.com/search?tbm=bks&q=' + urllib.quote_plus(query)
-
-        br = browser()
-
-        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read())
-            for data in doc.xpath('//ol/li'):
-                if counter <= 0:
-                    break
-
-                id = ''.join(data.xpath('.//h3/a/@href'))
-                if not id:
-                    continue
-
-                title = ''.join(data.xpath('.//h3/a//text()'))
-                authors = data.xpath('.//span[contains(@class, "f")]//a//text()')
-                while authors and authors[-1].strip().lower() in ('preview', 'read', 'more editions'):
-                    authors = authors[:-1]
-                if not authors:
-                    continue
-                author = ', '.join(authors)
-
-                counter -= 1
-
-                s = SearchResult()
-                s.title = title.strip()
-                s.author = author.strip()
-                s.detail_item = id.strip()
-                s.drm = SearchResult.DRM_UNKNOWN
-
-                yield s
+        for result in search_google(query, max_results=max_results, timeout=timeout):
+            yield result
 
     def get_details(self, search_result, timeout):
         br = browser()
         with closing(br.open(search_result.detail_item, timeout=timeout)) as nf:
-            doc = html.fromstring(nf.read())
+            doc = parse_html(nf.read())
 
             search_result.cover_url = ''.join(doc.xpath('//div[@class="sidebarcover"]//img/@src'))
 
@@ -90,3 +103,9 @@ class GoogleBooksStore(BasicStoreConfig, StorePlugin):
                 search_result.formats = _('Unknown')
 
         return True
+
+
+if __name__ == '__main__':  # manual test: search for the command-line words and print each result
+    import sys
+    for result in search_google(' '.join(sys.argv[1:]), write_html_to='/t/google.html'):  # NOTE(review): dump path is hard-coded
+        print (result)