Get Books: Update Google Books plugin for website changes

This commit is contained in:
Kovid Goyal 2017-03-07 09:34:58 +05:30
parent c4a949bfd7
commit f086a48a4a

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
# Bumped whenever the plugin changes so that the dynamic plugin loader
# picks up this copy instead of an older one.
store_version = 4  # Needed for dynamic plugin loading
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -11,9 +11,9 @@ import urllib
from contextlib import closing
from lxml import html
from PyQt5.Qt import QUrl
import html5lib
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
@ -22,6 +22,49 @@ from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
def parse_html(raw):
    """Parse the HTML in ``raw`` with html5lib and return the resulting tree.

    The tree is built with html5lib's ``lxml`` tree builder and with
    ``namespaceHTMLElements`` turned off.
    """
    parser_options = {
        'treebuilder': 'lxml',
        'namespaceHTMLElements': False,
    }
    return html5lib.parse(raw, **parser_options)
def search_google(query, max_results=10, timeout=60, write_html_to=None):
    """Search Google Books for ``query`` and yield one SearchResult per hit.

    :param query: The search terms, as either a unicode or a byte string.
    :param max_results: Maximum number of results to yield.
    :param timeout: Timeout passed to the browser when opening the search URL.
    :param write_html_to: Optional file path. When not None, the parsed page
        is serialized as UTF-8 and written to this path.

    Entries without a link, or without any author links left after the
    trailing navigation links are removed, are skipped and do not count
    towards ``max_results``.
    """
    # quote_plus() fails with KeyError on non-ASCII unicode input under
    # Python 2, so always hand it UTF-8 encoded bytes.
    if not isinstance(query, bytes):
        query = query.encode('utf-8')
    url = 'https://www.google.com/search?tbm=bks&q=' + urllib.quote_plus(query)

    br = browser()
    # Read the whole response up front so the connection is released before
    # the generator starts yielding (it may stay suspended for a long time).
    with closing(br.open(url, timeout=timeout)) as f:
        raw = f.read()
    doc = parse_html(raw)

    if write_html_to is not None:
        praw = html.tostring(doc, encoding='utf-8')
        # Use a context manager so the file handle is closed deterministically.
        with open(write_html_to, 'wb') as dump:
            dump.write(praw)

    # Link texts that trail the author links but are not author names.
    non_author_links = ('preview', 'read', 'more editions')

    remaining = max_results
    for data in doc.xpath('//div[@id="rso"]//div[@class="g"]'):
        if remaining <= 0:
            break

        href = ''.join(data.xpath('.//h3/a/@href'))
        if not href:
            continue

        title = ''.join(data.xpath('.//h3/a//text()'))
        authors = data.xpath('descendant::div[@class="s"]//a[@class="fl" and @href]//text()')
        while authors and authors[-1].strip().lower() in non_author_links:
            authors = authors[:-1]
        if not authors:
            continue
        author = ' & '.join(authors)

        remaining -= 1

        s = SearchResult()
        s.title = title.strip()
        s.author = author.strip()
        s.detail_item = href.strip()
        s.drm = SearchResult.DRM_UNKNOWN

        yield s
class GoogleBooksStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False):
@ -35,43 +78,13 @@ class GoogleBooksStore(BasicStoreConfig, StorePlugin):
d.exec_()
def search(self, query, max_results=10, timeout=60):
    """Yield search results for ``query`` from Google Books.

    All fetching and parsing is delegated to the module-level
    search_google() function, so the page-scraping logic lives in exactly
    one place and at most ``max_results`` results are produced.

    :param query: The search terms.
    :param max_results: Maximum number of results to yield.
    :param timeout: Timeout forwarded to search_google().
    """
    for result in search_google(query, max_results=max_results, timeout=timeout):
        yield result
def get_details(self, search_result, timeout):
br = browser()
with closing(br.open(search_result.detail_item, timeout=timeout)) as nf:
doc = html.fromstring(nf.read())
doc = parse_html(nf.read())
search_result.cover_url = ''.join(doc.xpath('//div[@class="sidebarcover"]//img/@src'))
@ -90,3 +103,9 @@ class GoogleBooksStore(BasicStoreConfig, StorePlugin):
search_result.formats = _('Unknown')
return True
if __name__ == '__main__':
    # Manual test entry point: search for the command line arguments, dump
    # the parsed result page to a fixed path and print every result.
    import sys
    terms = ' '.join(sys.argv[1:])
    results = search_google(terms, write_html_to='/t/google.html')
    for result in results:
        print(result)