mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
GetBooks; Update Google Books plugin for website changes
This commit is contained in:
parent
c4a949bfd7
commit
f086a48a4a
@ -1,7 +1,7 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
store_version = 3 # Needed for dynamic plugin loading
|
store_version = 4 # Needed for dynamic plugin loading
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
@ -11,9 +11,9 @@ import urllib
|
|||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
|
|
||||||
from PyQt5.Qt import QUrl
|
from PyQt5.Qt import QUrl
|
||||||
|
|
||||||
|
import html5lib
|
||||||
from calibre import browser, url_slash_cleaner
|
from calibre import browser, url_slash_cleaner
|
||||||
from calibre.gui2 import open_url
|
from calibre.gui2 import open_url
|
||||||
from calibre.gui2.store import StorePlugin
|
from calibre.gui2.store import StorePlugin
|
||||||
@ -22,6 +22,49 @@ from calibre.gui2.store.search_result import SearchResult
|
|||||||
from calibre.gui2.store.web_store_dialog import WebStoreDialog
|
from calibre.gui2.store.web_store_dialog import WebStoreDialog
|
||||||
|
|
||||||
|
|
||||||
|
def parse_html(raw):
    """Parse *raw* HTML with html5lib and return the resulting lxml tree.

    Elements are created without the XHTML namespace
    (``namespaceHTMLElements=False``), so XPath expressions can use plain
    tag names.
    """
    tree = html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False)
    return tree
|
||||||
|
|
||||||
|
|
||||||
|
def search_google(query, max_results=10, timeout=60, write_html_to=None):
    """Search Google Books and yield a SearchResult for every usable hit.

    :param query: Search terms; URL-quoted before being appended to the
        search URL.
    :param max_results: Maximum number of results to yield.
    :param timeout: Timeout passed to the browser when opening the URL.
    :param write_html_to: Optional file path. When not None, the parsed
        page is serialized as UTF-8 and written there, which helps when
        debugging the XPath expressions below.
    :return: Generator of SearchResult objects whose title, author and
        detail_item (the result link) are filled in and whose drm is set to
        SearchResult.DRM_UNKNOWN.
    """
    url = 'https://www.google.com/search?tbm=bks&q=' + urllib.quote_plus(query)

    br = browser()

    # Read the whole response first so it is closed before any result is
    # yielded; a generator may stay suspended for a long time between items.
    with closing(br.open(url, timeout=timeout)) as f:
        raw = f.read()

    doc = parse_html(raw)
    if write_html_to is not None:
        praw = html.tostring(doc, encoding='utf-8')
        # Use a context manager so the dump file is always flushed and closed.
        with open(write_html_to, 'wb') as dump:
            dump.write(praw)

    # Trailing links in the author row that are actions, not author names.
    non_author_links = ('preview', 'read', 'more editions')

    counter = max_results
    for data in doc.xpath('//div[@id="rso"]//div[@class="g"]'):
        if counter <= 0:
            break

        detail_url = ''.join(data.xpath('.//h3/a/@href'))
        if not detail_url:
            # No link means there is nothing to open for this entry.
            continue

        title = ''.join(data.xpath('.//h3/a//text()'))
        authors = data.xpath('descendant::div[@class="s"]//a[@class="fl" and @href]//text()')
        while authors and authors[-1].strip().lower() in non_author_links:
            authors = authors[:-1]
        if not authors:
            # Entries without any author link are skipped and do not count
            # against max_results.
            continue
        author = ' & '.join(authors)

        counter -= 1

        s = SearchResult()
        s.title = title.strip()
        s.author = author.strip()
        s.detail_item = detail_url.strip()
        s.drm = SearchResult.DRM_UNKNOWN

        yield s
|
||||||
|
|
||||||
|
|
||||||
class GoogleBooksStore(BasicStoreConfig, StorePlugin):
|
class GoogleBooksStore(BasicStoreConfig, StorePlugin):
|
||||||
|
|
||||||
def open(self, parent=None, detail_item=None, external=False):
|
def open(self, parent=None, detail_item=None, external=False):
|
||||||
@ -35,43 +78,13 @@ class GoogleBooksStore(BasicStoreConfig, StorePlugin):
|
|||||||
d.exec_()
|
d.exec_()
|
||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
url = 'https://www.google.com/search?tbm=bks&q=' + urllib.quote_plus(query)
|
for result in search_google(query, max_results=max_results, timeout=timeout):
|
||||||
|
yield result
|
||||||
br = browser()
|
|
||||||
|
|
||||||
counter = max_results
|
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
|
||||||
doc = html.fromstring(f.read())
|
|
||||||
for data in doc.xpath('//ol/li'):
|
|
||||||
if counter <= 0:
|
|
||||||
break
|
|
||||||
|
|
||||||
id = ''.join(data.xpath('.//h3/a/@href'))
|
|
||||||
if not id:
|
|
||||||
continue
|
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//h3/a//text()'))
|
|
||||||
authors = data.xpath('.//span[contains(@class, "f")]//a//text()')
|
|
||||||
while authors and authors[-1].strip().lower() in ('preview', 'read', 'more editions'):
|
|
||||||
authors = authors[:-1]
|
|
||||||
if not authors:
|
|
||||||
continue
|
|
||||||
author = ', '.join(authors)
|
|
||||||
|
|
||||||
counter -= 1
|
|
||||||
|
|
||||||
s = SearchResult()
|
|
||||||
s.title = title.strip()
|
|
||||||
s.author = author.strip()
|
|
||||||
s.detail_item = id.strip()
|
|
||||||
s.drm = SearchResult.DRM_UNKNOWN
|
|
||||||
|
|
||||||
yield s
|
|
||||||
|
|
||||||
def get_details(self, search_result, timeout):
|
def get_details(self, search_result, timeout):
|
||||||
br = browser()
|
br = browser()
|
||||||
with closing(br.open(search_result.detail_item, timeout=timeout)) as nf:
|
with closing(br.open(search_result.detail_item, timeout=timeout)) as nf:
|
||||||
doc = html.fromstring(nf.read())
|
doc = parse_html(nf.read())
|
||||||
|
|
||||||
search_result.cover_url = ''.join(doc.xpath('//div[@class="sidebarcover"]//img/@src'))
|
search_result.cover_url = ''.join(doc.xpath('//div[@class="sidebarcover"]//img/@src'))
|
||||||
|
|
||||||
@ -90,3 +103,9 @@ class GoogleBooksStore(BasicStoreConfig, StorePlugin):
|
|||||||
search_result.formats = _('Unknown')
|
search_result.formats = _('Unknown')
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Manual check: search for the command-line arguments, dump the parsed
    # page to a file and print every result found.
    import sys
    terms = ' '.join(sys.argv[1:])
    for result in search_google(terms, write_html_to='/t/google.html'):
        print (result)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user