Metadata download: Add plugin to download book covers from bigbooksearch.com

Kovid Goyal 2013-04-06 12:05:54 +05:30
parent e7cfdba57c
commit 20d970c362
4 changed files with 62 additions and 3 deletions

src/calibre/customize/builtins.py

@@ -758,8 +758,9 @@ from calibre.ebooks.metadata.sources.overdrive import OverDrive
 from calibre.ebooks.metadata.sources.douban import Douban
 from calibre.ebooks.metadata.sources.ozon import Ozon
 from calibre.ebooks.metadata.sources.google_images import GoogleImages
+from calibre.ebooks.metadata.sources.big_book_search import BigBookSearch
 
-plugins += [GoogleBooks, GoogleImages, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]
+plugins += [GoogleBooks, GoogleImages, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon, BigBookSearch]
 
 # }}}
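With the import and registration in place, the new source is discoverable like any other metadata download plugin. A quick way to confirm this from a calibre development environment (a sketch assuming calibre's metadata_plugins() helper for enumerating Source plugins by capability; run it with calibre-debug -e):

    # Sketch: list registered cover-capable metadata sources.
    from calibre.customize.ui import metadata_plugins

    for plugin in metadata_plugins(['cover']):
        # 'Big Book Search' should appear here after this commit.
        print(plugin.name)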

src/calibre/customize/ui.py

@@ -91,7 +91,7 @@ def restore_plugin_state_to_default(plugin_or_name):
     config['enabled_plugins'] = ep
 
 default_disabled_plugins = set([
-    'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', 'Google Images',
+    'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', 'Google Images', 'Big Book Search',
 ])
 
 def is_disabled(plugin):
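Listing the plugin in default_disabled_plugins means it ships turned off until the user enables it; an explicit entry in the user's disabled or enabled plugin config always wins. A toy sketch of that resolution logic (illustrative only, not calibre's exact implementation):

    def is_disabled(name, disabled, enabled, default_disabled):
        # An explicit user setting always wins; otherwise fall back
        # to the ship-disabled default.
        if name in disabled:
            return True
        return name in default_disabled and name not in enabled

    defaults = {'Big Book Search'}
    assert is_disabled('Big Book Search', set(), set(), defaults)                     # off out of the box
    assert not is_disabled('Big Book Search', set(), {'Big Book Search'}, defaults)   # user opted in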

src/calibre/ebooks/metadata/sources/base.py

@@ -31,7 +31,7 @@ msprefs.defaults['find_first_edition_date'] = False
 # Google covers are often poor quality (scans/errors) but they have high
 # resolution, so they trump covers from better sources. So make sure they
 # are only used if no other covers are found.
-msprefs.defaults['cover_priorities'] = {'Google':2, 'Google Images':2}
+msprefs.defaults['cover_priorities'] = {'Google':2, 'Google Images':2, 'Big Book Search':2}
 
 def create_log(ostream=None):
     from calibre.utils.logging import ThreadSafeLog, FileStream
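A lower priority number means a source's covers are preferred, so pinning these three at 2 makes their high-resolution but often low-quality covers sort after everything else. A toy illustration of that ordering (the default priority of 1 for unlisted sources is an assumption inferred from the comment above, not copied from calibre):

    cover_priorities = {'Google': 2, 'Google Images': 2, 'Big Book Search': 2}

    def ordered_sources(names):
        # Sources missing from the map get the default priority 1 and
        # therefore sort ahead of the priority-2 sources.
        return sorted(names, key=lambda name: cover_priorities.get(name, 1))

    print(ordered_sources(['Google', 'Amazon', 'Big Book Search', 'OpenLibrary']))
    # ['Amazon', 'OpenLibrary', 'Google', 'Big Book Search']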

src/calibre/ebooks/metadata/sources/big_book_search.py (new file)

@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.ebooks.metadata.sources.base import Source, Option
+
+def get_urls(br, tokens):
+    from urllib import quote_plus
+    from mechanize import Request
+    from lxml import html
+    escaped = [quote_plus(x.encode('utf-8')) for x in tokens if x and x.strip()]
+    q = b'+'.join(escaped)
+    url = 'http://bigbooksearch.com/books/'+q
+    br.open(url).read()
+    req = Request('http://bigbooksearch.com/query.php?SearchIndex=books&Keywords=%s&ItemPage=1'%q)
+    req.add_header('X-Requested-With', 'XMLHttpRequest')
+    req.add_header('Referer', url)
+    raw = br.open(req).read()
+    root = html.fromstring(raw.decode('utf-8'))
+    urls = [i.get('src') for i in root.xpath('//img[@src]')]
+    return urls
+
+class BigBookSearch(Source):
+
+    name = 'Big Book Search'
+    description = _('Downloads multiple book covers from Amazon. Useful to find alternate covers.')
+    capabilities = frozenset(['cover'])
+    config_help_message = _('Configure the Big Book Search plugin')
+    can_get_multiple_covers = True
+    options = (Option('max_covers', 'number', 5, _('Maximum number of covers to get'),
+                      _('The maximum number of covers to process from the search result')),
+    )
+    supports_gzip_transfer_encoding = True
+
+    def download_cover(self, log, result_queue, abort,
+            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
+        if not title:
+            return
+        br = self.browser
+        tokens = tuple(self.get_title_tokens(title)) + tuple(self.get_author_tokens(authors))
+        urls = get_urls(br, tokens)
+        self.download_multiple_covers(title, authors, urls, get_best_cover, timeout, result_queue, abort, log)
+
+def test():
+    from calibre import browser
+    import pprint
+    br = browser()
+    urls = get_urls(br, ['consider', 'phlebas', 'banks'])
+    pprint.pprint(urls)
+
+if __name__ == '__main__':
+    test()
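The scraper works in two steps: load the human-facing search page, then replay the site's own AJAX query with the Referer and X-Requested-With headers set, and harvest every img src from the returned HTML fragment. A standalone re-sketch of that flow using only the stdlib plus lxml, handy for poking at the endpoint without calibre or mechanize (it assumes the 2013-era bigbooksearch.com endpoint behaves as the code above expects):

    # Python 2 sketch mirroring get_urls() above.
    import urllib2
    from urllib import quote_plus

    def fetch_cover_urls(tokens):
        from lxml import html
        q = '+'.join(quote_plus(t) for t in tokens)
        url = 'http://bigbooksearch.com/books/' + q
        opener = urllib2.build_opener()
        opener.open(url).read()  # load the search page first, as a browser would
        req = urllib2.Request(
            'http://bigbooksearch.com/query.php?SearchIndex=books&Keywords=%s&ItemPage=1' % q)
        req.add_header('X-Requested-With', 'XMLHttpRequest')  # mark it as an AJAX call
        req.add_header('Referer', url)
        raw = opener.open(req).read()
        return [img.get('src') for img in
                html.fromstring(raw.decode('utf-8')).xpath('//img[@src]')]

    if __name__ == '__main__':
        print(fetch_cover_urls(['consider', 'phlebas', 'banks']))

The module's own test() does the same through calibre's browser and can be run in a development checkout with calibre-debug -e big_book_search.py.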