Metadata download: Add plugin to download book covers from bigbooksearch.com

Kovid Goyal 2013-04-06 12:05:54 +05:30
parent e7cfdba57c
commit 20d970c362
4 changed files with 62 additions and 3 deletions

src/calibre/customize/builtins.py

@@ -758,8 +758,9 @@ from calibre.ebooks.metadata.sources.overdrive import OverDrive
 from calibre.ebooks.metadata.sources.douban import Douban
 from calibre.ebooks.metadata.sources.ozon import Ozon
 from calibre.ebooks.metadata.sources.google_images import GoogleImages
+from calibre.ebooks.metadata.sources.big_book_search import BigBookSearch
 
-plugins += [GoogleBooks, GoogleImages, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]
+plugins += [GoogleBooks, GoogleImages, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon, BigBookSearch]
 
 # }}}
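With the import and registration in place, the new source is discoverable like any other metadata download plugin. A quick way to confirm this from a calibre development environment (a sketch assuming calibre's metadata_plugins() helper for enumerating Source plugins by capability; run it with calibre-debug -e):

    # Sketch: list registered cover-capable metadata sources.
    from calibre.customize.ui import metadata_plugins

    for plugin in metadata_plugins(['cover']):
        # 'Big Book Search' should appear here after this commit.
        print(plugin.name)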

src/calibre/customize/ui.py

@@ -91,7 +91,7 @@ def restore_plugin_state_to_default(plugin_or_name):
     config['enabled_plugins'] = ep
 
 default_disabled_plugins = set([
-    'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', 'Google Images',
+    'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', 'Google Images', 'Big Book Search',
 ])
 
 def is_disabled(plugin):
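Listing the plugin in default_disabled_plugins means it ships turned off until the user enables it; an explicit entry in the user's disabled or enabled plugin config always wins. A toy sketch of that resolution logic (illustrative only, not calibre's exact implementation):

    def is_disabled(name, disabled, enabled, default_disabled):
        # An explicit user setting always wins; otherwise fall back
        # to the ship-disabled default.
        if name in disabled:
            return True
        return name in default_disabled and name not in enabled

    defaults = {'Big Book Search'}
    assert is_disabled('Big Book Search', set(), set(), defaults)                     # off out of the box
    assert not is_disabled('Big Book Search', set(), {'Big Book Search'}, defaults)   # user opted in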

src/calibre/ebooks/metadata/sources/base.py

@@ -31,7 +31,7 @@ msprefs.defaults['find_first_edition_date'] = False
 # Google covers are often poor quality (scans/errors) but they have high
 # resolution, so they trump covers from better sources. So make sure they
 # are only used if no other covers are found.
-msprefs.defaults['cover_priorities'] = {'Google':2, 'Google Images':2}
+msprefs.defaults['cover_priorities'] = {'Google':2, 'Google Images':2, 'Big Book Search':2}
 
 def create_log(ostream=None):
     from calibre.utils.logging import ThreadSafeLog, FileStream
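A lower priority number means a source's covers are preferred, so pinning these three at 2 makes their high-resolution but often low-quality covers sort after everything else. A toy illustration of that ordering (the default priority of 1 for unlisted sources is an assumption inferred from the comment above, not copied from calibre):

    cover_priorities = {'Google': 2, 'Google Images': 2, 'Big Book Search': 2}

    def ordered_sources(names):
        # Sources missing from the map get the default priority 1 and
        # therefore sort ahead of the priority-2 sources.
        return sorted(names, key=lambda name: cover_priorities.get(name, 1))

    print(ordered_sources(['Google', 'Amazon', 'Big Book Search', 'OpenLibrary']))
    # ['Amazon', 'OpenLibrary', 'Google', 'Big Book Search']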

src/calibre/ebooks/metadata/sources/big_book_search.py (new file)

@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.ebooks.metadata.sources.base import Source, Option
+
+def get_urls(br, tokens):
+    from urllib import quote_plus
+    from mechanize import Request
+    from lxml import html
+    escaped = [quote_plus(x.encode('utf-8')) for x in tokens if x and x.strip()]
+    q = b'+'.join(escaped)
+    url = 'http://bigbooksearch.com/books/'+q
+    br.open(url).read()
+    req = Request('http://bigbooksearch.com/query.php?SearchIndex=books&Keywords=%s&ItemPage=1'%q)
+    req.add_header('X-Requested-With', 'XMLHttpRequest')
+    req.add_header('Referer', url)
+    raw = br.open(req).read()
+    root = html.fromstring(raw.decode('utf-8'))
+    urls = [i.get('src') for i in root.xpath('//img[@src]')]
+    return urls
+
+class BigBookSearch(Source):
+
+    name = 'Big Book Search'
+    description = _('Downloads multiple book covers from Amazon. Useful to find alternate covers.')
+    capabilities = frozenset(['cover'])
+    config_help_message = _('Configure the Big Book Search plugin')
+    can_get_multiple_covers = True
+    options = (Option('max_covers', 'number', 5, _('Maximum number of covers to get'),
+                      _('The maximum number of covers to process from the search result')),
+    )
+    supports_gzip_transfer_encoding = True
+
+    def download_cover(self, log, result_queue, abort,
+            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
+        if not title:
+            return
+        br = self.browser
+        tokens = tuple(self.get_title_tokens(title)) + tuple(self.get_author_tokens(authors))
+        urls = get_urls(br, tokens)
+        self.download_multiple_covers(title, authors, urls, get_best_cover, timeout, result_queue, abort, log)
+
+def test():
+    from calibre import browser
+    import pprint
+    br = browser()
+    urls = get_urls(br, ['consider', 'phlebas', 'banks'])
+    pprint.pprint(urls)
+
+if __name__ == '__main__':
+    test()
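The scraper works in two steps: load the human-facing search page, then replay the site's own AJAX query with the Referer and X-Requested-With headers set, and harvest every img src from the returned HTML fragment. A standalone re-sketch of that flow using only the stdlib plus lxml, handy for poking at the endpoint without calibre or mechanize (it assumes the 2013-era bigbooksearch.com endpoint behaves as the code above expects):

    # Python 2 sketch mirroring get_urls() above.
    import urllib2
    from urllib import quote_plus

    def fetch_cover_urls(tokens):
        from lxml import html
        q = '+'.join(quote_plus(t) for t in tokens)
        url = 'http://bigbooksearch.com/books/' + q
        opener = urllib2.build_opener()
        opener.open(url).read()  # load the search page first, as a browser would
        req = urllib2.Request(
            'http://bigbooksearch.com/query.php?SearchIndex=books&Keywords=%s&ItemPage=1' % q)
        req.add_header('X-Requested-With', 'XMLHttpRequest')  # mark it as an AJAX call
        req.add_header('Referer', url)
        raw = opener.open(req).read()
        return [img.get('src') for img in
                html.fromstring(raw.decode('utf-8')).xpath('//img[@src]')]

    if __name__ == '__main__':
        print(fetch_cover_urls(['consider', 'phlebas', 'banks']))

The module's own test() does the same through calibre's browser and can be run in a development checkout with calibre-debug -e big_book_search.py.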