Metadata download: Add plugin to download book covers from bigbooksearch.com

2025-08-30 23:00:21 -04:00 · 2013-04-06 12:05:54 +05:30 · 2013-04-06 12:05:54 +05:30 · 20d970c362
commit 20d970c362
parent e7cfdba57c
4 changed files with 62 additions and 3 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -758,8 +758,9 @@ from calibre.ebooks.metadata.sources.overdrive import OverDrive
 from calibre.ebooks.metadata.sources.douban import Douban
 from calibre.ebooks.metadata.sources.ozon import Ozon
 from calibre.ebooks.metadata.sources.google_images import GoogleImages
+from calibre.ebooks.metadata.sources.big_book_search import BigBookSearch

-plugins += [GoogleBooks, GoogleImages, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]
+plugins += [GoogleBooks, GoogleImages, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon, BigBookSearch]

 # }}}

--- a/src/calibre/customize/ui.py
+++ b/src/calibre/customize/ui.py
@ -91,7 +91,7 @@ def restore_plugin_state_to_default(plugin_or_name):
    config['enabled_plugins'] = ep

 default_disabled_plugins = set([
-    'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', 'Google Images',
+    'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', 'Google Images', 'Big Book Search',
 ])

 def is_disabled(plugin):
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@ -31,7 +31,7 @@ msprefs.defaults['find_first_edition_date'] = False
 # Google covers are often poor quality (scans/errors) but they have high
 # resolution, so they trump covers from better sources. So make sure they
 # are only used if no other covers are found.
-msprefs.defaults['cover_priorities'] = {'Google':2, 'Google Images':2}
+msprefs.defaults['cover_priorities'] = {'Google':2, 'Google Images':2, 'Big Book Search':2}

 def create_log(ostream=None):
    from calibre.utils.logging import ThreadSafeLog, FileStream
--- a/src/calibre/ebooks/metadata/sources/big_book_search.py
+++ b/src/calibre/ebooks/metadata/sources/big_book_search.py
@ -0,0 +1,58 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.ebooks.metadata.sources.base import Source, Option
+
+def get_urls(br, tokens):
+    '''
+    Search bigbooksearch.com for ``tokens`` and return the image URLs found
+    in the result.
+
+    :param br: Browser object used to make the requests. Only its ``open()``
+        method is used.
+    :param tokens: Iterable of unicode search terms. Empty and
+        whitespace-only terms are ignored.
+    :return: List with the ``src`` attribute of every ``<img>`` element in
+        the search result. The list is empty when there is nothing to search
+        for or the server sends back an empty response.
+    '''
+    terms = [x for x in tokens if x and x.strip()]
+    if not terms:
+        # Without any keywords the query is meaningless, so do not make any
+        # network requests at all
+        return []
+    from urllib import quote_plus
+    from mechanize import Request
+    from lxml import html
+    q = b'+'.join(quote_plus(x.encode('utf-8')) for x in terms)
+    url = 'http://bigbooksearch.com/books/'+q
+    # Visit the normal search page first, it is also used as the Referer of
+    # the real query below
+    br.open(url).read()
+    # The results come from query.php. The request identifies itself as an
+    # XMLHttpRequest (presumably to mimic the javascript of the search page)
+    req = Request('http://bigbooksearch.com/query.php?SearchIndex=books&Keywords=%s&ItemPage=1'%q)
+    req.add_header('X-Requested-With', 'XMLHttpRequest')
+    req.add_header('Referer', url)
+    raw = br.open(req).read()
+    if not raw.strip():
+        # lxml raises an error when asked to parse an empty document
+        return []
+    # A stray invalid byte should not prevent the rest of the result from
+    # being used
+    root = html.fromstring(raw.decode('utf-8', 'replace'))
+    return [i.get('src') for i in root.xpath('//img[@src]')]
+
+class BigBookSearch(Source):
+    '''
+    Cover download plugin that looks up cover images via get_urls(), i.e.
+    from the search results of bigbooksearch.com.
+    '''
+
+    name = 'Big Book Search'
+    description = _('Downloads multiple book covers from Amazon. Useful to find alternate covers.')
+    # This plugin only provides covers
+    capabilities = frozenset(('cover',))
+    config_help_message = _('Configure the Big Book Search plugin')
+    can_get_multiple_covers = True
+    options = (
+        Option(
+            'max_covers', 'number', 5,
+            _('Maximum number of covers to get'),
+            _('The maximum number of covers to process from the search result'),
+        ),
+    )
+    supports_gzip_transfer_encoding = True
+
+    def download_cover(self, log, result_queue, abort,
+            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
+        '''
+        Search for covers using the title and author tokens and pass all the
+        image URLs that were found on to download_multiple_covers().
+
+        Nothing is done when no title is given. ``identifiers`` is not used
+        by this plugin.
+        '''
+        if title:
+            browser = self.browser
+            search_terms = tuple(self.get_title_tokens(title))
+            search_terms += tuple(self.get_author_tokens(authors))
+            cover_urls = get_urls(browser, search_terms)
+            self.download_multiple_covers(
+                title, authors, cover_urls, get_best_cover, timeout,
+                result_queue, abort, log)
+
+def test():
+    '''Pretty print the cover URLs found for a sample query (uses the network).'''
+    from pprint import pprint
+    from calibre import browser
+    sample_query = ['consider', 'phlebas', 'banks']
+    pprint(get_urls(browser(), sample_query))
+
+# Running this file directly performs the sample search in test()
+if __name__ == '__main__':
+    test()
+