From 6c0afb84d197cda053e194cac1c50ff01e14d752 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 31 Jul 2010 19:55:12 -0600 Subject: [PATCH] New plugin based framework for downloading covers --- src/calibre/customize/builtins.py | 8 +- src/calibre/customize/ui.py | 10 ++ src/calibre/devices/misc.py | 19 +-- src/calibre/ebooks/metadata/covers.py | 232 ++++++++++++++++++++++++++ src/calibre/manual/plugins.rst | 5 + 5 files changed, 254 insertions(+), 20 deletions(-) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 1387f162af..998bfa7b1e 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -460,19 +460,22 @@ from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK from calibre.devices.edge.driver import EDGE from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS from calibre.devices.sne.driver import SNE -from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, PROMEDIA +from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG from calibre.devices.kobo.driver import KOBO from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \ LibraryThing from calibre.ebooks.metadata.douban import DoubanBooks +from calibre.ebooks.metadata.covers import OpenLibraryCovers, \ + LibraryThingCovers from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX from calibre.ebooks.epub.fix.unmanifested import Unmanifested from calibre.ebooks.epub.fix.epubcheck import Epubcheck plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, - LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested, Epubcheck] + LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested, + Epubcheck, OpenLibraryCovers, LibraryThingCovers] plugins += [ ComicInput, EPUBInput, @@ -564,7 +567,6 @@ plugins += [ MENTOR, SWEEX, PDNOVEL, - PROMEDIA, ITUNES, ] 
def cover_sources():
    '''
    Generator over the cover download plugins that are currently usable.

    Walks ``_initialized_plugins`` and yields every plugin that is an
    instance of :class:`CoverDownload` and is not disabled. Before a plugin
    is yielded, its ``site_customization`` attribute is set from the
    ``plugin_customization`` entry of ``config`` stored under the plugin's
    name (the empty string when there is no such entry).
    '''
    customization = config['plugin_customization']
    for plugin in _initialized_plugins:
        # Skip anything that is not an enabled cover downloader
        if not isinstance(plugin, CoverDownload) or is_disabled(plugin):
            continue
        plugin.site_customization = customization.get(plugin.name, '')
        yield plugin
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal '
__docformat__ = 'restructuredtext en'

import traceback, socket, re, sys
from functools import partial
from threading import Thread, Event
from Queue import Queue, Empty

import mechanize

from calibre.customize import Plugin
from calibre import browser, prints
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.constants import preferred_encoding, DEBUG


class CoverDownload(Plugin):
    '''
    These plugins are used to download covers for books.
    '''

    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Kovid Goyal'
    type = _('Cover download')

    def has_cover(self, mi, ans, timeout=5.):
        '''
        Check if the book described by mi has a cover. Call ans.set() if it
        does. Do nothing if it doesn't.

        :param mi: MetaInformation object
        :param timeout: timeout in seconds
        :param ans: A threading.Event object
        '''
        raise NotImplementedError()

    def get_covers(self, mi, result_queue, abort, timeout=5.):
        '''
        Download covers for books described by the mi object. Downloaded covers
        must be put into the result_queue. If more than one cover is available,
        the plugin should continue downloading them and putting them into
        result_queue until abort.is_set() returns True.

        The plugins in this module put 4-tuples into the queue:
        ``(True, cover_data, 'jpg', plugin_name)`` on success and
        ``(False, error_message, details, plugin_name)`` on failure.

        :param mi: MetaInformation object
        :param result_queue: A multithreaded Queue
        :param abort: A threading.Event object
        :param timeout: timeout in seconds
        '''
        raise NotImplementedError()

    def exception_to_string(self, ex):
        '''
        Convert the exception ex to text, trying ``unicode(ex)`` first, then
        ``str(ex)`` decoded with the preferred encoding and finally
        ``repr(ex)``.

        :param ex: The exception to convert
        :return: A textual description of ex
        '''
        # Only Exception is caught so that KeyboardInterrupt and SystemExit
        # are not swallowed while formatting an error message.
        try:
            return unicode(ex)
        except Exception:
            try:
                return str(ex).decode(preferred_encoding, 'replace')
            except Exception:
                return repr(ex)

    def debug(self, *args, **kwargs):
        '''
        Print args, prefixed with a tab and the plugin name, when DEBUG is
        set. Does nothing otherwise.
        '''
        if DEBUG:
            prints('\t'+self.name+':', *args, **kwargs)


class HeadRequest(mechanize.Request):
    'A request whose HTTP method is HEAD'

    def get_method(self):
        return 'HEAD'


class OpenLibraryCovers(CoverDownload):
    'Download covers from openlibrary.org'

    OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
    name = 'openlibrary.org covers'
    description = _('Download covers from openlibrary.org')
    author = 'Kovid Goyal'

    def has_cover(self, mi, ans, timeout=5.):
        '''
        Issue a HEAD request for the cover of mi.isbn and call ans.set() if
        the request succeeds or is answered with a 302 redirect. Any other
        error is only reported via :meth:`debug`.

        :param mi: MetaInformation object, only mi.isbn is used
        :param ans: A threading.Event object
        :param timeout: timeout in seconds
        '''
        if not mi.isbn:
            return False
        br = browser()
        # Redirect handling is switched off, so a 302 shows up as an
        # exception below and is treated as "cover found".
        br.set_handle_redirect(False)
        try:
            br.open_novisit(HeadRequest(self.OPENLIBRARY%mi.isbn), timeout=timeout)
            self.debug('cover for', mi.isbn, 'found')
            ans.set()
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and e.getcode() == 302:
                self.debug('cover for', mi.isbn, 'found')
                ans.set()
            else:
                self.debug(e)

    def get_covers(self, mi, result_queue, abort, timeout=5.):
        '''
        Download the cover for mi.isbn and put one result tuple into
        result_queue. Nothing is put into the queue when mi has no ISBN.

        :param mi: MetaInformation object, only mi.isbn is used
        :param result_queue: A multithreaded Queue
        :param abort: A threading.Event object (not consulted, at most one
                      cover is downloaded)
        :param timeout: timeout in seconds
        '''
        if not mi.isbn:
            return
        br = browser()
        try:
            ans = br.open(self.OPENLIBRARY%mi.isbn, timeout=timeout).read()
            result_queue.put((True, ans, 'jpg', self.name))
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
                result_queue.put((False, _('ISBN: %s not found')%mi.isbn, '', self.name))
            else:
                result_queue.put((False, self.exception_to_string(e),
                    traceback.format_exc(), self.name))


class LibraryThingCovers(CoverDownload):
    'Download covers from librarything.com'

    name = 'librarything.com covers'
    description = _('Download covers from librarything.com')
    author = 'Kovid Goyal'

    LIBRARYTHING = 'http://www.librarything.com/isbn/'

    def get_cover_url(self, isbn, br, timeout=5.):
        '''
        Fetch the LibraryThing page for isbn and extract the cover image URL
        from it.

        :param isbn: The ISBN to look up
        :param br: The browser object used to fetch the page
        :param timeout: timeout in seconds
        :return: The cover URL with any ``_SX<n>``/``_SY<n>`` part removed
        :raises Exception: if the page cannot be fetched or contains no cover
        '''
        try:
            src = br.open_novisit(self.LIBRARYTHING+isbn,
                    timeout=timeout).read().decode('utf-8', 'replace')
        except Exception as err:
            # args can be an empty tuple, indexing it blindly would replace
            # the real error with an IndexError
            args = getattr(err, 'args', None) or (None,)
            if isinstance(args[0], socket.timeout):
                err = Exception(_('LibraryThing.com timed out. Try again later.'))
            raise err

        s = BeautifulSoup(src)
        url = s.find('td', attrs={'class':'left'})
        if url is None:
            if s.find('div', attrs={'class':'highloadwarning'}) is not None:
                raise Exception(_('Could not fetch cover as server is experiencing high load. Please try again later.'))
            raise Exception(_('ISBN: %s not found')%isbn)
        url = url.find('img')
        if url is None:
            raise Exception(_('LibraryThing.com server error. Try again later.'))
        url = re.sub(r'_S[XY]\d+', '', url['src'])
        return url

    def has_cover(self, mi, ans, timeout=5.):
        '''
        Call ans.set() if a cover URL can be found for mi.isbn. Failures are
        only reported via :meth:`debug`.

        :param mi: MetaInformation object, only mi.isbn is used
        :param ans: A threading.Event object
        :param timeout: timeout in seconds
        '''
        if not mi.isbn:
            return False
        br = browser()
        try:
            self.get_cover_url(mi.isbn, br, timeout=timeout)
            self.debug('cover for', mi.isbn, 'found')
            ans.set()
        except Exception as e:
            self.debug(e)

    def get_covers(self, mi, result_queue, abort, timeout=5.):
        '''
        Download the cover for mi.isbn and put one result tuple into
        result_queue. Nothing is put into the queue when mi has no ISBN.

        :param mi: MetaInformation object, only mi.isbn is used
        :param result_queue: A multithreaded Queue
        :param abort: A threading.Event object (not consulted, at most one
                      cover is downloaded)
        :param timeout: timeout in seconds
        '''
        if not mi.isbn:
            return
        br = browser()
        try:
            url = self.get_cover_url(mi.isbn, br, timeout=timeout)
            # Apply the timeout to the image download as well, otherwise a
            # stalled server blocks this worker thread
            cover_data = br.open_novisit(url, timeout=timeout).read()
            result_queue.put((True, cover_data, 'jpg', self.name))
        except Exception as e:
            result_queue.put((False, self.exception_to_string(e),
                traceback.format_exc(), self.name))


def check_for_cover(mi, timeout=5.):
    '''
    Ask all cover sources, in parallel, whether a cover exists for mi.

    One daemon thread per plugin runs its has_cover() method. The function
    returns as soon as any of them reports a cover or all of them have
    finished.

    :param mi: MetaInformation object
    :param timeout: timeout in seconds, passed on to every plugin
    :return: True if at least one plugin reported a cover, False otherwise
    '''
    from calibre.customize.ui import cover_sources
    ans = Event()
    checkers = [partial(p.has_cover, mi, ans, timeout=timeout) for p in
            cover_sources()]
    workers = [Thread(target=c) for c in checkers]
    for w in workers:
        w.daemon = True
        w.start()
    while not ans.is_set():
        ans.wait(0.1)
        if not any(w.is_alive() for w in workers):
            break
    return ans.is_set()


def download_covers(mi, result_queue, max_covers=50, timeout=5.):
    '''
    Download covers for mi from all cover sources in parallel.

    One daemon thread per plugin runs its get_covers() method. Every result
    tuple the plugins produce (successes and failures) is forwarded to
    result_queue. Collection stops once max_covers successful results have
    been seen or all threads have finished; the plugins are then told to
    abort and whatever is still queued is forwarded as well.

    :param mi: MetaInformation object
    :param result_queue: A multithreaded Queue that receives the results
    :param max_covers: Number of successful downloads after which the
                       plugins are asked to stop
    :param timeout: timeout in seconds, passed on to every plugin
    '''
    from calibre.customize.ui import cover_sources
    abort = Event()
    temp = Queue()
    getters = [partial(p.get_covers, mi, temp, abort, timeout=timeout) for p in
            cover_sources()]
    workers = [Thread(target=c) for c in getters]
    for w in workers:
        w.daemon = True
        w.start()
    count = 0
    while count < max_covers:
        try:
            # Block briefly instead of spinning on get_nowait()
            result = temp.get(timeout=0.1)
        except Empty:
            if not any(w.is_alive() for w in workers):
                break
            continue
        if result[0]:
            count += 1
        result_queue.put(result)

    abort.set()

    # Forward anything that was queued before the workers stopped
    while True:
        try:
            result_queue.put(temp.get_nowait())
        except Empty:
            break


def download_cover(mi, timeout=5.):
    '''
    Download a single cover for mi.

    :param mi: MetaInformation object
    :param timeout: timeout in seconds, passed on to every plugin
    :return: A tuple ``(cover_data, errors)``. cover_data is None if no
             plugin succeeded, errors is the list of failure tuples put into
             the queue by the plugins.
    '''
    results = Queue()
    download_covers(mi, results, max_covers=1, timeout=timeout)
    errors, ans = [], None
    while True:
        try:
            x = results.get_nowait()
        except Empty:
            break
        if x[0]:
            ans = x[1]
        else:
            errors.append(x)
    return ans, errors


def test(isbns):
    '''
    For every ISBN in isbns, print whether a cover is available and whether
    downloading it worked, listing the per plugin errors on failure.
    '''
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation('test', ['test'])
    for isbn in isbns:
        prints('Testing ISBN:', isbn)
        mi.isbn = isbn
        found = check_for_cover(mi)
        prints('Has cover:', found)
        ans, errors = download_cover(mi)
        if ans is not None:
            prints('Cover downloaded')
        else:
            prints('Download failed:')
            for err in errors:
                prints('\t', err[-1]+':', err[1])
        print('\n')


if __name__ == '__main__':
    isbns = sys.argv[1:] + ['9781591025412', '9780307272119']
    test(isbns)