diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 900ef33a4c..261f637ac5 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -832,11 +832,10 @@
 from calibre.ebooks.metadata.sources.google import GoogleBooks
 from calibre.ebooks.metadata.sources.amazon import Amazon
 from calibre.ebooks.metadata.sources.edelweiss import Edelweiss
 from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
-from calibre.ebooks.metadata.sources.overdrive import OverDrive
 from calibre.ebooks.metadata.sources.google_images import GoogleImages
 from calibre.ebooks.metadata.sources.big_book_search import BigBookSearch

-plugins += [GoogleBooks, GoogleImages, Amazon, Edelweiss, OpenLibrary, OverDrive, BigBookSearch]
+plugins += [GoogleBooks, GoogleImages, Amazon, Edelweiss, OpenLibrary, BigBookSearch]

 # }}}
diff --git a/src/calibre/ebooks/metadata/sources/overdrive.py b/src/calibre/ebooks/metadata/sources/overdrive.py
deleted file mode 100644
index 5a02738dd6..0000000000
--- a/src/calibre/ebooks/metadata/sources/overdrive.py
+++ /dev/null
@@ -1,486 +0,0 @@
-#!/usr/bin/env python
-from __future__ import absolute_import, division, print_function, unicode_literals
-
-__license__ = 'GPL v3'
-__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
-
-'''
-Fetch metadata using Overdrive Content Reserve
-'''
-import re, random, copy, json
-from threading import RLock
-try:
-    from queue import Empty, Queue
-except ImportError:
-    from Queue import Empty, Queue
-
-
-from calibre.ebooks.metadata import check_isbn
-from calibre.ebooks.metadata.sources.base import Source, Option
-from calibre.ebooks.metadata.book.base import Metadata
-
-ovrdrv_data_cache = {}
-cache_lock = RLock()
-base_url = 'https://search.overdrive.com/'
-
-
-class OverDrive(Source):
-
-    name = 'Overdrive'
-    version = (1, 0, 1)
-    minimum_calibre_version = (2, 80, 0)
-    description = _('Downloads metadata and covers from Overdrive\'s Content Reserve')
-
-    capabilities = frozenset(['identify', 'cover'])
-    touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
-        'comments', 'publisher', 'identifier:isbn', 'series', 'series_index',
-        'languages', 'identifier:overdrive'])
-    has_html_comments = True
-    supports_gzip_transfer_encoding = False
-    cached_cover_url_is_reliable = True
-
-    options = (
-            Option('get_full_metadata', 'bool', True,
-                _('Download all metadata (slow)'),
-                _('Enable this option to gather all metadata available from Overdrive.')),
-            )
-
-    config_help_message = '<p>'+_('Additional metadata can be taken from Overdrive\'s book detail'
-            ' page. This includes a limited set of tags used by libraries, comments, language,'
-            ' and the e-book ISBN. Collecting this data is disabled by default due to the extra'
-            ' time required. Check the <b>download all metadata</b> option below to'
-            ' enable downloading this data.')
-
-    def identify(self, log, result_queue, abort, title=None, authors=None,  # {{{
-            identifiers={}, timeout=30):
-        ovrdrv_id = identifiers.get('overdrive', None)
-        isbn = identifiers.get('isbn', None)
-
-        br = self.browser
-        ovrdrv_data = self.to_ovrdrv_data(br, log, title, authors, ovrdrv_id)
-        if ovrdrv_data:
-            title = ovrdrv_data[8]
-            authors = ovrdrv_data[6]
-            mi = Metadata(title, authors)
-            self.parse_search_results(ovrdrv_data, mi)
-            if ovrdrv_id is None:
-                ovrdrv_id = ovrdrv_data[7]
-
-            if self.prefs['get_full_metadata']:
-                self.get_book_detail(br, ovrdrv_data[1], mi, ovrdrv_id, log)
-
-            if isbn is not None:
-                self.cache_isbn_to_identifier(isbn, ovrdrv_id)
-
-            result_queue.put(mi)
-
-        return None
-    # }}}
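-    # Annotation (not part of the original file): to_ovrdrv_data() returns the
-    # list built by format_results() below, i.e. [cover_url, social_metadata_url,
-    # worldcatlink, series, series_num, publisher, creators, reserveid, title],
-    # which is why identify() reads the title from index 8, the creators from
-    # index 6 and the reserve ID from index 7.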
-
-    def download_cover(self, log, result_queue, abort,  # {{{
-            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
-        import mechanize
-        cached_url = self.get_cached_cover_url(identifiers)
-        if cached_url is None:
-            log.info('No cached cover found, running identify')
-            rq = Queue()
-            self.identify(log, rq, abort, title=title, authors=authors,
-                    identifiers=identifiers)
-            if abort.is_set():
-                return
-            results = []
-            while True:
-                try:
-                    results.append(rq.get_nowait())
-                except Empty:
-                    break
-            results.sort(key=self.identify_results_keygen(
-                title=title, authors=authors, identifiers=identifiers))
-            for mi in results:
-                cached_url = self.get_cached_cover_url(mi.identifiers)
-                if cached_url is not None:
-                    break
-        if cached_url is None:
-            log.info('No cover found')
-            return
-
-        if abort.is_set():
-            return
-
-        ovrdrv_id = identifiers.get('overdrive', None)
-        br = self.browser
-        req = mechanize.Request(cached_url)
-        if ovrdrv_id is not None:
-            referer = self.get_base_referer()+'ContentDetails-Cover.htm?ID='+ovrdrv_id
-            req.add_header('referer', referer)
-
-        log('Downloading cover from:', cached_url)
-        try:
-            cdata = br.open_novisit(req, timeout=timeout).read()
-            result_queue.put((self, cdata))
-        except:
-            log.exception('Failed to download cover from:', cached_url)
-    # }}}
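-    # Annotation (not part of the original file): when no cover URL is cached
-    # yet, identify() is run inline with a private queue and its results are
-    # drained only to populate the identifier-to-cover-URL cache that
-    # get_cached_cover_url() reads.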
-
-    def get_cached_cover_url(self, identifiers):  # {{{
-        url = None
-        ovrdrv_id = identifiers.get('overdrive', None)
-        if ovrdrv_id is None:
-            isbn = identifiers.get('isbn', None)
-            if isbn is not None:
-                ovrdrv_id = self.cached_isbn_to_identifier(isbn)
-        if ovrdrv_id is not None:
-            url = self.cached_identifier_to_cover_url(ovrdrv_id)
-
-        return url
-    # }}}
-
-    def get_base_referer(self):  # to be used for passing referrer headers to cover download
-        choices = [
-            'https://overdrive.chipublib.org/82DC601D-7DDE-4212-B43A-09D821935B01/10/375/en/',
-            'https://emedia.clevnet.org/9D321DAD-EC0D-490D-BFD8-64AE2C96ECA8/10/241/en/',
-            'https://singapore.lib.overdrive.com/F11D55BE-A917-4D63-8111-318E88B29740/10/382/en/',
-            'https://ebooks.nypl.org/20E48048-A377-4520-BC43-F8729A42A424/10/257/en/',
-            'https://spl.lib.overdrive.com/5875E082-4CB2-4689-9426-8509F354AFEF/10/335/en/'
-        ]
-        return choices[random.randint(0, len(choices)-1)]
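-    # Annotation (not part of the original file): the random choice above
-    # spreads cover requests across several known library-specific OverDrive
-    # sites, since download_cover() sends one of these URLs as the referer.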
-
-    def format_results(self, reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid):
-        fix_slashes = re.compile(r'\\/')
-        thumbimage = fix_slashes.sub('/', thumbimage)
-        worldcatlink = fix_slashes.sub('/', worldcatlink)
-        cover_url = re.sub(r'(?P<img>(Ima?g(eType-)?))200', r'\g<img>100', thumbimage)
-        social_metadata_url = base_url+'TitleInfo.aspx?ReserveID='+reserveid+'&FormatID='+formatid
-        series_num = ''
-        if not series:
-            if subtitle:
-                title = od_title+': '+subtitle
-            else:
-                title = od_title
-        else:
-            title = od_title
-            m = re.search("([0-9]+$)", subtitle)
-            if m:
-                series_num = float(m.group(1))
-        return [cover_url, social_metadata_url, worldcatlink, series, series_num, publisher, creators, reserveid, title]
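-    # Annotation (not part of the original file): the re.sub() above rewrites
-    # 'ImageType-200'/'Img200' path segments to their '100' variants, turning
-    # the 200-size thumbnail URL into the URL of the 100-size cover image.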
-
-    def safe_query(self, br, query_url, post=''):
-        '''
-        The query must be initialized by loading an empty search results page;
-        this page attempts to set a cookie that Mechanize doesn't like.
-        Copy the cookiejar to a separate instance and make a one-off request with the temp cookiejar.
-        '''
-        import mechanize
-        goodcookies = br._ua_handlers['_cookies'].cookiejar
-        clean_cj = mechanize.CookieJar()
-        cookies_to_copy = []
-        for cookie in goodcookies:
-            copied_cookie = copy.deepcopy(cookie)
-            cookies_to_copy.append(copied_cookie)
-        for copied_cookie in cookies_to_copy:
-            clean_cj.set_cookie(copied_cookie)
-
-        if post:
-            br.open_novisit(query_url, post)
-        else:
-            br.open_novisit(query_url)
-
-        br.set_cookiejar(clean_cj)
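-    # Annotation (not part of the original file): the jar is snapshotted via
-    # copy.deepcopy() before the request, so the cookie set by the search page
-    # is accepted for that one request and then discarded when br's cookiejar
-    # is replaced with the clean snapshot.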
-
-    def overdrive_search(self, br, log, q, title, author):
-        import mechanize
-        # re-initialize the cookiejar so that it's clean
-        clean_cj = mechanize.CookieJar()
-        br.set_cookiejar(clean_cj)
-        q_query = q+'default.aspx/SearchByKeyword'
-        q_init_search = q+'SearchResults.aspx'
-        # get first author as string - convert this to a proper cleanup function later
-        author_tokens = list(self.get_author_tokens(author,
-                only_first_author=True))
-        title_tokens = list(self.get_title_tokens(title,
-                strip_joiners=False, strip_subtitle=True))
-
-        xref_q = ''
-        if len(author_tokens) <= 1:
-            initial_q = ' '.join(title_tokens)
-            xref_q = '+'.join(author_tokens)
-        else:
-            initial_q = ' '.join(author_tokens)
-            for token in title_tokens:
-                if len(xref_q) < len(token):
-                    xref_q = token
-
-        log.error('Initial query is %s'%initial_q)
-        log.error('Cross reference query is %s'%xref_q)
-
-        q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
-        query = '{"szKeyword":"'+initial_q+'"}'
-
-        # main query, requires specific Content Type header
-        req = mechanize.Request(q_query)
-        req.add_header('Content-Type', 'application/json; charset=utf-8')
-        br.open_novisit(req, query)
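-        # Annotation (not part of the original file): the POST above submits the
-        # keyword search ({"szKeyword": ...}); the matching rows are then pulled
-        # as JSON from the GetResults endpoint polled in the loop below.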
-
-        # initiate the search without messing up the cookiejar
-        self.safe_query(br, q_init_search)
-
-        # get the search results object
-        results = False
-        iterations = 0
-        while results is False:
-            iterations += 1
-            xreq = mechanize.Request(q_xref)
-            xreq.add_header('X-Requested-With', 'XMLHttpRequest')
-            xreq.add_header('Referer', q_init_search)
-            xreq.add_header('Accept', 'application/json, text/javascript, */*')
-            raw = br.open_novisit(xreq).read()
-            for m in re.finditer(type('')(r'"iTotalDisplayRecords":(?P