From 4ae11fa29565c5e7f3625fb4cc11c7cb4ef1a0a8 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 26 Feb 2017 13:00:36 +0530
Subject: [PATCH] pep8

---
 src/calibre/ebooks/metadata/sources/google.py | 240 +++++++++++-------
 1 file changed, 148 insertions(+), 92 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py
index 9cff2cc528..2fbc60ff70 100644
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@@ -1,37 +1,34 @@
 #!/usr/bin/env python2
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-from __future__ import (unicode_literals, division, absolute_import,
-                        print_function)
+# License: GPLv3 Copyright: 2011, Kovid Goyal
+from __future__ import absolute_import, division, print_function, unicode_literals
 
-__license__ = 'GPL v3'
-__copyright__ = '2011, Kovid Goyal '
-__docformat__ = 'restructuredtext en'
-
-import time, hashlib
+import hashlib
+import time
 from functools import partial
-from Queue import Queue, Empty
+from Queue import Empty, Queue
 
-from calibre.ebooks.metadata import check_isbn
-from calibre.ebooks.metadata.sources.base import Source
-from calibre.ebooks.metadata.book.base import Metadata
+from calibre import as_unicode
 from calibre.ebooks.chardet import xml_to_unicode
+from calibre.ebooks.metadata import check_isbn
+from calibre.ebooks.metadata.book.base import Metadata
+from calibre.ebooks.metadata.sources.base import Source
 from calibre.utils.cleantext import clean_ascii_chars
 from calibre.utils.localization import canonicalize_lang
-from calibre import as_unicode
 
 NAMESPACES = {
-    'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
-    'atom' : 'http://www.w3.org/2005/Atom',
-    'dc' : 'http://purl.org/dc/terms',
-    'gd' : 'http://schemas.google.com/g/2005'
-    }
+    'openSearch': 'http://a9.com/-/spec/opensearchrss/1.0/',
+    'atom': 'http://www.w3.org/2005/Atom',
+    'dc': 'http://purl.org/dc/terms',
+    'gd': 'http://schemas.google.com/g/2005'
+}
 
 
 def get_details(browser, url, timeout): # {{{
     try:
         raw = browser.open_novisit(url, timeout=timeout).read()
     except Exception as e:
-        gc = getattr(e, 'getcode', lambda : -1)
+        gc = getattr(e, 'getcode', lambda: -1)
         if gc() != 403:
             raise
         # Google is throttling us, wait a little
@@ -39,6 +36,8 @@ def get_details(browser, url, timeout): # {{{
         raw = browser.open_novisit(url, timeout=timeout).read()
 
     return raw
+
+
 # }}}
 
 
@@ -49,17 +48,17 @@ def to_metadata(browser, log, entry_, timeout): # {{{
     # total_results = XPath('//openSearch:totalResults')
     # start_index = XPath('//openSearch:startIndex')
     # items_per_page = XPath('//openSearch:itemsPerPage')
-    entry          = XPath('//atom:entry')
-    entry_id       = XPath('descendant::atom:id')
-    creator        = XPath('descendant::dc:creator')
-    identifier     = XPath('descendant::dc:identifier')
-    title          = XPath('descendant::dc:title')
-    date           = XPath('descendant::dc:date')
-    publisher      = XPath('descendant::dc:publisher')
-    subject        = XPath('descendant::dc:subject')
-    description    = XPath('descendant::dc:description')
-    language       = XPath('descendant::dc:language')
-    rating         = XPath('descendant::gd:rating[@average]')
+    entry = XPath('//atom:entry')
+    entry_id = XPath('descendant::atom:id')
+    creator = XPath('descendant::dc:creator')
+    identifier = XPath('descendant::dc:identifier')
+    title = XPath('descendant::dc:title')
+    date = XPath('descendant::dc:date')
+    publisher = XPath('descendant::dc:publisher')
+    subject = XPath('descendant::dc:subject')
+    description = XPath('descendant::dc:description')
+    language = XPath('descendant::dc:language')
+    rating = XPath('descendant::gd:rating[@average]')
 
     def get_text(extra, x):
         try:
@@ -83,11 +82,12 @@ def to_metadata(browser, log, entry_, timeout): # {{{
         return None
 
     mi = Metadata(title_, authors)
-    mi.identifiers = {'google':google_id}
+    mi.identifiers = {'google': google_id}
     try:
         raw = get_details(browser, id_url, timeout)
-        feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
-            strip_encoding_pats=True)[0])
+        feed = etree.fromstring(
+            xml_to_unicode(clean_ascii_chars(raw), strip_encoding_pats=True)[0]
+        )
         extra = entry(feed)[0]
     except:
         log.exception('Failed to get additional details for', mi.title)
@@ -135,7 +135,7 @@ def to_metadata(browser, log, entry_, timeout): # {{{
             default = utcnow().replace(day=15)
             mi.pubdate = parse_date(pubdate, assume_utc=True, default=default)
         except:
-            log.error('Failed to parse pubdate %r'%pubdate)
+            log.error('Failed to parse pubdate %r' % pubdate)
 
     # Ratings
     for x in rating(extra):
@@ -149,11 +149,14 @@ def to_metadata(browser, log, entry_, timeout): # {{{
     # Cover
     mi.has_google_cover = None
     for x in extra.xpath(
-            '//*[@href and @rel="http://schemas.google.com/books/2008/thumbnail"]'):
+        '//*[@href and @rel="http://schemas.google.com/books/2008/thumbnail"]'
+    ):
         mi.has_google_cover = x.get('href')
         break
 
     return mi
+
+
 # }}}
 
 
@@ -162,21 +165,23 @@ class GoogleBooks(Source):
     name = 'Google'
     description = _('Downloads metadata and covers from Google Books')
 
-    capabilities = frozenset(['identify', 'cover'])
-    touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
-        'comments', 'publisher', 'identifier:isbn', 'rating',
-        'identifier:google', 'languages'])
+    capabilities = frozenset({'identify', 'cover'})
+    touched_fields = frozenset({
+        'title', 'authors', 'tags', 'pubdate', 'comments', 'publisher',
+        'identifier:isbn', 'rating', 'identifier:google', 'languages'
+    })
     supports_gzip_transfer_encoding = True
     cached_cover_url_is_reliable = False
 
    GOOGLE_COVER = 'https://books.google.com/books?id=%s&printsec=frontcover&img=1'
 
-    DUMMY_IMAGE_MD5 = frozenset(['0de4383ebad0adad5eeb8975cd796657'])
+    DUMMY_IMAGE_MD5 = frozenset({'0de4383ebad0adad5eeb8975cd796657'})
 
     def get_book_url(self, identifiers): # {{{
         goog = identifiers.get('google', None)
         if goog is not None:
-            return ('google', goog, 'https://books.google.com/books?id=%s'%goog)
+            return ('google', goog, 'https://books.google.com/books?id=%s' % goog)
+
     # }}}
 
     def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
@@ -185,39 +190,55 @@ class GoogleBooks(Source):
         isbn = check_isbn(identifiers.get('isbn', None))
         q = ''
         if isbn is not None:
-            q += 'isbn:'+isbn
+            q += 'isbn:' + isbn
         elif title or authors:
+
             def build_term(prefix, parts):
-                return ' '.join('in'+prefix + ':' + x for x in parts)
+                return ' '.join('in' + prefix + ':' + x for x in parts)
+
             title_tokens = list(self.get_title_tokens(title))
             if title_tokens:
                 q += build_term('title', title_tokens)
-            author_tokens = self.get_author_tokens(authors,
-                    only_first_author=True)
+            author_tokens = self.get_author_tokens(authors, only_first_author=True)
             if author_tokens:
-                q += ('+' if q else '') + build_term('author',
-                        author_tokens)
+                q += ('+' if q else '') + build_term('author', author_tokens)
 
         if isinstance(q, unicode):
             q = q.encode('utf-8')
         if not q:
             return None
-        return BASE_URL+urlencode({
-                'q':q,
-                'max-results':20,
-                'start-index':1,
-                'min-viewability':'none',
-                })
+        return BASE_URL + urlencode({
+            'q': q,
+            'max-results': 20,
+            'start-index': 1,
+            'min-viewability': 'none',
+        })
+
     # }}}
 
-    def download_cover(self, log, result_queue, abort, # {{{
-            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
+    def download_cover(  # {{{
+        self,
+        log,
+        result_queue,
+        abort,
+        title=None,
+        authors=None,
+        identifiers={},
+        timeout=30,
+        get_best_cover=False
+    ):
         cached_url = self.get_cached_cover_url(identifiers)
         if cached_url is None:
             log.info('No cached cover found, running identify')
             rq = Queue()
-            self.identify(log, rq, abort, title=title, authors=authors,
-                    identifiers=identifiers)
+            self.identify(
+                log,
+                rq,
+                abort,
+                title=title,
+                authors=authors,
+                identifiers=identifiers
+            )
             if abort.is_set():
                 return
             results = []
@@ -226,8 +247,11 @@ class GoogleBooks(Source):
                     results.append(rq.get_nowait())
                 except Empty:
                     break
-            results.sort(key=self.identify_results_keygen(
-                title=title, authors=authors, identifiers=identifiers))
+            results.sort(
+                key=self.identify_results_keygen(
+                    title=title, authors=authors, identifiers=identifiers
+                )
+            )
             for mi in results:
                 cached_url = self.get_cached_cover_url(mi.identifiers)
                 if cached_url is not None:
@@ -263,10 +287,18 @@ class GoogleBooks(Source):
             url = self.cached_identifier_to_cover_url(goog)
 
         return url
+
     # }}}
 
-    def get_all_details(self, br, log, entries, abort, # {{{
-            result_queue, timeout):
+    def get_all_details(  # {{{
+        self,
+        br,
+        log,
+        entries,
+        abort,
+        result_queue,
+        timeout
+    ):
         from lxml import etree
         for relevance, i in enumerate(entries):
             try:
@@ -277,26 +309,37 @@ class GoogleBooks(Source):
                     for isbn in getattr(ans, 'all_isbns', []):
                         self.cache_isbn_to_identifier(isbn, goog)
                     if getattr(ans, 'has_google_cover', False):
-                        self.cache_identifier_to_cover_url(goog,
-                                self.GOOGLE_COVER%goog)
+                        self.cache_identifier_to_cover_url(
+                            goog, self.GOOGLE_COVER % goog
+                        )
                     self.clean_downloaded_metadata(ans)
                     result_queue.put(ans)
             except:
                 log.exception(
-                    'Failed to get metadata for identify entry:',
-                    etree.tostring(i))
+                    'Failed to get metadata for identify entry:', etree.tostring(i)
+                )
             if abort.is_set():
                 break
+
     # }}}
 
-    def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
-            identifiers={}, timeout=30):
+    def identify(  # {{{
+        self,
+        log,
+        result_queue,
+        abort,
+        title=None,
+        authors=None,
+        identifiers={},
+        timeout=30
+    ):
         from lxml import etree
         XPath = partial(etree.XPath, namespaces=NAMESPACES)
-        entry          = XPath('//atom:entry')
+        entry = XPath('//atom:entry')
 
-        query = self.create_query(log, title=title, authors=authors,
-                identifiers=identifiers)
+        query = self.create_query(
+            log, title=title, authors=authors, identifiers=identifiers
+        )
         if not query:
             log.error('Insufficient metadata to construct query')
             return
@@ -304,13 +347,15 @@ class GoogleBooks(Source):
         try:
             raw = br.open_novisit(query, timeout=timeout).read()
         except Exception as e:
-            log.exception('Failed to make identify query: %r'%query)
+            log.exception('Failed to make identify query: %r' % query)
             return as_unicode(e)
 
         try:
             parser = etree.XMLParser(recover=True, no_network=True)
-            feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
-                strip_encoding_pats=True)[0], parser=parser)
+            feed = etree.fromstring(
+                xml_to_unicode(clean_ascii_chars(raw), strip_encoding_pats=True)[0],
+                parser=parser
+            )
             entries = entry(feed)
         except Exception as e:
             log.exception('Failed to parse identify results')
@@ -318,34 +363,45 @@ class GoogleBooks(Source):
 
         if not entries and identifiers and title and authors and \
                 not abort.is_set():
-            return self.identify(log, result_queue, abort, title=title,
-                    authors=authors, timeout=timeout)
+            return self.identify(
+                log,
+                result_queue,
+                abort,
+                title=title,
+                authors=authors,
+                timeout=timeout
+            )
 
         # There is no point running these queries in threads as google
         # throttles requests returning 403 Forbidden errors
         self.get_all_details(br, log, entries, abort, result_queue, timeout)
 
         return None
+
     # }}}
+
 
 if __name__ == '__main__': # tests {{{
     # To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py
-    from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
-            title_test, authors_test)
-    test_identify_plugin(GoogleBooks.name,
-        [
-
-
-            (
-                {'identifiers':{'isbn': '0743273567'}, 'title':'Great Gatsby',
-                    'authors':['Fitzgerald']},
-                [title_test('The great gatsby', exact=True),
-                    authors_test(['F. Scott Fitzgerald'])]
-            ),
-
-            (
-                {'title': 'Flatland', 'authors':['Abbott']},
-                [title_test('Flatland', exact=False)]
-            ),
-    ])
+    from calibre.ebooks.metadata.sources.test import (
+        test_identify_plugin, title_test, authors_test
+    )
+    test_identify_plugin(
+        GoogleBooks.name, [
+            ({
+                'identifiers': {
+                    'isbn': '0743273567'
+                },
+                'title': 'Great Gatsby',
+                'authors': ['Fitzgerald']
+            }, [
+                title_test('The great gatsby', exact=True),
+                authors_test(['F. Scott Fitzgerald'])
+            ]),
+            ({
+                'title': 'Flatland',
+                'authors': ['Abbott']
+            }, [title_test('Flatland', exact=False)]),
+        ]
+    )
 # }}}