New google and amazon metadata plugins finished

This commit is contained in:
Kovid Goyal 2011-03-16 22:00:45 -06:00
parent b3a633d3ed
commit c53f66f752
4 changed files with 31 additions and 17 deletions

View File

@ -227,6 +227,11 @@ class Metadata(object):
if val: if val:
identifiers[typ] = val identifiers[typ] = val
def has_identifier(self, typ):
    '''
    Return True if an identifier of type typ is present in this object's
    identifiers mapping, False otherwise.
    '''
    # Fetch _data through object.__getattribute__ (rather than self._data)
    # so the lookup goes straight to the base object implementation.
    data = object.__getattribute__(self, '_data')
    return typ in data['identifiers']
# field-oriented interface. Intended to be the same as in LibraryDatabase # field-oriented interface. Intended to be the same as in LibraryDatabase
def standard_field_keys(self): def standard_field_keys(self):

View File

@ -22,7 +22,7 @@ from calibre.ebooks.metadata.book.base import Metadata
from calibre.library.comments import sanitize_comments_html from calibre.library.comments import sanitize_comments_html
from calibre.utils.date import parse_date from calibre.utils.date import parse_date
class Worker(Thread): class Worker(Thread): # {{{
''' '''
Get book details from amazons book page in a separate thread Get book details from amazons book page in a separate thread
@ -253,7 +253,7 @@ class Worker(Thread):
ans = x.tail.strip() ans = x.tail.strip()
if ans == 'English': if ans == 'English':
return 'en' return 'en'
# }}}
class Amazon(Source): class Amazon(Source):
@ -270,7 +270,7 @@ class Amazon(Source):
'de' : _('Germany'), 'de' : _('Germany'),
} }
def create_query(self, log, title=None, authors=None, identifiers={}): def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
domain = self.prefs.get('domain', 'com') domain = self.prefs.get('domain', 'com')
# See the amazon detailed search page to get all options # See the amazon detailed search page to get all options
@ -313,8 +313,9 @@ class Amazon(Source):
url = 'http://www.amazon.%s/s/?'%domain + urlencode(utf8q) url = 'http://www.amazon.%s/s/?'%domain + urlencode(utf8q)
return url return url
# }}}
def identify(self, log, result_queue, abort, title=None, authors=None, def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
identifiers={}, timeout=20): identifiers={}, timeout=20):
''' '''
Note this method will retry without identifiers automatically if no Note this method will retry without identifiers automatically if no
@ -416,6 +417,7 @@ class Amazon(Source):
w.cover_url) w.cover_url)
return None return None
# }}}
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -42,7 +42,7 @@ subject = XPath('descendant::dc:subject')
description = XPath('descendant::dc:description') description = XPath('descendant::dc:description')
language = XPath('descendant::dc:language') language = XPath('descendant::dc:language')
def get_details(browser, url, timeout): def get_details(browser, url, timeout): # {{{
try: try:
raw = browser.open_novisit(url, timeout=timeout).read() raw = browser.open_novisit(url, timeout=timeout).read()
except Exception as e: except Exception as e:
@ -54,8 +54,9 @@ def get_details(browser, url, timeout):
raw = browser.open_novisit(url, timeout=timeout).read() raw = browser.open_novisit(url, timeout=timeout).read()
return raw return raw
# }}}
def to_metadata(browser, log, entry_, timeout): def to_metadata(browser, log, entry_, timeout): # {{{
def get_text(extra, x): def get_text(extra, x):
try: try:
@ -94,12 +95,6 @@ def to_metadata(browser, log, entry_, timeout):
#mi.language = get_text(extra, language) #mi.language = get_text(extra, language)
mi.publisher = get_text(extra, publisher) mi.publisher = get_text(extra, publisher)
# Author sort
for x in creator(extra):
for key, val in x.attrib.items():
if key.endswith('file-as') and val and val.strip():
mi.author_sort = val
break
# ISBN # ISBN
isbns = [] isbns = []
for x in identifier(extra): for x in identifier(extra):
@ -137,7 +132,7 @@ def to_metadata(browser, log, entry_, timeout):
return mi return mi
# }}}
class GoogleBooks(Source): class GoogleBooks(Source):
@ -146,10 +141,10 @@ class GoogleBooks(Source):
capabilities = frozenset(['identify']) capabilities = frozenset(['identify'])
touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate', touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
'comments', 'publisher', 'author_sort', 'identifier:isbn', 'comments', 'publisher', 'identifier:isbn',
'identifier:google']) # language currently disabled 'identifier:google']) # language currently disabled
def create_query(self, log, title=None, authors=None, identifiers={}): def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
BASE_URL = 'http://books.google.com/books/feeds/volumes?' BASE_URL = 'http://books.google.com/books/feeds/volumes?'
isbn = check_isbn(identifiers.get('isbn', None)) isbn = check_isbn(identifiers.get('isbn', None))
q = '' q = ''
@ -177,6 +172,7 @@ class GoogleBooks(Source):
'start-index':1, 'start-index':1,
'min-viewability':'none', 'min-viewability':'none',
}) })
# }}}
def cover_url_from_identifiers(self, identifiers): def cover_url_from_identifiers(self, identifiers):
goog = identifiers.get('google', None) goog = identifiers.get('google', None)
@ -209,11 +205,11 @@ class GoogleBooks(Source):
if abort.is_set(): if abort.is_set():
break break
def identify(self, log, result_queue, abort, title=None, authors=None, def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
identifiers={}, timeout=20): identifiers={}, timeout=20):
query = self.create_query(log, title=title, authors=authors, query = self.create_query(log, title=title, authors=authors,
identifiers=identifiers) identifiers=identifiers)
br = self.browser() br = self.browser
try: try:
raw = br.open_novisit(query, timeout=timeout).read() raw = br.open_novisit(query, timeout=timeout).read()
except Exception, e: except Exception, e:
@ -234,6 +230,7 @@ class GoogleBooks(Source):
self.get_all_details(br, log, entries, abort, result_queue, timeout) self.get_all_details(br, log, entries, abort, result_queue, timeout)
return None return None
# }}}
if __name__ == '__main__': if __name__ == '__main__':
# To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py # To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py

View File

@ -102,6 +102,16 @@ def test_identify_plugin(name, tests):
prints('Log saved to', lf) prints('Log saved to', lf)
raise SystemExit(1) raise SystemExit(1)
for key in plugin.touched_fields:
if key.startswith('identifier:'):
key = key.partition(':')[-1]
if not match_found.has_identifier(key):
prints('Failed to find identifier:', key)
raise SystemExit(1)
elif match_found.is_null(key):
prints('Failed to find', key)
raise SystemExit(1)
prints('Average time per query', sum(times)/len(times)) prints('Average time per query', sum(times)/len(times))
if os.stat(lf).st_size > 10: if os.stat(lf).st_size > 10: