mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
New google and amazon metadata plugins finished
This commit is contained in:
parent
b3a633d3ed
commit
c53f66f752
@ -227,6 +227,11 @@ class Metadata(object):
|
|||||||
if val:
|
if val:
|
||||||
identifiers[typ] = val
|
identifiers[typ] = val
|
||||||
|
|
||||||
|
def has_identifier(self, typ):
|
||||||
|
identifiers = object.__getattribute__(self,
|
||||||
|
'_data')['identifiers']
|
||||||
|
return typ in identifiers
|
||||||
|
|
||||||
# field-oriented interface. Intended to be the same as in LibraryDatabase
|
# field-oriented interface. Intended to be the same as in LibraryDatabase
|
||||||
|
|
||||||
def standard_field_keys(self):
|
def standard_field_keys(self):
|
||||||
|
@ -22,7 +22,7 @@ from calibre.ebooks.metadata.book.base import Metadata
|
|||||||
from calibre.library.comments import sanitize_comments_html
|
from calibre.library.comments import sanitize_comments_html
|
||||||
from calibre.utils.date import parse_date
|
from calibre.utils.date import parse_date
|
||||||
|
|
||||||
class Worker(Thread):
|
class Worker(Thread): # {{{
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Get book details from amazons book page in a separate thread
|
Get book details from amazons book page in a separate thread
|
||||||
@ -253,7 +253,7 @@ class Worker(Thread):
|
|||||||
ans = x.tail.strip()
|
ans = x.tail.strip()
|
||||||
if ans == 'English':
|
if ans == 'English':
|
||||||
return 'en'
|
return 'en'
|
||||||
|
# }}}
|
||||||
|
|
||||||
class Amazon(Source):
|
class Amazon(Source):
|
||||||
|
|
||||||
@ -270,7 +270,7 @@ class Amazon(Source):
|
|||||||
'de' : _('Germany'),
|
'de' : _('Germany'),
|
||||||
}
|
}
|
||||||
|
|
||||||
def create_query(self, log, title=None, authors=None, identifiers={}):
|
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
|
||||||
domain = self.prefs.get('domain', 'com')
|
domain = self.prefs.get('domain', 'com')
|
||||||
|
|
||||||
# See the amazon detailed search page to get all options
|
# See the amazon detailed search page to get all options
|
||||||
@ -313,8 +313,9 @@ class Amazon(Source):
|
|||||||
url = 'http://www.amazon.%s/s/?'%domain + urlencode(utf8q)
|
url = 'http://www.amazon.%s/s/?'%domain + urlencode(utf8q)
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
def identify(self, log, result_queue, abort, title=None, authors=None,
|
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
|
||||||
identifiers={}, timeout=20):
|
identifiers={}, timeout=20):
|
||||||
'''
|
'''
|
||||||
Note this method will retry without identifiers automatically if no
|
Note this method will retry without identifiers automatically if no
|
||||||
@ -416,6 +417,7 @@ class Amazon(Source):
|
|||||||
w.cover_url)
|
w.cover_url)
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -42,7 +42,7 @@ subject = XPath('descendant::dc:subject')
|
|||||||
description = XPath('descendant::dc:description')
|
description = XPath('descendant::dc:description')
|
||||||
language = XPath('descendant::dc:language')
|
language = XPath('descendant::dc:language')
|
||||||
|
|
||||||
def get_details(browser, url, timeout):
|
def get_details(browser, url, timeout): # {{{
|
||||||
try:
|
try:
|
||||||
raw = browser.open_novisit(url, timeout=timeout).read()
|
raw = browser.open_novisit(url, timeout=timeout).read()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -54,8 +54,9 @@ def get_details(browser, url, timeout):
|
|||||||
raw = browser.open_novisit(url, timeout=timeout).read()
|
raw = browser.open_novisit(url, timeout=timeout).read()
|
||||||
|
|
||||||
return raw
|
return raw
|
||||||
|
# }}}
|
||||||
|
|
||||||
def to_metadata(browser, log, entry_, timeout):
|
def to_metadata(browser, log, entry_, timeout): # {{{
|
||||||
|
|
||||||
def get_text(extra, x):
|
def get_text(extra, x):
|
||||||
try:
|
try:
|
||||||
@ -94,12 +95,6 @@ def to_metadata(browser, log, entry_, timeout):
|
|||||||
#mi.language = get_text(extra, language)
|
#mi.language = get_text(extra, language)
|
||||||
mi.publisher = get_text(extra, publisher)
|
mi.publisher = get_text(extra, publisher)
|
||||||
|
|
||||||
# Author sort
|
|
||||||
for x in creator(extra):
|
|
||||||
for key, val in x.attrib.items():
|
|
||||||
if key.endswith('file-as') and val and val.strip():
|
|
||||||
mi.author_sort = val
|
|
||||||
break
|
|
||||||
# ISBN
|
# ISBN
|
||||||
isbns = []
|
isbns = []
|
||||||
for x in identifier(extra):
|
for x in identifier(extra):
|
||||||
@ -137,7 +132,7 @@ def to_metadata(browser, log, entry_, timeout):
|
|||||||
|
|
||||||
|
|
||||||
return mi
|
return mi
|
||||||
|
# }}}
|
||||||
|
|
||||||
class GoogleBooks(Source):
|
class GoogleBooks(Source):
|
||||||
|
|
||||||
@ -146,10 +141,10 @@ class GoogleBooks(Source):
|
|||||||
|
|
||||||
capabilities = frozenset(['identify'])
|
capabilities = frozenset(['identify'])
|
||||||
touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
|
touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
|
||||||
'comments', 'publisher', 'author_sort', 'identifier:isbn',
|
'comments', 'publisher', 'identifier:isbn',
|
||||||
'identifier:google']) # language currently disabled
|
'identifier:google']) # language currently disabled
|
||||||
|
|
||||||
def create_query(self, log, title=None, authors=None, identifiers={}):
|
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
|
||||||
BASE_URL = 'http://books.google.com/books/feeds/volumes?'
|
BASE_URL = 'http://books.google.com/books/feeds/volumes?'
|
||||||
isbn = check_isbn(identifiers.get('isbn', None))
|
isbn = check_isbn(identifiers.get('isbn', None))
|
||||||
q = ''
|
q = ''
|
||||||
@ -177,6 +172,7 @@ class GoogleBooks(Source):
|
|||||||
'start-index':1,
|
'start-index':1,
|
||||||
'min-viewability':'none',
|
'min-viewability':'none',
|
||||||
})
|
})
|
||||||
|
# }}}
|
||||||
|
|
||||||
def cover_url_from_identifiers(self, identifiers):
|
def cover_url_from_identifiers(self, identifiers):
|
||||||
goog = identifiers.get('google', None)
|
goog = identifiers.get('google', None)
|
||||||
@ -209,11 +205,11 @@ class GoogleBooks(Source):
|
|||||||
if abort.is_set():
|
if abort.is_set():
|
||||||
break
|
break
|
||||||
|
|
||||||
def identify(self, log, result_queue, abort, title=None, authors=None,
|
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
|
||||||
identifiers={}, timeout=20):
|
identifiers={}, timeout=20):
|
||||||
query = self.create_query(log, title=title, authors=authors,
|
query = self.create_query(log, title=title, authors=authors,
|
||||||
identifiers=identifiers)
|
identifiers=identifiers)
|
||||||
br = self.browser()
|
br = self.browser
|
||||||
try:
|
try:
|
||||||
raw = br.open_novisit(query, timeout=timeout).read()
|
raw = br.open_novisit(query, timeout=timeout).read()
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
@ -234,6 +230,7 @@ class GoogleBooks(Source):
|
|||||||
self.get_all_details(br, log, entries, abort, result_queue, timeout)
|
self.get_all_details(br, log, entries, abort, result_queue, timeout)
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
# }}}
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py
|
# To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py
|
||||||
|
@ -102,6 +102,16 @@ def test_identify_plugin(name, tests):
|
|||||||
prints('Log saved to', lf)
|
prints('Log saved to', lf)
|
||||||
raise SystemExit(1)
|
raise SystemExit(1)
|
||||||
|
|
||||||
|
for key in plugin.touched_fields:
|
||||||
|
if key.startswith('identifier:'):
|
||||||
|
key = key.partition(':')[-1]
|
||||||
|
if not match_found.has_identifier(key):
|
||||||
|
prints('Failed to find identifier:', key)
|
||||||
|
raise SystemExit(1)
|
||||||
|
elif match_found.is_null(key):
|
||||||
|
prints('Failed to find', key)
|
||||||
|
raise SystemExit(1)
|
||||||
|
|
||||||
prints('Average time per query', sum(times)/len(times))
|
prints('Average time per query', sum(times)/len(times))
|
||||||
|
|
||||||
if os.stat(lf).st_size > 10:
|
if os.stat(lf).st_size > 10:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user