mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
First working version of Douban book plugin.
This commit is contained in:
parent
fabef627e3
commit
ea4b5b9054
@ -25,14 +25,8 @@ from calibre import as_unicode
|
|||||||
NAMESPACES = {
|
NAMESPACES = {
|
||||||
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
|
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
|
||||||
'atom' : 'http://www.w3.org/2005/Atom',
|
'atom' : 'http://www.w3.org/2005/Atom',
|
||||||
'dc' : 'http://purl.org/dc/terms',
|
'db': 'http://www.douban.com/xmlns/',
|
||||||
'gd' : 'http://schemas.google.com/g/2005'
|
'gd': 'http://schemas.google.com/g/2005'
|
||||||
}
|
|
||||||
|
|
||||||
NAMESPACES = {
|
|
||||||
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
|
|
||||||
'atom' : 'http://www.w3.org/2005/Atom',
|
|
||||||
'db': 'http://www.douban.com/xmlns/'
|
|
||||||
}
|
}
|
||||||
XPath = partial(etree.XPath, namespaces=NAMESPACES)
|
XPath = partial(etree.XPath, namespaces=NAMESPACES)
|
||||||
total_results = XPath('//openSearch:totalResults')
|
total_results = XPath('//openSearch:totalResults')
|
||||||
@ -47,6 +41,8 @@ isbn = XPath("descendant::db:attribute[@name='isbn13']")
|
|||||||
date = XPath("descendant::db:attribute[@name='pubdate']")
|
date = XPath("descendant::db:attribute[@name='pubdate']")
|
||||||
creator = XPath("descendant::db:attribute[@name='author']")
|
creator = XPath("descendant::db:attribute[@name='author']")
|
||||||
tag = XPath("descendant::db:tag")
|
tag = XPath("descendant::db:tag")
|
||||||
|
rating = XPath("descendant::gd:rating[@name='average']")
|
||||||
|
cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href")
|
||||||
|
|
||||||
def get_details(browser, url, timeout): # {{{
|
def get_details(browser, url, timeout): # {{{
|
||||||
try:
|
try:
|
||||||
@ -77,7 +73,7 @@ def to_metadata(browser, log, entry_, timeout): # {{{
|
|||||||
|
|
||||||
|
|
||||||
id_url = entry_id(entry_)[0].text
|
id_url = entry_id(entry_)[0].text
|
||||||
google_id = id_url.split('/')[-1]
|
douban_id = id_url.split('/')[-1]
|
||||||
title_ = ': '.join([x.text for x in title(entry_)]).strip()
|
title_ = ': '.join([x.text for x in title(entry_)]).strip()
|
||||||
authors = [x.text.strip() for x in creator(entry_) if x.text]
|
authors = [x.text.strip() for x in creator(entry_) if x.text]
|
||||||
if not authors:
|
if not authors:
|
||||||
@ -87,7 +83,7 @@ def to_metadata(browser, log, entry_, timeout): # {{{
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
mi = Metadata(title_, authors)
|
mi = Metadata(title_, authors)
|
||||||
mi.identifiers = {'google':google_id}
|
mi.identifiers = {'douban':douban_id}
|
||||||
try:
|
try:
|
||||||
raw = get_details(browser, id_url, timeout)
|
raw = get_details(browser, id_url, timeout)
|
||||||
feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
|
feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
|
||||||
@ -103,13 +99,9 @@ def to_metadata(browser, log, entry_, timeout): # {{{
|
|||||||
|
|
||||||
# ISBN
|
# ISBN
|
||||||
isbns = []
|
isbns = []
|
||||||
for x in identifier(extra):
|
for x in [t.text for t in isbn(extra)]:
|
||||||
t = str(x.text).strip()
|
if check_isbn(x):
|
||||||
if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'):
|
isbns.append(x)
|
||||||
if t[:5].upper() == 'ISBN:':
|
|
||||||
t = check_isbn(t[5:])
|
|
||||||
if t:
|
|
||||||
isbns.append(t)
|
|
||||||
if isbns:
|
if isbns:
|
||||||
mi.isbn = sorted(isbns, key=len)[-1]
|
mi.isbn = sorted(isbns, key=len)[-1]
|
||||||
mi.all_isbns = isbns
|
mi.all_isbns = isbns
|
||||||
@ -139,21 +131,23 @@ def to_metadata(browser, log, entry_, timeout): # {{{
|
|||||||
log.error('Failed to parse pubdate %r'%pubdate)
|
log.error('Failed to parse pubdate %r'%pubdate)
|
||||||
|
|
||||||
# Ratings
|
# Ratings
|
||||||
for x in rating(extra):
|
if rating(extra):
|
||||||
try:
|
try:
|
||||||
mi.rating = float(x.get('average'))
|
mi.rating = float(rating(extra).text) / 2.0
|
||||||
if mi.rating > 5:
|
|
||||||
mi.rating /= 2
|
|
||||||
except:
|
except:
|
||||||
log.exception('Failed to parse rating')
|
log.exception('Failed to parse rating')
|
||||||
|
mi.rating = 0
|
||||||
|
|
||||||
# Cover
|
# Cover
|
||||||
mi.has_google_cover = None
|
mi.has_douban_cover = None
|
||||||
for x in extra.xpath(
|
u = cover_url(extra)
|
||||||
'//*[@href and @rel="http://schemas.google.com/books/2008/thumbnail"]'):
|
print(u)
|
||||||
mi.has_google_cover = x.get('href')
|
if u:
|
||||||
break
|
u = u[0].replace('/spic/', '/lpic/');
|
||||||
|
print(u)
|
||||||
|
# If URL contains "book-default", the book doesn't have a cover
|
||||||
|
if u.find('book-default') == -1:
|
||||||
|
mi.has_douban_cover = u
|
||||||
return mi
|
return mi
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
@ -172,6 +166,7 @@ class Douban(Source):
|
|||||||
cached_cover_url_is_reliable = True
|
cached_cover_url_is_reliable = True
|
||||||
|
|
||||||
DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
|
DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
|
||||||
|
DOUBAN_ID_URL = 'http://api.douban.com/book/subject/%s'
|
||||||
# GOOGLE_COVER = 'http://books.google.com/books?id=%s&printsec=frontcover&img=1'
|
# GOOGLE_COVER = 'http://books.google.com/books?id=%s&printsec=frontcover&img=1'
|
||||||
|
|
||||||
# DUMMY_IMAGE_MD5 = frozenset(['0de4383ebad0adad5eeb8975cd796657'])
|
# DUMMY_IMAGE_MD5 = frozenset(['0de4383ebad0adad5eeb8975cd796657'])
|
||||||
@ -179,7 +174,7 @@ class Douban(Source):
|
|||||||
def get_book_url(self, identifiers): # {{{
|
def get_book_url(self, identifiers): # {{{
|
||||||
db = identifiers.get('douban', None)
|
db = identifiers.get('douban', None)
|
||||||
if db is not None:
|
if db is not None:
|
||||||
return db
|
return DOUBAN_ID_URL % db
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
# }}}
|
# }}}
|
||||||
@ -206,11 +201,11 @@ class Douban(Source):
|
|||||||
q += ((' ' if q != '' else '') +
|
q += ((' ' if q != '' else '') +
|
||||||
build_term('author', author_tokens))
|
build_term('author', author_tokens))
|
||||||
t = 'search'
|
t = 'search'
|
||||||
|
q = q.strip()
|
||||||
if isinstance(q, unicode):
|
if isinstance(q, unicode):
|
||||||
q = q.encode('utf-8')
|
q = q.encode('utf-8')
|
||||||
if not q:
|
if not q:
|
||||||
return None
|
return None
|
||||||
print(q)
|
|
||||||
url = None
|
url = None
|
||||||
if t == "isbn":
|
if t == "isbn":
|
||||||
url = ISBN_URL + q
|
url = ISBN_URL + q
|
||||||
@ -220,7 +215,6 @@ class Douban(Source):
|
|||||||
})
|
})
|
||||||
if self.DOUBAN_API_KEY and self.DOUBAN_API_KEY != '':
|
if self.DOUBAN_API_KEY and self.DOUBAN_API_KEY != '':
|
||||||
url = url + "?apikey=" + self.DOUBAN_API_KEY
|
url = url + "?apikey=" + self.DOUBAN_API_KEY
|
||||||
print(url)
|
|
||||||
return url
|
return url
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
@ -257,10 +251,7 @@ class Douban(Source):
|
|||||||
try:
|
try:
|
||||||
cdata = br.open_novisit(cached_url, timeout=timeout).read()
|
cdata = br.open_novisit(cached_url, timeout=timeout).read()
|
||||||
if cdata:
|
if cdata:
|
||||||
if hashlib.md5(cdata).hexdigest() in self.DUMMY_IMAGE_MD5:
|
result_queue.put((self, cdata))
|
||||||
log.warning('Google returned a dummy image, ignoring')
|
|
||||||
else:
|
|
||||||
result_queue.put((self, cdata))
|
|
||||||
except:
|
except:
|
||||||
log.exception('Failed to download cover from:', cached_url)
|
log.exception('Failed to download cover from:', cached_url)
|
||||||
|
|
||||||
@ -268,13 +259,13 @@ class Douban(Source):
|
|||||||
|
|
||||||
def get_cached_cover_url(self, identifiers): # {{{
|
def get_cached_cover_url(self, identifiers): # {{{
|
||||||
url = None
|
url = None
|
||||||
goog = identifiers.get('google', None)
|
db = identifiers.get('douban', None)
|
||||||
if goog is None:
|
if db is None:
|
||||||
isbn = identifiers.get('isbn', None)
|
isbn = identifiers.get('isbn', None)
|
||||||
if isbn is not None:
|
if isbn is not None:
|
||||||
goog = self.cached_isbn_to_identifier(isbn)
|
db = self.cached_isbn_to_identifier(isbn)
|
||||||
if goog is not None:
|
if db is not None:
|
||||||
url = self.cached_identifier_to_cover_url(goog)
|
url = self.cached_identifier_to_cover_url(db)
|
||||||
|
|
||||||
return url
|
return url
|
||||||
# }}}
|
# }}}
|
||||||
@ -286,12 +277,12 @@ class Douban(Source):
|
|||||||
ans = to_metadata(br, log, i, timeout)
|
ans = to_metadata(br, log, i, timeout)
|
||||||
if isinstance(ans, Metadata):
|
if isinstance(ans, Metadata):
|
||||||
ans.source_relevance = relevance
|
ans.source_relevance = relevance
|
||||||
goog = ans.identifiers['google']
|
db = ans.identifiers['douban']
|
||||||
for isbn in getattr(ans, 'all_isbns', []):
|
for isbn in getattr(ans, 'all_isbns', []):
|
||||||
self.cache_isbn_to_identifier(isbn, goog)
|
self.cache_isbn_to_identifier(isbn, db)
|
||||||
if ans.has_google_cover:
|
if ans.has_douban_cover:
|
||||||
self.cache_identifier_to_cover_url(goog,
|
self.cache_identifier_to_cover_url(db,
|
||||||
self.GOOGLE_COVER%goog)
|
ans.has_douban_cover)
|
||||||
self.clean_downloaded_metadata(ans)
|
self.clean_downloaded_metadata(ans)
|
||||||
result_queue.put(ans)
|
result_queue.put(ans)
|
||||||
except:
|
except:
|
||||||
@ -315,7 +306,6 @@ class Douban(Source):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.exception('Failed to make identify query: %r'%query)
|
log.exception('Failed to make identify query: %r'%query)
|
||||||
return as_unicode(e)
|
return as_unicode(e)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parser = etree.XMLParser(recover=True, no_network=True)
|
parser = etree.XMLParser(recover=True, no_network=True)
|
||||||
feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
|
feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
|
||||||
@ -324,7 +314,8 @@ class Douban(Source):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.exception('Failed to parse identify results')
|
log.exception('Failed to parse identify results')
|
||||||
return as_unicode(e)
|
return as_unicode(e)
|
||||||
|
if not title:
|
||||||
|
title = ""
|
||||||
if not entries and identifiers and title and authors and \
|
if not entries and identifiers and title and authors and \
|
||||||
not abort.is_set():
|
not abort.is_set():
|
||||||
return self.identify(log, result_queue, abort, title=title,
|
return self.identify(log, result_queue, abort, title=title,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user