mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Finish the Douban.com books metadata source plugin
This commit is contained in:
parent
7bd9cd20fe
commit
4bdbab22ca
@ -40,8 +40,8 @@ publisher = XPath("descendant::db:attribute[@name='publisher']")
|
|||||||
isbn = XPath("descendant::db:attribute[@name='isbn13']")
|
isbn = XPath("descendant::db:attribute[@name='isbn13']")
|
||||||
date = XPath("descendant::db:attribute[@name='pubdate']")
|
date = XPath("descendant::db:attribute[@name='pubdate']")
|
||||||
creator = XPath("descendant::db:attribute[@name='author']")
|
creator = XPath("descendant::db:attribute[@name='author']")
|
||||||
tag = XPath("descendant::db:tag")
|
booktag = XPath("descendant::db:tag/attribute::name")
|
||||||
rating = XPath("descendant::gd:rating[@name='average']")
|
rating = XPath("descendant::gd:rating/attribute::average")
|
||||||
cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href")
|
cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href")
|
||||||
|
|
||||||
def get_details(browser, url, timeout): # {{{
|
def get_details(browser, url, timeout): # {{{
|
||||||
@ -51,7 +51,7 @@ def get_details(browser, url, timeout): # {{{
|
|||||||
gc = getattr(e, 'getcode', lambda : -1)
|
gc = getattr(e, 'getcode', lambda : -1)
|
||||||
if gc() != 403:
|
if gc() != 403:
|
||||||
raise
|
raise
|
||||||
# Google is throttling us, wait a little
|
# Douban is throttling us, wait a little
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
raw = browser.open_novisit(url, timeout=timeout).read()
|
raw = browser.open_novisit(url, timeout=timeout).read()
|
||||||
|
|
||||||
@ -59,7 +59,6 @@ def get_details(browser, url, timeout): # {{{
|
|||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
def to_metadata(browser, log, entry_, timeout): # {{{
|
def to_metadata(browser, log, entry_, timeout): # {{{
|
||||||
|
|
||||||
def get_text(extra, x):
|
def get_text(extra, x):
|
||||||
try:
|
try:
|
||||||
ans = x(extra)
|
ans = x(extra)
|
||||||
@ -71,7 +70,6 @@ def to_metadata(browser, log, entry_, timeout): # {{{
|
|||||||
log.exception('Programming error:')
|
log.exception('Programming error:')
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
id_url = entry_id(entry_)[0].text
|
id_url = entry_id(entry_)[0].text
|
||||||
douban_id = id_url.split('/')[-1]
|
douban_id = id_url.split('/')[-1]
|
||||||
title_ = ': '.join([x.text for x in title(entry_)]).strip()
|
title_ = ': '.join([x.text for x in title(entry_)]).strip()
|
||||||
@ -92,9 +90,7 @@ def to_metadata(browser, log, entry_, timeout): # {{{
|
|||||||
except:
|
except:
|
||||||
log.exception('Failed to get additional details for', mi.title)
|
log.exception('Failed to get additional details for', mi.title)
|
||||||
return mi
|
return mi
|
||||||
|
|
||||||
mi.comments = get_text(extra, description)
|
mi.comments = get_text(extra, description)
|
||||||
#mi.language = get_text(extra, language)
|
|
||||||
mi.publisher = get_text(extra, publisher)
|
mi.publisher = get_text(extra, publisher)
|
||||||
|
|
||||||
# ISBN
|
# ISBN
|
||||||
@ -108,7 +104,7 @@ def to_metadata(browser, log, entry_, timeout): # {{{
|
|||||||
|
|
||||||
# Tags
|
# Tags
|
||||||
try:
|
try:
|
||||||
btags = [x.text for x in subject(extra) if x.text]
|
btags = [x for x in booktag(extra) if x]
|
||||||
tags = []
|
tags = []
|
||||||
for t in btags:
|
for t in btags:
|
||||||
atags = [y.strip() for y in t.split('/')]
|
atags = [y.strip() for y in t.split('/')]
|
||||||
@ -120,7 +116,7 @@ def to_metadata(browser, log, entry_, timeout): # {{{
|
|||||||
tags = []
|
tags = []
|
||||||
if tags:
|
if tags:
|
||||||
mi.tags = [x.replace(',', ';') for x in tags]
|
mi.tags = [x.replace(',', ';') for x in tags]
|
||||||
|
|
||||||
# pubdate
|
# pubdate
|
||||||
pubdate = get_text(extra, date)
|
pubdate = get_text(extra, date)
|
||||||
if pubdate:
|
if pubdate:
|
||||||
@ -133,7 +129,7 @@ def to_metadata(browser, log, entry_, timeout): # {{{
|
|||||||
# Ratings
|
# Ratings
|
||||||
if rating(extra):
|
if rating(extra):
|
||||||
try:
|
try:
|
||||||
mi.rating = float(rating(extra).text) / 2.0
|
mi.rating = float(rating(extra)[0]) / 2.0
|
||||||
except:
|
except:
|
||||||
log.exception('Failed to parse rating')
|
log.exception('Failed to parse rating')
|
||||||
mi.rating = 0
|
mi.rating = 0
|
||||||
@ -141,10 +137,8 @@ def to_metadata(browser, log, entry_, timeout): # {{{
|
|||||||
# Cover
|
# Cover
|
||||||
mi.has_douban_cover = None
|
mi.has_douban_cover = None
|
||||||
u = cover_url(extra)
|
u = cover_url(extra)
|
||||||
print(u)
|
|
||||||
if u:
|
if u:
|
||||||
u = u[0].replace('/spic/', '/lpic/');
|
u = u[0].replace('/spic/', '/lpic/');
|
||||||
print(u)
|
|
||||||
# If URL contains "book-default", the book doesn't have a cover
|
# If URL contains "book-default", the book doesn't have a cover
|
||||||
if u.find('book-default') == -1:
|
if u.find('book-default') == -1:
|
||||||
mi.has_douban_cover = u
|
mi.has_douban_cover = u
|
||||||
@ -155,26 +149,24 @@ class Douban(Source):
|
|||||||
|
|
||||||
name = 'Douban Books'
|
name = 'Douban Books'
|
||||||
author = _('Li Fanxi')
|
author = _('Li Fanxi')
|
||||||
|
version = (2, 0, 0)
|
||||||
|
|
||||||
description = _('Downloads metadata from Douban.com')
|
description = _('Downloads metadata from Douban.com')
|
||||||
|
|
||||||
capabilities = frozenset(['identify', 'cover'])
|
capabilities = frozenset(['identify', 'cover'])
|
||||||
touched_fields = frozenset(['title', 'authors', 'tags',
|
touched_fields = frozenset(['title', 'authors', 'tags',
|
||||||
'comments', 'publisher', 'identifier:isbn', 'rating',
|
'pubdate', 'comments', 'publisher', 'identifier:isbn', 'rating',
|
||||||
'identifier:douban']) # language currently disabled
|
'identifier:douban']) # language currently disabled
|
||||||
supports_gzip_transfer_encoding = True
|
supports_gzip_transfer_encoding = True
|
||||||
cached_cover_url_is_reliable = True
|
cached_cover_url_is_reliable = True
|
||||||
|
|
||||||
DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
|
DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
|
||||||
DOUBAN_ID_URL = 'http://api.douban.com/book/subject/%s'
|
DOUBAN_BOOK_URL = 'http://book.douban.com/subject/%s/'
|
||||||
# GOOGLE_COVER = 'http://books.google.com/books?id=%s&printsec=frontcover&img=1'
|
|
||||||
|
|
||||||
# DUMMY_IMAGE_MD5 = frozenset(['0de4383ebad0adad5eeb8975cd796657'])
|
|
||||||
|
|
||||||
def get_book_url(self, identifiers): # {{{
|
def get_book_url(self, identifiers): # {{{
|
||||||
db = identifiers.get('douban', None)
|
db = identifiers.get('douban', None)
|
||||||
if db is not None:
|
if db is not None:
|
||||||
return DOUBAN_ID_URL % db
|
return ('douban', db, self.DOUBAN_BOOK_URL%db)
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
# }}}
|
# }}}
|
||||||
@ -182,13 +174,18 @@ class Douban(Source):
|
|||||||
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
|
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
|
||||||
SEARCH_URL = 'http://api.douban.com/book/subjects?'
|
SEARCH_URL = 'http://api.douban.com/book/subjects?'
|
||||||
ISBN_URL = 'http://api.douban.com/book/subject/isbn/'
|
ISBN_URL = 'http://api.douban.com/book/subject/isbn/'
|
||||||
|
SUBJECT_URL = 'http://api.douban.com/book/subject/'
|
||||||
|
|
||||||
q = ''
|
q = ''
|
||||||
t = None
|
t = None
|
||||||
isbn = check_isbn(identifiers.get('isbn', None))
|
isbn = check_isbn(identifiers.get('isbn', None))
|
||||||
|
subject = identifiers.get('douban', None)
|
||||||
if isbn is not None:
|
if isbn is not None:
|
||||||
q = isbn
|
q = isbn
|
||||||
t = 'isbn'
|
t = 'isbn'
|
||||||
|
elif subject is not None:
|
||||||
|
q = subject
|
||||||
|
t = 'subject'
|
||||||
elif title or authors:
|
elif title or authors:
|
||||||
def build_term(prefix, parts):
|
def build_term(prefix, parts):
|
||||||
return ' '.join(x for x in parts)
|
return ' '.join(x for x in parts)
|
||||||
@ -209,6 +206,8 @@ class Douban(Source):
|
|||||||
url = None
|
url = None
|
||||||
if t == "isbn":
|
if t == "isbn":
|
||||||
url = ISBN_URL + q
|
url = ISBN_URL + q
|
||||||
|
elif t == 'subject':
|
||||||
|
url = SUBJECT_URL + q
|
||||||
else:
|
else:
|
||||||
url = SEARCH_URL + urlencode({
|
url = SEARCH_URL + urlencode({
|
||||||
'q': q,
|
'q': q,
|
||||||
@ -314,14 +313,12 @@ class Douban(Source):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.exception('Failed to parse identify results')
|
log.exception('Failed to parse identify results')
|
||||||
return as_unicode(e)
|
return as_unicode(e)
|
||||||
if not title:
|
|
||||||
title = ""
|
|
||||||
if not entries and identifiers and title and authors and \
|
if not entries and identifiers and title and authors and \
|
||||||
not abort.is_set():
|
not abort.is_set():
|
||||||
return self.identify(log, result_queue, abort, title=title,
|
return self.identify(log, result_queue, abort, title=title,
|
||||||
authors=authors, timeout=timeout)
|
authors=authors, timeout=timeout)
|
||||||
|
|
||||||
# There is no point running these queries in threads as google
|
# There is no point running these queries in threads, as Douban
|
||||||
# throttles requests returning 403 Forbidden errors
|
# throttles requests returning 403 Forbidden errors
|
||||||
self.get_all_details(br, log, entries, abort, result_queue, timeout)
|
self.get_all_details(br, log, entries, abort, result_queue, timeout)
|
||||||
|
|
||||||
@ -329,23 +326,23 @@ class Douban(Source):
|
|||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
if __name__ == '__main__': # tests {{{
|
if __name__ == '__main__': # tests {{{
|
||||||
# To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py
|
# To run these tests use: calibre-debug -e src/calibre/ebooks/metadata/sources/douban.py
|
||||||
from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
|
from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
|
||||||
title_test, authors_test)
|
title_test, authors_test)
|
||||||
test_identify_plugin(GoogleBooks.name,
|
test_identify_plugin(Douban.name,
|
||||||
[
|
[
|
||||||
|
|
||||||
|
|
||||||
(
|
(
|
||||||
{'identifiers':{'isbn': '0743273567'}, 'title':'Great Gatsby',
|
{'identifiers':{'isbn': '9787536692930'}, 'title':'三体',
|
||||||
'authors':['Fitzgerald']},
|
'authors':['刘慈欣']},
|
||||||
[title_test('The great gatsby', exact=True),
|
[title_test('三体', exact=True),
|
||||||
authors_test(['Francis Scott Fitzgerald'])]
|
authors_test(['刘慈欣'])]
|
||||||
),
|
),
|
||||||
|
|
||||||
(
|
(
|
||||||
{'title': 'Flatland', 'authors':['Abbott']},
|
{'title': 'Linux内核修炼之道', 'authors':['任桥伟']},
|
||||||
[title_test('Flatland', exact=False)]
|
[title_test('Linux内核修炼之道', exact=False)]
|
||||||
),
|
),
|
||||||
])
|
])
|
||||||
# }}}
|
# }}}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user