From fabef627e3dd85d06989551614db5277e72021c7 Mon Sep 17 00:00:00 2001 From: Byron Li Date: Mon, 25 Apr 2011 21:11:24 +0800 Subject: [PATCH 1/3] Add a douban.com plugin stub. Not working yet. --- src/calibre/customize/builtins.py | 5 +- src/calibre/ebooks/metadata/sources/douban.py | 361 ++++++++++++++++++ 2 files changed, 364 insertions(+), 2 deletions(-) create mode 100644 src/calibre/ebooks/metadata/sources/douban.py diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index c27fa2a57b..3c769f8dc7 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -628,8 +628,9 @@ if test_eight_code: from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary from calibre.ebooks.metadata.sources.isbndb import ISBNDB from calibre.ebooks.metadata.sources.overdrive import OverDrive - - plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive] + from calibre.ebooks.metadata.sources.douban import Douban + + plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban] # }}} else: diff --git a/src/calibre/ebooks/metadata/sources/douban.py b/src/calibre/ebooks/metadata/sources/douban.py new file mode 100644 index 0000000000..b50bb6ff85 --- /dev/null +++ b/src/calibre/ebooks/metadata/sources/douban.py @@ -0,0 +1,361 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Kovid Goyal ; 2011, Li Fanxi ' +__docformat__ = 'restructuredtext en' + +import time, hashlib +from urllib import urlencode +from functools import partial +from Queue import Queue, Empty + +from lxml import etree + +from calibre.ebooks.metadata import check_isbn +from calibre.ebooks.metadata.sources.base import Source +from calibre.ebooks.metadata.book.base import Metadata +from calibre.ebooks.chardet import xml_to_unicode +from calibre.utils.date import parse_date, utcnow +from calibre.utils.cleantext import clean_ascii_chars +from calibre import as_unicode + +NAMESPACES = { + 'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/', + 'atom' : 'http://www.w3.org/2005/Atom', + 'dc' : 'http://purl.org/dc/terms', + 'gd' : 'http://schemas.google.com/g/2005' + } + +NAMESPACES = { + 'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/', + 'atom' : 'http://www.w3.org/2005/Atom', + 'db': 'http://www.douban.com/xmlns/' + } +XPath = partial(etree.XPath, namespaces=NAMESPACES) +total_results = XPath('//openSearch:totalResults') +start_index = XPath('//openSearch:startIndex') +items_per_page = XPath('//openSearch:itemsPerPage') +entry = XPath('//atom:entry') +entry_id = XPath('descendant::atom:id') +title = XPath('descendant::atom:title') +description = XPath('descendant::atom:summary') +publisher = XPath("descendant::db:attribute[@name='publisher']") +isbn = XPath("descendant::db:attribute[@name='isbn13']") +date = XPath("descendant::db:attribute[@name='pubdate']") +creator = XPath("descendant::db:attribute[@name='author']") +tag = XPath("descendant::db:tag") + +def get_details(browser, url, timeout): # {{{ + try: + raw = browser.open_novisit(url, timeout=timeout).read() + except Exception as e: + gc = getattr(e, 'getcode', lambda : -1) + if gc() != 403: + raise + # Google is throttling us, wait a little + time.sleep(2) + raw = browser.open_novisit(url, timeout=timeout).read() + + return raw +# }}} + +def to_metadata(browser, log, entry_, timeout): # {{{ + + def get_text(extra, x): + try: + ans = 
x(extra)
+            if ans:
+                ans = ans[0].text
+                if ans and ans.strip():
+                    return ans.strip()
+        except:
+            log.exception('Programming error:')
+        return None
+
+
+    id_url = entry_id(entry_)[0].text
+    google_id = id_url.split('/')[-1]
+    title_ = ': '.join([x.text for x in title(entry_)]).strip()
+    authors = [x.text.strip() for x in creator(entry_) if x.text]
+    if not authors:
+        authors = [_('Unknown')]
+    if not id_url or not title_:
+        # Silently discard this entry
+        return None
+
+    mi = Metadata(title_, authors)
+    mi.identifiers = {'google':google_id}
+    try:
+        raw = get_details(browser, id_url, timeout)
+        feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
+            strip_encoding_pats=True)[0])
+        extra = entry(feed)[0]
+    except:
+        log.exception('Failed to get additional details for', mi.title)
+        return mi
+
+    mi.comments = get_text(extra, description)
+    #mi.language = get_text(extra, language)
+    mi.publisher = get_text(extra, publisher)
+
+    # ISBN
+    isbns = []
+    for x in identifier(extra):
+        t = str(x.text).strip()
+        if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'):
+            if t[:5].upper() == 'ISBN:':
+                t = check_isbn(t[5:])
+                if t:
+                    isbns.append(t)
+    if isbns:
+        mi.isbn = sorted(isbns, key=len)[-1]
+    mi.all_isbns = isbns
+
+    # Tags
+    try:
+        btags = [x.text for x in subject(extra) if x.text]
+        tags = []
+        for t in btags:
+            atags = [y.strip() for y in t.split('/')]
+            for tag in atags:
+                if tag not in tags:
+                    tags.append(tag)
+    except:
+        log.exception('Failed to parse tags:')
+        tags = []
+    if tags:
+        mi.tags = [x.replace(',', ';') for x in tags]
+
+    # pubdate
+    pubdate = get_text(extra, date)
+    if pubdate:
+        try:
+            default = utcnow().replace(day=15)
+            mi.pubdate = parse_date(pubdate, assume_utc=True, default=default)
+        except:
+            log.error('Failed to parse pubdate %r'%pubdate)
+
+    # Ratings
+    for x in rating(extra):
+        try:
+            mi.rating = float(x.get('average'))
+            if mi.rating > 5:
+                mi.rating /= 2
+        except:
+            log.exception('Failed to parse rating')
+
+    # Cover
+    mi.has_google_cover = None
+    for x in extra.xpath(
+            '//*[@href and @rel="http://schemas.google.com/books/2008/thumbnail"]'):
+        mi.has_google_cover = x.get('href')
+        break
+
+    return mi
+# }}}
+
+class Douban(Source):
+
+    name = 'Douban Books'
+    author = _('Li Fanxi')
+
+    description = _('Downloads metadata from Douban.com')
+
+    capabilities = frozenset(['identify', 'cover'])
+    touched_fields = frozenset(['title', 'authors', 'tags',
+        'comments', 'publisher', 'identifier:isbn', 'rating',
+        'identifier:douban']) # language currently disabled
+    supports_gzip_transfer_encoding = True
+    cached_cover_url_is_reliable = True
+
+    DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
+# GOOGLE_COVER = 'http://books.google.com/books?id=%s&printsec=frontcover&img=1'
+
+# DUMMY_IMAGE_MD5 = frozenset(['0de4383ebad0adad5eeb8975cd796657'])
+
+    def get_book_url(self, identifiers): # {{{
+        db = identifiers.get('douban', None)
+        if db is not None:
+            return db
+        else:
+            return None
+    # }}}
+
+    def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
+        SEARCH_URL = 'http://api.douban.com/book/subjects?'
+ ISBN_URL = 'http://api.douban.com/book/subject/isbn/' + + q = '' + t = None + isbn = check_isbn(identifiers.get('isbn', None)) + if isbn is not None: + q = isbn + t = 'isbn' + elif title or authors: + def build_term(prefix, parts): + return ' '.join(x for x in parts) + title_tokens = list(self.get_title_tokens(title)) + if title_tokens: + q += build_term('title', title_tokens) + author_tokens = self.get_author_tokens(authors, + only_first_author=True) + if author_tokens: + q += ((' ' if q != '' else '') + + build_term('author', author_tokens)) + t = 'search' + if isinstance(q, unicode): + q = q.encode('utf-8') + if not q: + return None + print(q) + url = None + if t == "isbn": + url = ISBN_URL + q + else: + url = SEARCH_URL + urlencode({ + 'q': q, + }) + if self.DOUBAN_API_KEY and self.DOUBAN_API_KEY != '': + url = url + "?apikey=" + self.DOUBAN_API_KEY + print(url) + return url + # }}} + + def download_cover(self, log, result_queue, abort, # {{{ + title=None, authors=None, identifiers={}, timeout=30): + cached_url = self.get_cached_cover_url(identifiers) + if cached_url is None: + log.info('No cached cover found, running identify') + rq = Queue() + self.identify(log, rq, abort, title=title, authors=authors, + identifiers=identifiers) + if abort.is_set(): + return + results = [] + while True: + try: + results.append(rq.get_nowait()) + except Empty: + break + results.sort(key=self.identify_results_keygen( + title=title, authors=authors, identifiers=identifiers)) + for mi in results: + cached_url = self.get_cached_cover_url(mi.identifiers) + if cached_url is not None: + break + if cached_url is None: + log.info('No cover found') + return + + if abort.is_set(): + return + br = self.browser + log('Downloading cover from:', cached_url) + try: + cdata = br.open_novisit(cached_url, timeout=timeout).read() + if cdata: + if hashlib.md5(cdata).hexdigest() in self.DUMMY_IMAGE_MD5: + log.warning('Google returned a dummy image, ignoring') + else: + result_queue.put((self, cdata)) + except: + log.exception('Failed to download cover from:', cached_url) + + # }}} + + def get_cached_cover_url(self, identifiers): # {{{ + url = None + goog = identifiers.get('google', None) + if goog is None: + isbn = identifiers.get('isbn', None) + if isbn is not None: + goog = self.cached_isbn_to_identifier(isbn) + if goog is not None: + url = self.cached_identifier_to_cover_url(goog) + + return url + # }}} + + def get_all_details(self, br, log, entries, abort, # {{{ + result_queue, timeout): + for relevance, i in enumerate(entries): + try: + ans = to_metadata(br, log, i, timeout) + if isinstance(ans, Metadata): + ans.source_relevance = relevance + goog = ans.identifiers['google'] + for isbn in getattr(ans, 'all_isbns', []): + self.cache_isbn_to_identifier(isbn, goog) + if ans.has_google_cover: + self.cache_identifier_to_cover_url(goog, + self.GOOGLE_COVER%goog) + self.clean_downloaded_metadata(ans) + result_queue.put(ans) + except: + log.exception( + 'Failed to get metadata for identify entry:', + etree.tostring(i)) + if abort.is_set(): + break + # }}} + + def identify(self, log, result_queue, abort, title=None, authors=None, # {{{ + identifiers={}, timeout=30): + query = self.create_query(log, title=title, authors=authors, + identifiers=identifiers) + if not query: + log.error('Insufficient metadata to construct query') + return + br = self.browser + try: + raw = br.open_novisit(query, timeout=timeout).read() + except Exception as e: + log.exception('Failed to make identify query: %r'%query) + return as_unicode(e) + + 
try: + parser = etree.XMLParser(recover=True, no_network=True) + feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw), + strip_encoding_pats=True)[0], parser=parser) + entries = entry(feed) + except Exception as e: + log.exception('Failed to parse identify results') + return as_unicode(e) + + if not entries and identifiers and title and authors and \ + not abort.is_set(): + return self.identify(log, result_queue, abort, title=title, + authors=authors, timeout=timeout) + + # There is no point running these queries in threads as google + # throttles requests returning 403 Forbidden errors + self.get_all_details(br, log, entries, abort, result_queue, timeout) + + return None + # }}} + +if __name__ == '__main__': # tests {{{ + # To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py + from calibre.ebooks.metadata.sources.test import (test_identify_plugin, + title_test, authors_test) + test_identify_plugin(GoogleBooks.name, + [ + + + ( + {'identifiers':{'isbn': '0743273567'}, 'title':'Great Gatsby', + 'authors':['Fitzgerald']}, + [title_test('The great gatsby', exact=True), + authors_test(['Francis Scott Fitzgerald'])] + ), + + ( + {'title': 'Flatland', 'authors':['Abbott']}, + [title_test('Flatland', exact=False)] + ), + ]) +# }}} + From ea4b5b9054765bb737179d904c9168846def2e45 Mon Sep 17 00:00:00 2001 From: Byron Li Date: Fri, 29 Apr 2011 16:29:57 +0800 Subject: [PATCH 2/3] First working version of Douban book plugin. --- src/calibre/ebooks/metadata/sources/douban.py | 83 +++++++++---------- 1 file changed, 37 insertions(+), 46 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/douban.py b/src/calibre/ebooks/metadata/sources/douban.py index b50bb6ff85..8f1794b33f 100644 --- a/src/calibre/ebooks/metadata/sources/douban.py +++ b/src/calibre/ebooks/metadata/sources/douban.py @@ -25,14 +25,8 @@ from calibre import as_unicode NAMESPACES = { 'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/', 'atom' : 'http://www.w3.org/2005/Atom', - 'dc' : 'http://purl.org/dc/terms', - 'gd' : 'http://schemas.google.com/g/2005' - } - -NAMESPACES = { - 'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/', - 'atom' : 'http://www.w3.org/2005/Atom', - 'db': 'http://www.douban.com/xmlns/' + 'db': 'http://www.douban.com/xmlns/', + 'gd': 'http://schemas.google.com/g/2005' } XPath = partial(etree.XPath, namespaces=NAMESPACES) total_results = XPath('//openSearch:totalResults') @@ -47,6 +41,8 @@ isbn = XPath("descendant::db:attribute[@name='isbn13']") date = XPath("descendant::db:attribute[@name='pubdate']") creator = XPath("descendant::db:attribute[@name='author']") tag = XPath("descendant::db:tag") +rating = XPath("descendant::gd:rating[@name='average']") +cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href") def get_details(browser, url, timeout): # {{{ try: @@ -77,7 +73,7 @@ def to_metadata(browser, log, entry_, timeout): # {{{ id_url = entry_id(entry_)[0].text - google_id = id_url.split('/')[-1] + douban_id = id_url.split('/')[-1] title_ = ': '.join([x.text for x in title(entry_)]).strip() authors = [x.text.strip() for x in creator(entry_) if x.text] if not authors: @@ -87,7 +83,7 @@ def to_metadata(browser, log, entry_, timeout): # {{{ return None mi = Metadata(title_, authors) - mi.identifiers = {'google':google_id} + mi.identifiers = {'douban':douban_id} try: raw = get_details(browser, id_url, timeout) feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw), @@ -103,13 +99,9 @@ def to_metadata(browser, log, entry_, timeout): # {{{ # 
ISBN isbns = [] - for x in identifier(extra): - t = str(x.text).strip() - if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'): - if t[:5].upper() == 'ISBN:': - t = check_isbn(t[5:]) - if t: - isbns.append(t) + for x in [t.text for t in isbn(extra)]: + if check_isbn(x): + isbns.append(x) if isbns: mi.isbn = sorted(isbns, key=len)[-1] mi.all_isbns = isbns @@ -139,21 +131,23 @@ def to_metadata(browser, log, entry_, timeout): # {{{ log.error('Failed to parse pubdate %r'%pubdate) # Ratings - for x in rating(extra): + if rating(extra): try: - mi.rating = float(x.get('average')) - if mi.rating > 5: - mi.rating /= 2 + mi.rating = float(rating(extra).text) / 2.0 except: log.exception('Failed to parse rating') + mi.rating = 0 # Cover - mi.has_google_cover = None - for x in extra.xpath( - '//*[@href and @rel="http://schemas.google.com/books/2008/thumbnail"]'): - mi.has_google_cover = x.get('href') - break - + mi.has_douban_cover = None + u = cover_url(extra) + print(u) + if u: + u = u[0].replace('/spic/', '/lpic/'); + print(u) + # If URL contains "book-default", the book doesn't have a cover + if u.find('book-default') == -1: + mi.has_douban_cover = u return mi # }}} @@ -172,6 +166,7 @@ class Douban(Source): cached_cover_url_is_reliable = True DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d' + DOUBAN_ID_URL = 'http://api.douban.com/book/subject/%s' # GOOGLE_COVER = 'http://books.google.com/books?id=%s&printsec=frontcover&img=1' # DUMMY_IMAGE_MD5 = frozenset(['0de4383ebad0adad5eeb8975cd796657']) @@ -179,7 +174,7 @@ class Douban(Source): def get_book_url(self, identifiers): # {{{ db = identifiers.get('douban', None) if db is not None: - return db + return DOUBAN_ID_URL % db else: return None # }}} @@ -206,11 +201,11 @@ class Douban(Source): q += ((' ' if q != '' else '') + build_term('author', author_tokens)) t = 'search' + q = q.strip() if isinstance(q, unicode): q = q.encode('utf-8') if not q: return None - print(q) url = None if t == "isbn": url = ISBN_URL + q @@ -220,7 +215,6 @@ class Douban(Source): }) if self.DOUBAN_API_KEY and self.DOUBAN_API_KEY != '': url = url + "?apikey=" + self.DOUBAN_API_KEY - print(url) return url # }}} @@ -257,10 +251,7 @@ class Douban(Source): try: cdata = br.open_novisit(cached_url, timeout=timeout).read() if cdata: - if hashlib.md5(cdata).hexdigest() in self.DUMMY_IMAGE_MD5: - log.warning('Google returned a dummy image, ignoring') - else: - result_queue.put((self, cdata)) + result_queue.put((self, cdata)) except: log.exception('Failed to download cover from:', cached_url) @@ -268,13 +259,13 @@ class Douban(Source): def get_cached_cover_url(self, identifiers): # {{{ url = None - goog = identifiers.get('google', None) - if goog is None: + db = identifiers.get('douban', None) + if db is None: isbn = identifiers.get('isbn', None) if isbn is not None: - goog = self.cached_isbn_to_identifier(isbn) - if goog is not None: - url = self.cached_identifier_to_cover_url(goog) + db = self.cached_isbn_to_identifier(isbn) + if db is not None: + url = self.cached_identifier_to_cover_url(db) return url # }}} @@ -286,12 +277,12 @@ class Douban(Source): ans = to_metadata(br, log, i, timeout) if isinstance(ans, Metadata): ans.source_relevance = relevance - goog = ans.identifiers['google'] + db = ans.identifiers['douban'] for isbn in getattr(ans, 'all_isbns', []): - self.cache_isbn_to_identifier(isbn, goog) - if ans.has_google_cover: - self.cache_identifier_to_cover_url(goog, - self.GOOGLE_COVER%goog) + self.cache_isbn_to_identifier(isbn, db) + if ans.has_douban_cover: + 
self.cache_identifier_to_cover_url(db, + ans.has_douban_cover) self.clean_downloaded_metadata(ans) result_queue.put(ans) except: @@ -315,7 +306,6 @@ class Douban(Source): except Exception as e: log.exception('Failed to make identify query: %r'%query) return as_unicode(e) - try: parser = etree.XMLParser(recover=True, no_network=True) feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw), @@ -324,7 +314,8 @@ class Douban(Source): except Exception as e: log.exception('Failed to parse identify results') return as_unicode(e) - + if not title: + title = "" if not entries and identifiers and title and authors and \ not abort.is_set(): return self.identify(log, result_queue, abort, title=title, From 4bdbab22ca6e8818b76e0ae98ec30094dd00622d Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Sun, 8 May 2011 22:28:47 +0800 Subject: [PATCH 3/3] Finish the Douban.com books metadata source plugin --- src/calibre/ebooks/metadata/sources/douban.py | 55 +++++++++---------- 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/douban.py b/src/calibre/ebooks/metadata/sources/douban.py index 8f1794b33f..7a8619261b 100644 --- a/src/calibre/ebooks/metadata/sources/douban.py +++ b/src/calibre/ebooks/metadata/sources/douban.py @@ -40,8 +40,8 @@ publisher = XPath("descendant::db:attribute[@name='publisher']") isbn = XPath("descendant::db:attribute[@name='isbn13']") date = XPath("descendant::db:attribute[@name='pubdate']") creator = XPath("descendant::db:attribute[@name='author']") -tag = XPath("descendant::db:tag") -rating = XPath("descendant::gd:rating[@name='average']") +booktag = XPath("descendant::db:tag/attribute::name") +rating = XPath("descendant::gd:rating/attribute::average") cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href") def get_details(browser, url, timeout): # {{{ @@ -51,7 +51,7 @@ def get_details(browser, url, timeout): # {{{ gc = getattr(e, 'getcode', lambda : -1) if gc() != 403: raise - # Google is throttling us, wait a little + # Douban is throttling us, wait a little time.sleep(2) raw = browser.open_novisit(url, timeout=timeout).read() @@ -59,7 +59,6 @@ def get_details(browser, url, timeout): # {{{ # }}} def to_metadata(browser, log, entry_, timeout): # {{{ - def get_text(extra, x): try: ans = x(extra) @@ -71,7 +70,6 @@ def to_metadata(browser, log, entry_, timeout): # {{{ log.exception('Programming error:') return None - id_url = entry_id(entry_)[0].text douban_id = id_url.split('/')[-1] title_ = ': '.join([x.text for x in title(entry_)]).strip() @@ -92,9 +90,7 @@ def to_metadata(browser, log, entry_, timeout): # {{{ except: log.exception('Failed to get additional details for', mi.title) return mi - mi.comments = get_text(extra, description) - #mi.language = get_text(extra, language) mi.publisher = get_text(extra, publisher) # ISBN @@ -108,7 +104,7 @@ def to_metadata(browser, log, entry_, timeout): # {{{ # Tags try: - btags = [x.text for x in subject(extra) if x.text] + btags = [x for x in booktag(extra) if x] tags = [] for t in btags: atags = [y.strip() for y in t.split('/')] @@ -120,7 +116,7 @@ def to_metadata(browser, log, entry_, timeout): # {{{ tags = [] if tags: mi.tags = [x.replace(',', ';') for x in tags] - + # pubdate pubdate = get_text(extra, date) if pubdate: @@ -133,7 +129,7 @@ def to_metadata(browser, log, entry_, timeout): # {{{ # Ratings if rating(extra): try: - mi.rating = float(rating(extra).text) / 2.0 + mi.rating = float(rating(extra)[0]) / 2.0 except: log.exception('Failed to parse rating') mi.rating = 0 
@@ -141,10 +137,8 @@ def to_metadata(browser, log, entry_, timeout): # {{{
     # Cover
     mi.has_douban_cover = None
     u = cover_url(extra)
-    print(u)
     if u:
         u = u[0].replace('/spic/', '/lpic/');
-        print(u)
         # If URL contains "book-default", the book doesn't have a cover
         if u.find('book-default') == -1:
             mi.has_douban_cover = u
@@ -155,26 +149,24 @@ class Douban(Source):
 
     name = 'Douban Books'
     author = _('Li Fanxi')
+    version = (2, 0, 0)
 
     description = _('Downloads metadata from Douban.com')
 
     capabilities = frozenset(['identify', 'cover'])
     touched_fields = frozenset(['title', 'authors', 'tags',
-        'comments', 'publisher', 'identifier:isbn', 'rating',
+        'pubdate', 'comments', 'publisher', 'identifier:isbn', 'rating',
         'identifier:douban']) # language currently disabled
     supports_gzip_transfer_encoding = True
     cached_cover_url_is_reliable = True
 
     DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
-    DOUBAN_ID_URL = 'http://api.douban.com/book/subject/%s'
-# GOOGLE_COVER = 'http://books.google.com/books?id=%s&printsec=frontcover&img=1'
-
-# DUMMY_IMAGE_MD5 = frozenset(['0de4383ebad0adad5eeb8975cd796657'])
+    DOUBAN_BOOK_URL = 'http://book.douban.com/subject/%s/'
 
     def get_book_url(self, identifiers): # {{{
         db = identifiers.get('douban', None)
         if db is not None:
-            return DOUBAN_ID_URL % db
+            return ('douban', db, self.DOUBAN_BOOK_URL%db)
         else:
             return None
     # }}}
@@ -182,13 +174,18 @@ class Douban(Source):
     def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
         SEARCH_URL = 'http://api.douban.com/book/subjects?'
         ISBN_URL = 'http://api.douban.com/book/subject/isbn/'
+        SUBJECT_URL = 'http://api.douban.com/book/subject/'
 
         q = ''
         t = None
         isbn = check_isbn(identifiers.get('isbn', None))
+        subject = identifiers.get('douban', None)
         if isbn is not None:
             q = isbn
             t = 'isbn'
+        elif subject is not None:
+            q = subject
+            t = 'subject'
         elif title or authors:
             def build_term(prefix, parts):
                 return ' '.join(x for x in parts)
@@ -209,6 +206,8 @@ class Douban(Source):
         url = None
         if t == "isbn":
             url = ISBN_URL + q
+        elif t == 'subject':
+            url = SUBJECT_URL + q
         else:
             url = SEARCH_URL + urlencode({
                 'q': q,
             })
@@ -314,14 +313,12 @@ class Douban(Source):
         except Exception as e:
             log.exception('Failed to parse identify results')
             return as_unicode(e)
-        if not title:
-            title = ""
         if not entries and identifiers and title and authors and \
                 not abort.is_set():
             return self.identify(log, result_queue, abort, title=title,
                     authors=authors, timeout=timeout)
 
-        # There is no point running these queries in threads as google
+        # There is no point running these queries in threads as douban
         # throttles requests returning 403 Forbidden errors
         self.get_all_details(br, log, entries, abort, result_queue, timeout)
 
@@ -329,23 +326,23 @@ class Douban(Source):
     # }}}
 
 if __name__ == '__main__': # tests {{{
-    # To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py
+    # To run these tests, use: calibre-debug -e src/calibre/ebooks/metadata/sources/douban.py
     from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
             title_test, authors_test)
-    test_identify_plugin(GoogleBooks.name,
+    test_identify_plugin(Douban.name,
         [
 
 
             (
-                {'identifiers':{'isbn': '0743273567'}, 'title':'Great Gatsby',
-                    'authors':['Fitzgerald']},
-                [title_test('The great gatsby', exact=True),
-                    authors_test(['Francis Scott Fitzgerald'])]
+                {'identifiers':{'isbn': '9787536692930'}, 'title':'三体',
+                    'authors':['刘慈欣']},
+                [title_test('三体', exact=True),
+                    authors_test(['刘慈欣'])]
             ),
 
             (
-                {'title': 'Flatland', 'authors':['Abbott']},
-                [title_test('Flatland', exact=False)]
+                {'title': 'Linux内核修炼之道', 'authors':['任桥伟']},
+                [title_test('Linux内核修炼之道', exact=False)]
             ),
         ])
# }}}
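
Appendix (not part of the patches): the parsing technique the series settles on, namespace-qualified XPath over the Atom feed of the old Douban API v1, can be exercised offline. The sketch below is illustrative only; the sample entry is hand-written to match the shape the plugin's XPath expressions expect, and the subject id, rating value and field set are assumed rather than captured from the live API.

    # Offline sketch of the plugin's core parsing step. The sample entry is
    # hypothetical, not a recorded Douban API response.
    from functools import partial
    from lxml import etree

    NAMESPACES = {
        'openSearch': 'http://a9.com/-/spec/opensearchrss/1.0/',
        'atom': 'http://www.w3.org/2005/Atom',
        'db': 'http://www.douban.com/xmlns/',
        'gd': 'http://schemas.google.com/g/2005',
    }
    XPath = partial(etree.XPath, namespaces=NAMESPACES)

    # The same expressions patch 3 installs at module level.
    entry = XPath('//atom:entry')
    title = XPath('descendant::atom:title')
    creator = XPath("descendant::db:attribute[@name='author']")
    isbn = XPath("descendant::db:attribute[@name='isbn13']")
    rating = XPath('descendant::gd:rating/attribute::average')

    SAMPLE = '''<feed xmlns="http://www.w3.org/2005/Atom"
          xmlns:db="http://www.douban.com/xmlns/"
          xmlns:gd="http://schemas.google.com/g/2005">
      <entry>
        <id>http://api.douban.com/book/subject/2567698</id>
        <title>三体</title>
        <db:attribute name="author">刘慈欣</db:attribute>
        <db:attribute name="isbn13">9787536692930</db:attribute>
        <gd:rating average="8.9" max="10" min="0"/>
      </entry>
    </feed>'''

    feed = etree.fromstring(SAMPLE)
    entry_ = entry(feed)[0]
    print(title(entry_)[0].text)              # 三体
    print([x.text for x in creator(entry_)])  # ['刘慈欣']
    print(isbn(entry_)[0].text)               # 9787536692930
    print(float(rating(entry_)[0]) / 2.0)     # 4.45: 10-point average halved to calibre's 5-star scale

The final print mirrors patch 3's rating handling, float(rating(extra)[0]) / 2.0, which maps Douban's 10-point average onto calibre's 5-star scale. One loose end worth a follow-up patch: in the search branch of create_query(), SEARCH_URL already ends in '?', so appending "?apikey=" produces a URL with two '?' separators; the isbn and subject branches, whose base URLs carry no query string, are unaffected.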