Complete migration of ISBNDB plugin. However, I'm not enabling it, as it seems to provide largely useless results anyway.

2026-01-04 11:10:20 -05:00 · 2011-04-12 22:39:38 -06:00 · 2011-04-12 22:39:38 -06:00 · 2bdc0c48a4
commit 2bdc0c48a4
parent eecf3ec73e
2 changed files with 141 additions and 1 deletions
--- a/src/calibre/ebooks/metadata/sources/isbndb.py
+++ b/src/calibre/ebooks/metadata/sources/isbndb.py
@ -9,8 +9,14 @@ __docformat__ = 'restructuredtext en'

 from urllib import quote

+from lxml import etree
+
 from calibre.ebooks.metadata import check_isbn
 from calibre.ebooks.metadata.sources.base import Source, Option
+from calibre.ebooks.chardet import xml_to_unicode
+from calibre.utils.cleantext import clean_ascii_chars
+from calibre.utils.icu import lower
+from calibre.ebooks.metadata.book.base import Metadata

 BASE_URL = 'http://isbndb.com/api/books.xml?access_key=%s&page_number=1&results=subjects,authors,texts&'

@ -56,7 +62,7 @@ class ISBNDB(Source):
    def is_configured(self):
        return self.isbndb_key is not None

-    def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
+    def create_query(self, title=None, authors=None, identifiers={}): # {{{
        base_url = BASE_URL%self.isbndb_key
        isbn = check_isbn(identifiers.get('isbn', None))
        q = ''
@ -78,4 +84,136 @@ class ISBNDB(Source):
        if isinstance(q, unicode):
            q = q.encode('utf-8')
        return base_url + q
+    # }}}

+    def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
+            identifiers={}, timeout=30):
+        if not self.is_configured():
+            return
+        query = self.create_query(title=title, authors=authors,
+                identifiers=identifiers)
+        if not query:
+            err = 'Insufficient metadata to construct query'
+            log.error(err)
+            return err
+
+        results = []
+        try:
+            results = self.make_query(query, abort, title=title, authors=authors,
+                    identifiers=identifiers, timeout=timeout)
+        except:
+            err = 'Failed to make query to ISBNDb, aborting.'
+            log.exception(err)
+            return err
+
+        if not results and identifiers.get('isbn', False) and title and authors and \
+                not abort.is_set():
+            return self.identify(log, result_queue, abort, title=title,
+                    authors=authors, timeout=timeout)
+
+        for result in results:
+            self.clean_downloaded_metadata(result)
+            result_queue.put(result)
+
+    def parse_feed(self, feed, seen, orig_title, orig_authors, identifiers):
+
+        def tostring(x):
+            if x is None:
+                return ''
+            return etree.tostring(x, method='text', encoding=unicode).strip()
+
+        orig_isbn = identifiers.get('isbn', None)
+        title_tokens = self.get_title_tokens(orig_title)
+        author_tokens = self.get_author_tokens(orig_authors)
+        results = []
+
+        def ismatch(title, authors):
+            authors = lower(' '.join(authors))
+            title = lower(title)
+            match = False
+            for t in title_tokens:
+                if lower(t) in title:
+                    match = True
+                    break
+            if not title_tokens: match = True
+            amatch = False
+            for a in author_tokens:
+                if a in authors:
+                    amatch = True
+                    break
+            if not author_tokens: amatch = True
+            return match and amatch
+
+        bl = feed.find('BookList')
+        if bl is None:
+            err = tostring(etree.find('errormessage'))
+            raise ValueError('ISBNDb query failed:' + err)
+        total_results = int(bl.get('total_results'))
+        shown_results = int(bl.get('shown_results'))
+        for bd in bl.xpath('.//BookData'):
+            isbn = check_isbn(bd.get('isbn13', bd.get('isbn', None)))
+            if not isbn:
+                continue
+            if orig_isbn and isbn != orig_isbn:
+                continue
+            title = tostring(bd.find('Title'))
+            if not title:
+                continue
+            authors = []
+            for au in bd.xpath('.//Authors/Person'):
+                au = tostring(au)
+                if au:
+                    if ',' in au:
+                        ln, _, fn = au.partition(',')
+                        au = fn.strip() + ' ' + ln.strip()
+                authors.append(au)
+            if not authors:
+                continue
+            id_ = (title, tuple(authors))
+            if id_ in seen:
+                continue
+            seen.add(id_)
+            if not ismatch(title, authors):
+                continue
+            publisher = tostring(bd.find('PublisherText'))
+            if not publisher: publisher = None
+            comments = tostring(bd.find('Summary'))
+            if not comments: comments = None
+            mi = Metadata(title, authors)
+            mi.isbn = isbn
+            mi.publisher = publisher
+            mi.comments = comments
+            results.append(mi)
+        return total_results, shown_results, results
+
+    def make_query(self, q, abort, title=None, authors=None, identifiers={},
+            max_pages=10, timeout=30):
+        page_num = 1
+        parser = etree.XMLParser(recover=True, no_network=True)
+        br = self.browser
+
+        seen = set()
+
+        candidates = []
+        total_found = 0
+        while page_num <= max_pages and not abort.is_set():
+            url = q.replace('&page_number=1&', '&page_number=%d&'%page_num)
+            page_num += 1
+            raw = br.open_novisit(url, timeout=timeout).read()
+            feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
+                strip_encoding_pats=True)[0], parser=parser)
+            total, found, results = self.parse_feed(
+                    feed, seen, title, authors, identifiers)
+            total_found += found
+            if results or total_found >= total:
+                candidates += results
+                break
+
+        return candidates
+    # }}}
+
+if __name__ == '__main__':
+    s = ISBNDB(None)
+    t, a = 'great gatsby', ['fitzgerald']
+    q = s.create_query(title=t, authors=a)
+    s.make_query(q, title=t, authors=a)
--- a/src/calibre/manual/server.rst
+++ b/src/calibre/manual/server.rst
@ -22,6 +22,8 @@ First start the |app| content server as shown below::

    calibre-server --url-prefix /calibre --port 8080 

+The key parameter here is ``--url-prefix /calibre``. This causes the content server to serve all URLs prefixed by calibre. To see this in action, visit ``http://localhost:8080/calibre`` in your browser. You should see the normal content server website, but now it will run under /calibre.
+
 Now suppose you are using Apache as your main server. First enable the proxy modules in apache, by adding the following to :file:`httpd.conf`::

    LoadModule proxy_module modules/mod_proxy.so