mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Complete migration of ISBNDB plugin. However, I'm not enabling it, as it seems to provide largely useless results anyway.
This commit is contained in:
parent
eecf3ec73e
commit
2bdc0c48a4
@ -9,8 +9,14 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
from urllib import quote
|
from urllib import quote
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
from calibre.ebooks.metadata import check_isbn
|
from calibre.ebooks.metadata import check_isbn
|
||||||
from calibre.ebooks.metadata.sources.base import Source, Option
|
from calibre.ebooks.metadata.sources.base import Source, Option
|
||||||
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
|
from calibre.utils.cleantext import clean_ascii_chars
|
||||||
|
from calibre.utils.icu import lower
|
||||||
|
from calibre.ebooks.metadata.book.base import Metadata
|
||||||
|
|
||||||
BASE_URL = 'http://isbndb.com/api/books.xml?access_key=%s&page_number=1&results=subjects,authors,texts&'
|
BASE_URL = 'http://isbndb.com/api/books.xml?access_key=%s&page_number=1&results=subjects,authors,texts&'
|
||||||
|
|
||||||
@ -56,7 +62,7 @@ class ISBNDB(Source):
|
|||||||
def is_configured(self):
|
def is_configured(self):
|
||||||
return self.isbndb_key is not None
|
return self.isbndb_key is not None
|
||||||
|
|
||||||
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
|
def create_query(self, title=None, authors=None, identifiers={}): # {{{
|
||||||
base_url = BASE_URL%self.isbndb_key
|
base_url = BASE_URL%self.isbndb_key
|
||||||
isbn = check_isbn(identifiers.get('isbn', None))
|
isbn = check_isbn(identifiers.get('isbn', None))
|
||||||
q = ''
|
q = ''
|
||||||
@ -78,4 +84,136 @@ class ISBNDB(Source):
|
|||||||
if isinstance(q, unicode):
|
if isinstance(q, unicode):
|
||||||
q = q.encode('utf-8')
|
q = q.encode('utf-8')
|
||||||
return base_url + q
|
return base_url + q
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
        identifiers={}, timeout=30):
    '''
    Query ISBNDb and put every matching result onto ``result_queue``.

    :param log: Logger; ``error`` and ``exception`` are called on failure.
    :param result_queue: Object with a ``put`` method that receives each
        cleaned result.
    :param abort: Object with an ``is_set`` method; when set, the fallback
        query is not attempted.
    :param title: Title to search for, or None.
    :param authors: Authors to search for, or None.
    :param identifiers: Mapping of identifier type to value; only the
        ``'isbn'`` key is inspected here.
    :param timeout: Timeout forwarded to ``make_query``.
    :return: None on success or when the plugin is not configured,
        otherwise a string describing the error.
    '''
    if not self.is_configured():
        return

    query = self.create_query(title=title, authors=authors,
            identifiers=identifiers)
    if not query:
        err = 'Insufficient metadata to construct query'
        log.error(err)
        return err

    try:
        results = self.make_query(query, abort, title=title,
                authors=authors, identifiers=identifiers, timeout=timeout)
    except Exception:
        # Deliberately not a bare except: KeyboardInterrupt and SystemExit
        # must propagate instead of being reported as a failed query.
        err = 'Failed to make query to ISBNDb, aborting.'
        log.exception(err)
        return err

    isbn_lookup_was_empty = not results and identifiers.get('isbn', False)
    if isbn_lookup_was_empty and title and authors and not abort.is_set():
        # The ISBN based query found nothing: retry once using only the
        # title and authors (identifiers are intentionally dropped).
        return self.identify(log, result_queue, abort, title=title,
                authors=authors, timeout=timeout)

    for result in results:
        self.clean_downloaded_metadata(result)
        result_queue.put(result)
|
||||||
|
|
||||||
|
def parse_feed(self, feed, seen, orig_title, orig_authors, identifiers):
    '''
    Extract book metadata from a parsed ISBNDb XML response.

    :param feed: Parsed XML root element of the response.
    :param seen: Set of ``(title, tuple(authors))`` keys already handled;
        it is updated in place so repeated entries are skipped.
    :param orig_title: Title that was searched for (used for matching).
    :param orig_authors: Authors that were searched for (used for matching).
    :param identifiers: Mapping of identifier type to value; only
        ``'isbn'`` is used, to discard entries with a different ISBN.
    :return: Tuple ``(total_results, shown_results, results)`` where the
        first two are the integer attributes of the ``BookList`` element
        and ``results`` is a list of ``Metadata`` objects.
    :raises ValueError: If the response contains no ``BookList`` element.
    '''

    def tostring(x):
        # Text content of an element, or '' when the element is missing
        if x is None:
            return ''
        return etree.tostring(x, method='text', encoding=unicode).strip()

    orig_isbn = identifiers.get('isbn', None)
    # Materialise the tokens: they are truth-tested and iterated once per
    # BookData entry, which would silently break if the helpers return
    # one-shot iterators (assumption, the helpers are not visible here).
    title_tokens = list(self.get_title_tokens(orig_title))
    author_tokens = list(self.get_author_tokens(orig_authors))
    results = []

    def ismatch(title, authors):
        # An entry matches when at least one title token and at least one
        # author token occur in it; an empty token list matches anything.
        authors = lower(' '.join(authors))
        title = lower(title)
        match = not title_tokens or any(
                lower(t) in title for t in title_tokens)
        # Author tokens are lower-cased as well, since they are compared
        # against the lower-cased author string.
        amatch = not author_tokens or any(
                lower(a) in authors for a in author_tokens)
        return match and amatch

    bl = feed.find('BookList')
    if bl is None:
        # The error message lives in the response itself
        err = tostring(feed.find('errormessage'))
        raise ValueError('ISBNDb query failed:' + err)
    total_results = int(bl.get('total_results'))
    shown_results = int(bl.get('shown_results'))

    for bd in bl.xpath('.//BookData'):
        isbn = check_isbn(bd.get('isbn13', bd.get('isbn', None)))
        if not isbn:
            continue
        if orig_isbn and isbn != orig_isbn:
            continue

        title = tostring(bd.find('Title'))
        if not title:
            continue

        authors = []
        for au in bd.xpath('.//Authors/Person'):
            au = tostring(au)
            if au:
                if ',' in au:
                    # Turn "Last, First" into "First Last"
                    ln, _, fn = au.partition(',')
                    au = fn.strip() + ' ' + ln.strip()
                authors.append(au)
        if not authors:
            continue

        id_ = (title, tuple(authors))
        if id_ in seen:
            continue
        # Recorded before the match test, so non-matching entries are not
        # re-examined either
        seen.add(id_)
        if not ismatch(title, authors):
            continue

        mi = Metadata(title, authors)
        mi.isbn = isbn
        # Empty strings are stored as None
        mi.publisher = tostring(bd.find('PublisherText')) or None
        mi.comments = tostring(bd.find('Summary')) or None
        results.append(mi)

    return total_results, shown_results, results
|
||||||
|
|
||||||
|
def make_query(self, q, abort, title=None, authors=None, identifiers={},
        max_pages=10, timeout=30):
    '''
    Fetch result pages for the query URL ``q`` until one yields matches.

    Pages are requested in order, starting at 1, by rewriting the
    ``page_number`` parameter of ``q``. Fetching stops when a page yields
    at least one match, when the number of results shown so far reaches
    the total reported by the server, when ``max_pages`` pages have been
    requested, or when ``abort`` is set.

    :return: List of matches from the last page fetched (possibly empty).
    '''
    xml_parser = etree.XMLParser(recover=True, no_network=True)
    browser = self.browser
    seen = set()
    shown_so_far = 0

    for page in range(1, max_pages + 1):
        if abort.is_set():
            break
        page_url = q.replace('&page_number=1&', '&page_number=%d&' % page)
        raw = browser.open_novisit(page_url, timeout=timeout).read()
        text = xml_to_unicode(clean_ascii_chars(raw),
                strip_encoding_pats=True)[0]
        feed = etree.fromstring(text, parser=xml_parser)
        total, shown, matches = self.parse_feed(
                feed, seen, title, authors, identifiers)
        shown_so_far += shown
        if matches or shown_so_far >= total:
            return list(matches)

    return []
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Manual smoke test: build a query and run it against ISBNDb
    from threading import Event

    s = ISBNDB(None)
    t, a = 'great gatsby', ['fitzgerald']
    q = s.create_query(title=t, authors=a)
    # make_query requires the abort event as its second positional argument
    s.make_query(q, Event(), title=t, authors=a)
|
||||||
|
@ -22,6 +22,8 @@ First start the |app| content server as shown below::
|
|||||||
|
|
||||||
calibre-server --url-prefix /calibre --port 8080
|
calibre-server --url-prefix /calibre --port 8080
|
||||||
|
|
||||||
|
The key parameter here is ``--url-prefix /calibre``. This causes the content server to serve all URLs under the ``/calibre`` prefix. To see this in action, visit ``http://localhost:8080/calibre`` in your browser. You should see the normal content server website, but now it will run under ``/calibre``.
|
||||||
|
|
||||||
Now suppose you are using Apache as your main server. First enable the proxy modules in apache, by adding the following to :file:`httpd.conf`::
|
Now suppose you are using Apache as your main server. First enable the proxy modules in apache, by adding the following to :file:`httpd.conf`::
|
||||||
|
|
||||||
LoadModule proxy_module modules/mod_proxy.so
|
LoadModule proxy_module modules/mod_proxy.so
|
||||||
|
Loading…
x
Reference in New Issue
Block a user