Finish adding threading to google_books & minor changes

This commit is contained in:
Sengian 2010-12-13 08:59:20 +01:00
parent a54cbc1a91
commit aa7630f392
3 changed files with 69 additions and 50 deletions

View File

@ -480,8 +480,9 @@ from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO from calibre.devices.kobo.driver import KOBO
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, LibraryThing from calibre.ebooks.metadata.fetch import ISBNDB, LibraryThing
from calibre.ebooks.metadata.douban import DoubanBooks from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.google_books import GoogleBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
from calibre.ebooks.metadata.amazon import Amazon, AmazonSocial from calibre.ebooks.metadata.amazon import Amazon, AmazonSocial
from calibre.ebooks.metadata.fictionwise import Fictionwise from calibre.ebooks.metadata.fictionwise import Fictionwise

View File

@ -172,20 +172,20 @@ class MetadataSource(Plugin): # {{{
# }}} # }}}
class GoogleBooks(MetadataSource): # {{{ # class GoogleBooks(MetadataSource): # {{{
name = 'Google Books' # name = 'Google Books'
description = _('Downloads metadata from Google Books') # description = _('Downloads metadata from Google Books')
def fetch(self): # def fetch(self):
from calibre.ebooks.metadata.google_books import search # from calibre.ebooks.metadata.google_books import search
try: # try:
self.results = search(self.title, self.book_author, self.publisher, # self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, # self.isbn, max_results=10,
verbose=self.verbose) # verbose=self.verbose)
except Exception, e: # except Exception, e:
self.exception = e # self.exception = e
self.tb = traceback.format_exc() # self.tb = traceback.format_exc()
# }}} # }}}

View File

@ -12,7 +12,9 @@ from functools import partial
from lxml import etree from lxml import etree
from calibre import browser, preferred_encoding from calibre import browser, preferred_encoding
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation, check_isbn, \
authors_to_sort_string
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
from calibre.utils.date import parse_date, utcnow from calibre.utils.date import parse_date, utcnow
@ -39,6 +41,22 @@ subject = XPath('descendant::dc:subject')
description = XPath('descendant::dc:description') description = XPath('descendant::dc:description')
language = XPath('descendant::dc:language') language = XPath('descendant::dc:language')
class GoogleBooks(MetadataSource):
name = 'Google Books'
description = _('Downloads metadata from Google Books')
version = (1, 0, 1)
def fetch(self):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose)
except Exception, e:
self.exception = e
self.tb = traceback.format_exc()
class GoogleBooksError(Exception): class GoogleBooksError(Exception):
pass pass
@ -158,7 +176,7 @@ class ResultList(list):
try: try:
desc = description(entry) desc = description(entry)
if desc: if desc:
return 'SUMMARY:\n'+desc[0].text return _('SUMMARY:\n %s') % desc[0].text
except: except:
report(verbose) report(verbose)
@ -171,29 +189,27 @@ class ResultList(list):
report(verbose) report(verbose)
def get_title(self, entry): def get_title(self, entry):
candidates = [x.text for x in title(entry)] return ': '.join([x.text for x in title(entry)])
return ': '.join(candidates)
def get_authors(self, entry): def get_authors(self, entry):
m = creator(entry) m = creator(entry)
if not m: return [x.text for x in m] if m else []
m = []
m = [x.text for x in m]
return m
def get_author_sort(self, entry, verbose): def get_author_sort(self, entry, verbose):
for x in creator(entry): for x in creator(entry):
for key, val in x.attrib.items(): for key, val in x.attrib.iteritems():
if key.endswith('file-as'): if key.endswith('file-as'):
return val return val
def get_identifiers(self, entry, mi): def get_identifiers(self, entry, mi):
isbns = [] isbns = [str(x.text).strip() for x in identifier(entry)]
for x in identifier(entry): isbns = [t[5:] for t in isbns \
t = str(x.text).strip() if t[:5].upper() == 'ISBN:' and check_isbn(t[5:])]
if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'): # for x in identifier(entry):
if t[:5].upper() == 'ISBN:': # t = str(x.text).strip()
isbns.append(t[5:]) # if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'):
# if t[:5].upper() == 'ISBN:':
# isbns.append(t[5:])
if isbns: if isbns:
mi.isbn = sorted(isbns, cmp=lambda x,y:cmp(len(x), len(y)))[-1] mi.isbn = sorted(isbns, cmp=lambda x,y:cmp(len(x), len(y)))[-1]
@ -211,28 +227,26 @@ class ResultList(list):
def get_publisher(self, entry, verbose): def get_publisher(self, entry, verbose):
try: try:
pub = publisher(entry)[0].text return publisher(entry)[0].text
except: except:
pub = None return None
return pub
def get_date(self, entry, verbose): def get_date(self, entry, verbose):
try: try:
d = date(entry) d = date(entry)
if d: if d:
default = utcnow().replace(day=15) default = utcnow().replace(day=15)
d = parse_date(d[0].text, assume_utc=True, default=default) return parse_date(d[0].text, assume_utc=True, default=default)
else: else:
d = None return None
except: except:
report(verbose) report(verbose)
d = None return None
return d
def fill_MI(self, entry, data, verbose): def fill_MI(self, ent, data, verbose):
x = entry x = ent
try: try:
title = self.get_title(entry) title = self.get_title(x)
x = entry(data)[0] x = entry(data)[0]
except Exception, e: except Exception, e:
if verbose: if verbose:
@ -240,7 +254,9 @@ class ResultList(list):
print e print e
authors = self.get_authors(x) authors = self.get_authors(x)
mi = MetaInformation(title, authors) mi = MetaInformation(title, authors)
mi.author_sort = self.get_author_sort(x, verbose) tmpautsort = self.get_author_sort(x, verbose)
mi.author_sort = tmpautsort if tmpautsort \
else authors_to_sort_string(authors)
mi.comments = self.get_description(x, verbose) mi.comments = self.get_description(x, verbose)
self.get_identifiers(x, mi) self.get_identifiers(x, mi)
mi.tags = self.get_tags(x, verbose) mi.tags = self.get_tags(x, verbose)
@ -315,7 +331,6 @@ class ResultList(list):
return res return res
def populate(self, entries, br, verbose=False, brcall=3): def populate(self, entries, br, verbose=False, brcall=3):
#multiple entries
pbr = Queue(brcall) pbr = Queue(brcall)
sync = Queue(1) sync = Queue(1)
for i in xrange(brcall-1): for i in xrange(brcall-1):
@ -344,23 +359,23 @@ def search(title=None, author=None, publisher=None, isbn=None,
def option_parser(): def option_parser():
parser = OptionParser(textwrap.dedent( parser = OptionParser(textwrap.dedent(
'''\ _('''\
%prog [options] %prog [options]
Fetch book metadata from Google. You must specify one of title, author, Fetch book metadata from Google. You must specify one of title, author,
publisher or ISBN. If you specify ISBN the others are ignored. Will publisher or ISBN. If you specify ISBN the others are ignored. Will
fetch a maximum of 20 matches, so you should make your query as fetch a maximum of 40 matches, so you should make your query as
specific as possible. specific as possible.
''' '''
)) )))
parser.add_option('-t', '--title', help='Book title') parser.add_option('-t', '--title', help=_('Book title'))
parser.add_option('-a', '--author', help='Book author(s)') parser.add_option('-a', '--author', help=_('Book author(s)'))
parser.add_option('-p', '--publisher', help='Book publisher') parser.add_option('-p', '--publisher', help=_('Book publisher'))
parser.add_option('-i', '--isbn', help='Book ISBN') parser.add_option('-i', '--isbn', help=_('Book ISBN'))
parser.add_option('-m', '--max-results', default=10, parser.add_option('-m', '--max-results', default=10,
help='Maximum number of results to fetch') help=_('Maximum number of results to fetch'))
parser.add_option('-v', '--verbose', default=0, action='count', parser.add_option('-v', '--verbose', default=0, action='count',
help='Be more verbose about errors') help=_('Be more verbose about errors'))
return parser return parser
def main(args=sys.argv): def main(args=sys.argv):
@ -373,6 +388,9 @@ def main(args=sys.argv):
report(True) report(True)
parser.print_help() parser.print_help()
return 1 return 1
if results is None or len(results) == 0:
print _('No result found for this search!')
return 0
for result in results: for result in results:
print unicode(result).encode(preferred_encoding) print unicode(result).encode(preferred_encoding)
print print
@ -380,4 +398,4 @@ def main(args=sys.argv):
if __name__ == '__main__': if __name__ == '__main__':
sys.exit(main()) sys.exit(main())
# C:\Users\Pierre>calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\google_books.py" -m 5 -a gore -v>data.html # calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\google_books.py" -m 5 -a gore -v>data.html