mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix threading in amazon
This commit is contained in:
parent
ae781ae614
commit
5c89b576e3
@ -4,6 +4,7 @@ __copyright__ = '2010, sengian <sengian1@gmail.com>'
|
|||||||
|
|
||||||
import sys, textwrap, re, traceback, socket
|
import sys, textwrap, re, traceback, socket
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
|
from Queue import Queue
|
||||||
from urllib import urlencode
|
from urllib import urlencode
|
||||||
from math import ceil
|
from math import ceil
|
||||||
|
|
||||||
@ -21,57 +22,6 @@ from calibre.utils.config import OptionParser
|
|||||||
from calibre.library.comments import sanitize_comments_html
|
from calibre.library.comments import sanitize_comments_html
|
||||||
|
|
||||||
|
|
||||||
# class AmazonFr(MetadataSource):
|
|
||||||
|
|
||||||
# name = 'Amazon French'
|
|
||||||
# description = _('Downloads metadata from amazon.fr')
|
|
||||||
# supported_platforms = ['windows', 'osx', 'linux']
|
|
||||||
# author = 'Sengian'
|
|
||||||
# version = (1, 0, 0)
|
|
||||||
# has_html_comments = True
|
|
||||||
|
|
||||||
# def fetch(self):
|
|
||||||
# try:
|
|
||||||
# self.results = search(self.title, self.book_author, self.publisher,
|
|
||||||
# self.isbn, max_results=10, verbose=self.verbose, lang='fr')
|
|
||||||
# except Exception, e:
|
|
||||||
# self.exception = e
|
|
||||||
# self.tb = traceback.format_exc()
|
|
||||||
|
|
||||||
# class AmazonEs(MetadataSource):
|
|
||||||
|
|
||||||
# name = 'Amazon Spanish'
|
|
||||||
# description = _('Downloads metadata from amazon.com in spanish')
|
|
||||||
# supported_platforms = ['windows', 'osx', 'linux']
|
|
||||||
# author = 'Sengian'
|
|
||||||
# version = (1, 0, 0)
|
|
||||||
# has_html_comments = True
|
|
||||||
|
|
||||||
# def fetch(self):
|
|
||||||
# try:
|
|
||||||
# self.results = search(self.title, self.book_author, self.publisher,
|
|
||||||
# self.isbn, max_results=10, verbose=self.verbose, lang='es')
|
|
||||||
# except Exception, e:
|
|
||||||
# self.exception = e
|
|
||||||
# self.tb = traceback.format_exc()
|
|
||||||
|
|
||||||
# class AmazonDe(MetadataSource):
|
|
||||||
|
|
||||||
# name = 'Amazon German'
|
|
||||||
# description = _('Downloads metadata from amazon.de')
|
|
||||||
# supported_platforms = ['windows', 'osx', 'linux']
|
|
||||||
# author = 'Sengian'
|
|
||||||
# version = (1, 0, 0)
|
|
||||||
# has_html_comments = True
|
|
||||||
|
|
||||||
# def fetch(self):
|
|
||||||
# try:
|
|
||||||
# self.results = search(self.title, self.book_author, self.publisher,
|
|
||||||
# self.isbn, max_results=10, verbose=self.verbose, lang='de')
|
|
||||||
# except Exception, e:
|
|
||||||
# self.exception = e
|
|
||||||
# self.tb = traceback.format_exc()
|
|
||||||
|
|
||||||
class Amazon(MetadataSource):
|
class Amazon(MetadataSource):
|
||||||
|
|
||||||
name = 'Amazon'
|
name = 'Amazon'
|
||||||
@ -83,8 +33,33 @@ class Amazon(MetadataSource):
|
|||||||
|
|
||||||
def fetch(self):
|
def fetch(self):
|
||||||
try:
|
try:
|
||||||
|
lang = get_lang()
|
||||||
|
lang = lang[:2] if re.match(r'(fr.*|de.*)', lang) else 'all'
|
||||||
|
if lang == 'all':
|
||||||
self.results = search(self.title, self.book_author, self.publisher,
|
self.results = search(self.title, self.book_author, self.publisher,
|
||||||
self.isbn, max_results=5, verbose=self.verbose, lang='all')
|
self.isbn, max_results=5, verbose=self.verbose, lang='all')
|
||||||
|
else:
|
||||||
|
tmploc = ThreadwithResults(search, self.title, self.book_author,
|
||||||
|
self.publisher,self.isbn, max_results=5,
|
||||||
|
verbose=self.verbose, lang=lang)
|
||||||
|
tmpnoloc = ThreadwithResults(search, self.title, self.book_author,
|
||||||
|
self.publisher, self.isbn, max_results=5,
|
||||||
|
verbose=self.verbose, lang='all')
|
||||||
|
tmploc.start()
|
||||||
|
tmpnoloc.start()
|
||||||
|
tmploc.join()
|
||||||
|
tmpnoloc.join()
|
||||||
|
tmploc= tmploc.get_result()
|
||||||
|
tmpnoloc= tmpnoloc.get_result()
|
||||||
|
|
||||||
|
tempres = None
|
||||||
|
if tmpnoloc is not None:
|
||||||
|
tempres = tmpnoloc
|
||||||
|
if tmploc is not None:
|
||||||
|
tempres = tmploc
|
||||||
|
if tmpnoloc is not None:
|
||||||
|
tempres.extend(tmpnoloc)
|
||||||
|
self.results = tmpres
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
self.exception = e
|
self.exception = e
|
||||||
self.tb = traceback.format_exc()
|
self.tb = traceback.format_exc()
|
||||||
@ -109,10 +84,10 @@ class AmazonSocial(MetadataSource):
|
|||||||
self.results = get_social_metadata(self.title, self.book_author, self.publisher,
|
self.results = get_social_metadata(self.title, self.book_author, self.publisher,
|
||||||
self.isbn, verbose=self.verbose, lang='all')[0]
|
self.isbn, verbose=self.verbose, lang='all')[0]
|
||||||
else:
|
else:
|
||||||
tmploc = ThreadwithResults(AmazonError, self.verbose, get_social_metadata, self.title,
|
tmploc = ThreadwithResults(get_social_metadata, self.title, self.book_author,
|
||||||
self.book_author, self.publisher,self.isbn, verbose=self.verbose, lang=lang)
|
self.publisher,self.isbn, verbose=self.verbose, lang=lang)
|
||||||
tmpnoloc = ThreadwithResults(AmazonError, self.verbose, get_social_metadata, self.title,
|
tmpnoloc = ThreadwithResults(get_social_metadata, self.title, self.book_author,
|
||||||
self.book_author, self.publisher, self.isbn, verbose=self.verbose, lang='all')
|
self.publisher, self.isbn, verbose=self.verbose, lang='all')
|
||||||
tmploc.start()
|
tmploc.start()
|
||||||
tmpnoloc.start()
|
tmpnoloc.start()
|
||||||
tmploc.join()
|
tmploc.join()
|
||||||
@ -123,9 +98,7 @@ class AmazonSocial(MetadataSource):
|
|||||||
tmpnoloc= tmpnoloc.get_result()
|
tmpnoloc= tmpnoloc.get_result()
|
||||||
if tmpnoloc is not None:
|
if tmpnoloc is not None:
|
||||||
tmpnoloc = tmpnoloc[0]
|
tmpnoloc = tmpnoloc[0]
|
||||||
print tmpnoloc
|
if tmpnoloc is not None:
|
||||||
|
|
||||||
if tmploc is not None and tmpnoloc is not None:
|
|
||||||
if tmploc.rating is None:
|
if tmploc.rating is None:
|
||||||
tmploc.rating = tmpnoloc.rating
|
tmploc.rating = tmpnoloc.rating
|
||||||
if tmploc.comments is not None:
|
if tmploc.comments is not None:
|
||||||
@ -146,12 +119,10 @@ class AmazonError(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
class ThreadwithResults(Thread):
|
class ThreadwithResults(Thread):
|
||||||
def __init__(self, error, verb, func, *args, **kargs):
|
def __init__(self, func, *args, **kargs):
|
||||||
self.func = func
|
self.func = func
|
||||||
self.args = args
|
self.args = args
|
||||||
self.kargs = kargs
|
self.kargs = kargs
|
||||||
self.verbose = verb
|
|
||||||
self.ex = error
|
|
||||||
self.result = None
|
self.result = None
|
||||||
Thread.__init__(self)
|
Thread.__init__(self)
|
||||||
|
|
||||||
@ -159,11 +130,8 @@ class ThreadwithResults(Thread):
|
|||||||
return self.result
|
return self.result
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
try:
|
|
||||||
self.result = self.func(*self.args, **self.kargs)
|
self.result = self.func(*self.args, **self.kargs)
|
||||||
except Exception, e:
|
|
||||||
report(self.verbose)
|
|
||||||
raise self.ex(_('An error was encountered in the function threading'))
|
|
||||||
|
|
||||||
class Query(object):
|
class Query(object):
|
||||||
|
|
||||||
@ -172,10 +140,10 @@ class Query(object):
|
|||||||
BASE_URL_DE = 'http://www.amazon.de'
|
BASE_URL_DE = 'http://www.amazon.de'
|
||||||
|
|
||||||
def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None,
|
def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None,
|
||||||
max_results=10, rlang='all'):
|
max_results=20, rlang='all'):
|
||||||
assert not(title is None and author is None and publisher is None \
|
assert not(title is None and author is None and publisher is None \
|
||||||
and isbn is None and keywords is None)
|
and isbn is None and keywords is None)
|
||||||
assert (max_results < 11)
|
assert (max_results < 21)
|
||||||
|
|
||||||
self.max_results = int(max_results)
|
self.max_results = int(max_results)
|
||||||
self.renbres = re.compile(u'\s*([0-9.,]+)\s*')
|
self.renbres = re.compile(u'\s*([0-9.,]+)\s*')
|
||||||
@ -304,6 +272,9 @@ class ResultList(object):
|
|||||||
def __init__(self, baseurl, lang = 'all'):
|
def __init__(self, baseurl, lang = 'all'):
|
||||||
self.baseurl = baseurl
|
self.baseurl = baseurl
|
||||||
self.lang = lang
|
self.lang = lang
|
||||||
|
self.thread = []
|
||||||
|
self.res = []
|
||||||
|
self.nbtag = 0
|
||||||
self.repub = re.compile(u'\((.*)\)')
|
self.repub = re.compile(u'\((.*)\)')
|
||||||
self.rerat = re.compile(u'([0-9.]+)')
|
self.rerat = re.compile(u'([0-9.]+)')
|
||||||
self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
|
self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
|
||||||
@ -499,20 +470,72 @@ class ResultList(object):
|
|||||||
report(verbose)
|
report(verbose)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def populate(self, entries, br, verbose=False):
|
def fetchdatathread(self, qbr, qsync, nb, url, verbose):
|
||||||
res = []
|
try:
|
||||||
for x in entries:
|
browser = qbr.get(True)
|
||||||
entry = self.get_individual_metadata(x, br, verbose)
|
entry = self.get_individual_metadata(url, browser, verbose)
|
||||||
|
except:
|
||||||
|
report(verbose)
|
||||||
|
entry = None
|
||||||
|
finally:
|
||||||
|
qbr.put(browser, True)
|
||||||
|
qsync.put(nb, True)
|
||||||
|
return entry
|
||||||
|
|
||||||
|
def producer(self, sync, urls, br, verbose=False):
|
||||||
|
for i in xrange(len(urls)):
|
||||||
|
thread = ThreadwithResults(self.fetchdatathread, br, sync,
|
||||||
|
i, urls[i], verbose)
|
||||||
|
thread.start()
|
||||||
|
self.thread.append(thread)
|
||||||
|
|
||||||
|
def consumer(self, sync, syncbis, br, total_entries, verbose=False):
|
||||||
|
i=0
|
||||||
|
while i < total_entries:
|
||||||
|
nb = int(sync.get(True))
|
||||||
|
self.thread[nb].join()
|
||||||
|
entry = self.thread[nb].get_result()
|
||||||
|
i+=1
|
||||||
if entry is not None:
|
if entry is not None:
|
||||||
mi = self.fill_MI(entry, verbose)
|
mi = self.fill_MI(entry, verbose)
|
||||||
if mi is not None:
|
if mi is not None:
|
||||||
mi.tags, atag = self.get_tags(entry, verbose)
|
mi.tags, atag = self.get_tags(entry, verbose)
|
||||||
|
self.res[nb] = mi
|
||||||
if atag:
|
if atag:
|
||||||
tags = self.get_individual_metadata(mi.tags, br, verbose)
|
threadbis = ThreadwithResults(self.fetchdatathread,
|
||||||
|
br, syncbis, nb, mi.tags, verbose)
|
||||||
|
self.thread[nb] = threadbis
|
||||||
|
self.nbtag +=1
|
||||||
|
threadbis.start()
|
||||||
|
|
||||||
|
def populate(self, entries, ibr, verbose=False, brcall=3):
|
||||||
|
br = Queue(brcall)
|
||||||
|
cbr = Queue(brcall-1)
|
||||||
|
|
||||||
|
syncp = Queue(1)
|
||||||
|
syncc = Queue(len(entries))
|
||||||
|
|
||||||
|
for i in xrange(brcall-1):
|
||||||
|
br.put(browser(), True)
|
||||||
|
cbr.put(browser(), True)
|
||||||
|
br.put(ibr, True)
|
||||||
|
|
||||||
|
self.res = [None]*len(entries)
|
||||||
|
|
||||||
|
prod_thread = Thread(target=self.producer, args=(syncp, entries, br, verbose))
|
||||||
|
cons_thread = Thread(target=self.consumer, args=(syncp, syncc, cbr, len(entries), verbose))
|
||||||
|
prod_thread.start()
|
||||||
|
cons_thread.start()
|
||||||
|
prod_thread.join()
|
||||||
|
cons_thread.join()
|
||||||
|
|
||||||
|
#finish processing
|
||||||
|
for i in xrange(self.nbtag):
|
||||||
|
nb = int(syncc.get(True))
|
||||||
|
tags = self.thread[nb].get_result()
|
||||||
if tags is not None:
|
if tags is not None:
|
||||||
mi.tags = self.get_tags(tags, verbose)[0]
|
self.res[nb].tags = self.get_tags(tags, verbose)[0]
|
||||||
res.append(mi)
|
return self.res
|
||||||
return res
|
|
||||||
|
|
||||||
|
|
||||||
def search(title=None, author=None, publisher=None, isbn=None,
|
def search(title=None, author=None, publisher=None, isbn=None,
|
||||||
@ -561,7 +584,7 @@ def option_parser():
|
|||||||
%prog [options]
|
%prog [options]
|
||||||
|
|
||||||
Fetch book metadata from Amazon. You must specify one of title, author,
|
Fetch book metadata from Amazon. You must specify one of title, author,
|
||||||
ISBN, publisher or keywords. Will fetch a maximum of 10 matches,
|
ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
|
||||||
so you should make your query as specific as possible.
|
so you should make your query as specific as possible.
|
||||||
You can chose the language for metadata retrieval:
|
You can chose the language for metadata retrieval:
|
||||||
english & french & german
|
english & french & german
|
||||||
|
Loading…
x
Reference in New Issue
Block a user