Fix threading in amazon

This commit is contained in:
Sengian 2010-12-12 11:57:00 +01:00
parent ae781ae614
commit 5c89b576e3

View File

@ -4,6 +4,7 @@ __copyright__ = '2010, sengian <sengian1@gmail.com>'
import sys, textwrap, re, traceback, socket
from threading import Thread
from Queue import Queue
from urllib import urlencode
from math import ceil
@ -21,57 +22,6 @@ from calibre.utils.config import OptionParser
from calibre.library.comments import sanitize_comments_html
# class AmazonFr(MetadataSource):
# name = 'Amazon French'
# description = _('Downloads metadata from amazon.fr')
# supported_platforms = ['windows', 'osx', 'linux']
# author = 'Sengian'
# version = (1, 0, 0)
# has_html_comments = True
# def fetch(self):
# try:
# self.results = search(self.title, self.book_author, self.publisher,
# self.isbn, max_results=10, verbose=self.verbose, lang='fr')
# except Exception, e:
# self.exception = e
# self.tb = traceback.format_exc()
# class AmazonEs(MetadataSource):
# name = 'Amazon Spanish'
# description = _('Downloads metadata from amazon.com in spanish')
# supported_platforms = ['windows', 'osx', 'linux']
# author = 'Sengian'
# version = (1, 0, 0)
# has_html_comments = True
# def fetch(self):
# try:
# self.results = search(self.title, self.book_author, self.publisher,
# self.isbn, max_results=10, verbose=self.verbose, lang='es')
# except Exception, e:
# self.exception = e
# self.tb = traceback.format_exc()
# class AmazonDe(MetadataSource):
# name = 'Amazon German'
# description = _('Downloads metadata from amazon.de')
# supported_platforms = ['windows', 'osx', 'linux']
# author = 'Sengian'
# version = (1, 0, 0)
# has_html_comments = True
# def fetch(self):
# try:
# self.results = search(self.title, self.book_author, self.publisher,
# self.isbn, max_results=10, verbose=self.verbose, lang='de')
# except Exception, e:
# self.exception = e
# self.tb = traceback.format_exc()
class Amazon(MetadataSource):
name = 'Amazon'
@ -83,8 +33,33 @@ class Amazon(MetadataSource):
def fetch(self):
try:
self.results = search(self.title, self.book_author, self.publisher,
lang = get_lang()
lang = lang[:2] if re.match(r'(fr.*|de.*)', lang) else 'all'
if lang == 'all':
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=5, verbose=self.verbose, lang='all')
else:
tmploc = ThreadwithResults(search, self.title, self.book_author,
self.publisher,self.isbn, max_results=5,
verbose=self.verbose, lang=lang)
tmpnoloc = ThreadwithResults(search, self.title, self.book_author,
self.publisher, self.isbn, max_results=5,
verbose=self.verbose, lang='all')
tmploc.start()
tmpnoloc.start()
tmploc.join()
tmpnoloc.join()
tmploc= tmploc.get_result()
tmpnoloc= tmpnoloc.get_result()
tempres = None
if tmpnoloc is not None:
tempres = tmpnoloc
if tmploc is not None:
tempres = tmploc
if tmpnoloc is not None:
tempres.extend(tmpnoloc)
self.results = tmpres
except Exception, e:
self.exception = e
self.tb = traceback.format_exc()
@ -107,12 +82,12 @@ class AmazonSocial(MetadataSource):
lang = lang[:2] if re.match(r'(fr.*|de.*)', lang) else 'all'
if lang == 'all':
self.results = get_social_metadata(self.title, self.book_author, self.publisher,
self.isbn, verbose=self.verbose, lang='all')[0]
self.isbn, verbose=self.verbose, lang='all')[0]
else:
tmploc = ThreadwithResults(AmazonError, self.verbose, get_social_metadata, self.title,
self.book_author, self.publisher,self.isbn, verbose=self.verbose, lang=lang)
tmpnoloc = ThreadwithResults(AmazonError, self.verbose, get_social_metadata, self.title,
self.book_author, self.publisher, self.isbn, verbose=self.verbose, lang='all')
tmploc = ThreadwithResults(get_social_metadata, self.title, self.book_author,
self.publisher,self.isbn, verbose=self.verbose, lang=lang)
tmpnoloc = ThreadwithResults(get_social_metadata, self.title, self.book_author,
self.publisher, self.isbn, verbose=self.verbose, lang='all')
tmploc.start()
tmpnoloc.start()
tmploc.join()
@ -123,15 +98,13 @@ class AmazonSocial(MetadataSource):
tmpnoloc= tmpnoloc.get_result()
if tmpnoloc is not None:
tmpnoloc = tmpnoloc[0]
print tmpnoloc
if tmploc is not None and tmpnoloc is not None:
if tmploc.rating is None:
tmploc.rating = tmpnoloc.rating
if tmploc.comments is not None:
tmploc.comments = tmpnoloc.comments
if tmploc.tags is None:
tmploc.tags = tmpnoloc.tags
if tmpnoloc is not None:
if tmploc.rating is None:
tmploc.rating = tmpnoloc.rating
if tmploc.comments is not None:
tmploc.comments = tmpnoloc.comments
if tmploc.tags is None:
tmploc.tags = tmpnoloc.tags
self.results = tmploc
except Exception, e:
self.exception = e
@ -146,12 +119,10 @@ class AmazonError(Exception):
pass
class ThreadwithResults(Thread):
def __init__(self, error, verb, func, *args, **kargs):
def __init__(self, func, *args, **kargs):
self.func = func
self.args = args
self.kargs = kargs
self.verbose = verb
self.ex = error
self.result = None
Thread.__init__(self)
@ -159,11 +130,8 @@ class ThreadwithResults(Thread):
return self.result
def run(self):
try:
self.result = self.func(*self.args, **self.kargs)
except Exception, e:
report(self.verbose)
raise self.ex(_('An error was encountered in the function threading'))
self.result = self.func(*self.args, **self.kargs)
class Query(object):
@ -172,10 +140,10 @@ class Query(object):
BASE_URL_DE = 'http://www.amazon.de'
def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None,
max_results=10, rlang='all'):
max_results=20, rlang='all'):
assert not(title is None and author is None and publisher is None \
and isbn is None and keywords is None)
assert (max_results < 11)
assert (max_results < 21)
self.max_results = int(max_results)
self.renbres = re.compile(u'\s*([0-9.,]+)\s*')
@ -304,6 +272,9 @@ class ResultList(object):
def __init__(self, baseurl, lang = 'all'):
self.baseurl = baseurl
self.lang = lang
self.thread = []
self.res = []
self.nbtag = 0
self.repub = re.compile(u'\((.*)\)')
self.rerat = re.compile(u'([0-9.]+)')
self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
@ -499,20 +470,72 @@ class ResultList(object):
report(verbose)
return None
def populate(self, entries, br, verbose=False):
res = []
for x in entries:
entry = self.get_individual_metadata(x, br, verbose)
def fetchdatathread(self, qbr, qsync, nb, url, verbose):
try:
browser = qbr.get(True)
entry = self.get_individual_metadata(url, browser, verbose)
except:
report(verbose)
entry = None
finally:
qbr.put(browser, True)
qsync.put(nb, True)
return entry
def producer(self, sync, urls, br, verbose=False):
for i in xrange(len(urls)):
thread = ThreadwithResults(self.fetchdatathread, br, sync,
i, urls[i], verbose)
thread.start()
self.thread.append(thread)
def consumer(self, sync, syncbis, br, total_entries, verbose=False):
i=0
while i < total_entries:
nb = int(sync.get(True))
self.thread[nb].join()
entry = self.thread[nb].get_result()
i+=1
if entry is not None:
mi = self.fill_MI(entry, verbose)
if mi is not None:
mi.tags, atag = self.get_tags(entry, verbose)
self.res[nb] = mi
if atag:
tags = self.get_individual_metadata(mi.tags, br, verbose)
if tags is not None:
mi.tags = self.get_tags(tags, verbose)[0]
res.append(mi)
return res
threadbis = ThreadwithResults(self.fetchdatathread,
br, syncbis, nb, mi.tags, verbose)
self.thread[nb] = threadbis
self.nbtag +=1
threadbis.start()
def populate(self, entries, ibr, verbose=False, brcall=3):
br = Queue(brcall)
cbr = Queue(brcall-1)
syncp = Queue(1)
syncc = Queue(len(entries))
for i in xrange(brcall-1):
br.put(browser(), True)
cbr.put(browser(), True)
br.put(ibr, True)
self.res = [None]*len(entries)
prod_thread = Thread(target=self.producer, args=(syncp, entries, br, verbose))
cons_thread = Thread(target=self.consumer, args=(syncp, syncc, cbr, len(entries), verbose))
prod_thread.start()
cons_thread.start()
prod_thread.join()
cons_thread.join()
#finish processing
for i in xrange(self.nbtag):
nb = int(syncc.get(True))
tags = self.thread[nb].get_result()
if tags is not None:
self.res[nb].tags = self.get_tags(tags, verbose)[0]
return self.res
def search(title=None, author=None, publisher=None, isbn=None,
@ -561,7 +584,7 @@ def option_parser():
%prog [options]
Fetch book metadata from Amazon. You must specify one of title, author,
ISBN, publisher or keywords. Will fetch a maximum of 10 matches,
ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
so you should make your query as specific as possible.
You can chose the language for metadata retrieval:
english & french & german