Remove threading (no gain)

This commit is contained in:
Sengian 2010-12-11 18:22:33 +01:00
parent 8aa50c106e
commit 34c6caeeec

View File

@ -2,9 +2,7 @@ from __future__ import with_statement
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>' __copyright__ = '2010, sengian <sengian1@gmail.com>'
import sys, textwrap, re, traceback import sys, textwrap, re, traceback, socket
from threading import Thread, Lock
from Queue import Queue
from urllib import urlencode from urllib import urlencode
from math import ceil from math import ceil
@ -108,10 +106,6 @@ class Amazon(MetadataSource):
self.exception = e self.exception = e
self.tb = traceback.format_exc() self.tb = traceback.format_exc()
# @property
# def string_customization_help(self):
# return _('You can select here the language for metadata search with amazon.com')
def report(verbose): def report(verbose):
if verbose: if verbose:
@ -120,56 +114,6 @@ def report(verbose):
class AmazonError(Exception): class AmazonError(Exception):
pass pass
class BrowserThread(Thread):
def __init__(self, url, qbr, qsync, nb, verbose=False, timeout=10., ex=Exception, name='Meta'):
self.url = url
self.ex = ex
self.qbr = qbr
self.qsync = qsync
self.nb = nb
self.plugname = name
self.verbose = verbose
self.timeout = timeout
self.result = None
self.br = browser()
Thread.__init__(self)
def get_result(self):
return self.result
def run(self):
try:
browser = self.qbr.get(True)
raw = self.br.open_novisit(self.url, timeout=self.timeout).read()
except Exception, e:
report(self.verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
self.result = None
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
raise self.ex(_('%s timed out. Try again later.') % self.plugname)
raise self.ex(_('%s encountered an error.') % self.plugname)
finally:
self.qbr.put(browser, True)
if '<title>404 - ' in raw:
report(self.verbose)
self.result = None
return None
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
self.result = soupparser.fromstring(raw)
except:
try:
#remove ASCII invalid chars
self.result = soupparser.fromstring(clean_ascii_chars(raw))
except:
self.result = None
finally:
self.qsync.put(self.nb, True)
class Query(object): class Query(object):
@ -305,14 +249,11 @@ class Query(object):
for i in x.xpath("//a/span[@class='srTitle']")]) for i in x.xpath("//a/span[@class='srTitle']")])
return results[:self.max_results], self.baseurl return results[:self.max_results], self.baseurl
class ResultList(object): class ResultList(list):
def __init__(self, baseurl, lang = 'all'): def __init__(self, baseurl, lang = 'all'):
self.baseurl = baseurl self.baseurl = baseurl
self.lang = lang self.lang = lang
self.thread = []
self.res = []
self.nbtag = 0
self.repub = re.compile(u'\((.*)\)') self.repub = re.compile(u'\((.*)\)')
self.rerat = re.compile(u'([0-9.]+)') self.rerat = re.compile(u'([0-9.]+)')
self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>') self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
@ -482,61 +423,45 @@ class ResultList(object):
pass pass
return mi return mi
def producer(self, sync, data, br, verbose=False): def get_individual_metadata(self, url, br, verbose):
for i in xrange(len(data)): try:
thread = BrowserThread(data[i], br, sync, i, verbose=verbose, ex=AmazonError, raw = br.open_novisit(url).read()
name='Amazon') except Exception, e:
thread.start() report(verbose)
self.thread.append(thread) if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return None
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
raise AmazonError(_('Amazon timed out. Try again later.'))
raise AmazonError(_('Amazon encountered an error.'))
if '<title>404 - ' in raw:
report(verbose)
return None
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
return soupparser.fromstring(raw)
except:
try:
#remove ASCII invalid chars
return soupparser.fromstring(clean_ascii_chars(raw))
except:
report(verbose)
return None
def consumer(self, sync, syncbis, br, total_entries, verbose=False): def populate(self, entries, br, verbose=False):
i=0 #multiple entries
while i < total_entries: for x in entries:
nb = int(sync.get(True)) entry = self.get_individual_metadata(x, br, verbose)
entry = self.thread[nb].get_result()
i+=1
if entry is not None: if entry is not None:
mi = self.fill_MI(entry, verbose) mi = self.fill_MI(entry, verbose)
if mi is not None: if mi is not None:
mi.tags, atag = self.get_tags(entry, verbose) mi.tags, atag = self.get_tags(entry, verbose)
self.res[nb] = mi
if atag: if atag:
threadbis = BrowserThread(mi.tags, br, syncbis, nb, verbose=verbose, ex=AmazonError, tags = self.get_individual_metadata(mi.tags, br, verbose)
name='Amazon')
self.thread[nb] = threadbis
self.nbtag +=1
threadbis.start()
def populate(self, entries, ibr, verbose=False, brcall=3):
#multiple entries
br = Queue(brcall)
cbr = Queue(brcall-1)
syncp = Queue(1)
syncc = Queue(len(entries))
for i in xrange(brcall-1):
br.put(browser(), True)
cbr.put(browser(), True)
br.put(ibr, True)
self.res = [None]*len(entries)
prod_thread = Thread(target=self.producer, args=(syncp, entries, br, verbose))
cons_thread = Thread(target=self.consumer, args=(syncp, syncc, cbr, len(entries), verbose))
prod_thread.start()
cons_thread.start()
prod_thread.join()
cons_thread.join()
#finish processing
for i in xrange(self.nbtag):
nb = int(syncc.get(True))
tags = self.thread[nb].get_result()
if tags is not None: if tags is not None:
self.res[nb].tags = self.get_tags(tags, verbose)[0] mi.tags = self.get_tags(tags, verbose)[0]
self.append(mi)
return self.res
def search(title=None, author=None, publisher=None, isbn=None, def search(title=None, author=None, publisher=None, isbn=None,
@ -550,7 +475,8 @@ def search(title=None, author=None, publisher=None, isbn=None,
#List of entry #List of entry
ans = ResultList(baseurl, lang) ans = ResultList(baseurl, lang)
return [x for x in ans.populate(entries, br, verbose) if x is not None] ans.populate(entries, br, verbose)
return [x for x in ans if x is not None]
def option_parser(): def option_parser():
parser = OptionParser(textwrap.dedent(\ parser = OptionParser(textwrap.dedent(\
@ -599,6 +525,6 @@ if __name__ == '__main__':
# sys.exit(main()) # sys.exit(main())
import cProfile import cProfile
# sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonfr; calibre.ebooks.metadata.amazonfr.main()")) # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonfr; calibre.ebooks.metadata.amazonfr.main()"))
sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonfr; calibre.ebooks.metadata.amazonfr.main()", "profile_tmp_threading_1")) sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonfr; calibre.ebooks.metadata.amazonfr.main()", "profile_tmp_2"))
# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazonfr.py" -m 5 -a gore -v>data.html # calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazonfr.py" -m 5 -a gore -v>data.html