mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Threading optimisation (the last one, I hope): now faster than light (at least Pratchett's) for Amazon
This commit is contained in:
parent
81af8382d6
commit
99921673d6
@ -121,20 +121,6 @@ def report(verbose):
|
||||
class AmazonError(Exception):
|
||||
pass
|
||||
|
||||
class ThreadwithResults(Thread):
|
||||
def __init__(self, func, *args, **kargs):
|
||||
self.func = func
|
||||
self.args = args
|
||||
self.kargs = kargs
|
||||
self.result = None
|
||||
Thread.__init__(self)
|
||||
|
||||
def get_result(self):
|
||||
return self.result
|
||||
|
||||
def run(self):
|
||||
self.result = self.func(*self.args, **self.kargs)
|
||||
|
||||
|
||||
class Query(object):
|
||||
|
||||
@ -269,14 +255,11 @@ class Query(object):
|
||||
for i in x.xpath("//a/span[@class='srTitle']")])
|
||||
return results[:self.max_results], self.baseurl
|
||||
|
||||
class ResultList(object):
|
||||
class ResultList(list):
|
||||
|
||||
def __init__(self, baseurl, lang = 'all'):
|
||||
self.baseurl = baseurl
|
||||
self.lang = lang
|
||||
self.thread = []
|
||||
self.res = []
|
||||
self.nbtag = 0
|
||||
self.repub = re.compile(u'\((.*)\)')
|
||||
self.rerat = re.compile(u'([0-9.]+)')
|
||||
self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
|
||||
@ -484,63 +467,65 @@ class ResultList(object):
|
||||
entry = None
|
||||
finally:
|
||||
qbr.put(browser, True)
|
||||
qsync.put(nb, True)
|
||||
return entry
|
||||
qsync.put((nb, entry), True)
|
||||
|
||||
def producer(self, sync, urls, br, verbose=False):
|
||||
for i in xrange(len(urls)):
|
||||
thread = ThreadwithResults(self.fetchdatathread, br, sync,
|
||||
i, urls[i], verbose)
|
||||
thread = Thread(target=self.fetchdatathread,
|
||||
args=(br, sync, i, urls[i], verbose))
|
||||
thread.start()
|
||||
self.thread.append(thread)
|
||||
|
||||
def consumer(self, sync, syncbis, br, total_entries, verbose=False):
|
||||
i=0
|
||||
self.extend([None]*total_entries)
|
||||
while i < total_entries:
|
||||
nb = int(sync.get(True))
|
||||
self.thread[nb].join()
|
||||
entry = self.thread[nb].get_result()
|
||||
rq = sync.get(True)
|
||||
nb = int(rq[0])
|
||||
entry = rq[1]
|
||||
i+=1
|
||||
if entry is not None:
|
||||
mi = self.fill_MI(entry, verbose)
|
||||
if mi is not None:
|
||||
mi.tags, atag = self.get_tags(entry, verbose)
|
||||
self.res[nb] = mi
|
||||
self[nb] = mi
|
||||
if atag:
|
||||
threadbis = ThreadwithResults(self.fetchdatathread,
|
||||
br, syncbis, nb, mi.tags, verbose)
|
||||
self.thread[nb] = threadbis
|
||||
self.nbtag +=1
|
||||
threadbis.start()
|
||||
thread = Thread(target=self.fetchdatathread,
|
||||
args=(br, syncbis, nb, mi.tags, verbose))
|
||||
thread.start()
|
||||
else:
|
||||
syncbis.put((nb, None), True)
|
||||
|
||||
def final(self, sync, total_entries, verbose):
|
||||
i=0
|
||||
while i < total_entries:
|
||||
rq = sync.get(True)
|
||||
nb = int(rq[0])
|
||||
tags = rq[1]
|
||||
i+=1
|
||||
if tags is not None:
|
||||
self[nb].tags = self.get_tags(tags, verbose)[0]
|
||||
|
||||
def populate(self, entries, ibr, verbose=False, brcall=3):
|
||||
br = Queue(brcall)
|
||||
cbr = Queue(brcall-1)
|
||||
|
||||
syncp = Queue(1)
|
||||
syncc = Queue(len(entries))
|
||||
syncc = Queue(1)
|
||||
|
||||
for i in xrange(brcall-1):
|
||||
br.put(browser(), True)
|
||||
cbr.put(browser(), True)
|
||||
br.put(ibr, True)
|
||||
|
||||
self.res = [None]*len(entries)
|
||||
|
||||
prod_thread = Thread(target=self.producer, args=(syncp, entries, br, verbose))
|
||||
cons_thread = Thread(target=self.consumer, args=(syncp, syncc, cbr, len(entries), verbose))
|
||||
fin_thread = Thread(target=self.final, args=(syncc, len(entries), verbose))
|
||||
prod_thread.start()
|
||||
cons_thread.start()
|
||||
fin_thread.start()
|
||||
prod_thread.join()
|
||||
cons_thread.join()
|
||||
|
||||
#finish processing
|
||||
for i in xrange(self.nbtag):
|
||||
nb = int(syncc.get(True))
|
||||
tags = self.thread[nb].get_result()
|
||||
if tags is not None:
|
||||
self.res[nb].tags = self.get_tags(tags, verbose)[0]
|
||||
return self.res
|
||||
fin_thread.join()
|
||||
|
||||
|
||||
def search(title=None, author=None, publisher=None, isbn=None,
|
||||
@ -554,7 +539,8 @@ def search(title=None, author=None, publisher=None, isbn=None,
|
||||
|
||||
#List of entry
|
||||
ans = ResultList(baseurl, lang)
|
||||
return [x for x in ans.populate(entries, br, verbose) if x is not None]
|
||||
ans.populate(entries, br, verbose)
|
||||
return [x for x in ans if x is not None]
|
||||
|
||||
def get_social_metadata(title, authors, publisher, isbn, verbose=False,
|
||||
max_results=1, lang='all'):
|
||||
|
@ -41,20 +41,6 @@ class Fictionwise(MetadataSource):
|
||||
class FictionwiseError(Exception):
|
||||
pass
|
||||
|
||||
class ThreadwithResults(Thread):
|
||||
def __init__(self, func, *args, **kargs):
|
||||
self.func = func
|
||||
self.args = args
|
||||
self.kargs = kargs
|
||||
self.result = None
|
||||
Thread.__init__(self)
|
||||
|
||||
def get_result(self):
|
||||
return self.result
|
||||
|
||||
def run(self):
|
||||
self.result = self.func(*self.args, **self.kargs)
|
||||
|
||||
def report(verbose):
|
||||
if verbose:
|
||||
import traceback
|
||||
@ -155,7 +141,6 @@ class ResultList(list):
|
||||
|
||||
def __init__(self, islink):
|
||||
self.islink = islink
|
||||
self.thread = []
|
||||
self.retitle = re.compile(r'\[[^\[\]]+\]')
|
||||
self.rechkauth = re.compile(r'.*book\s*by', re.I)
|
||||
self.redesc = re.compile(r'book\s*description\s*:\s*(<br[^>]+>)*(?P<desc>.*)<br[^>]*>.{,15}publisher\s*:', re.I)
|
||||
@ -361,27 +346,21 @@ class ResultList(list):
|
||||
entry = None
|
||||
finally:
|
||||
qbr.put(browser, True)
|
||||
qsync.put(nb, True)
|
||||
return entry
|
||||
qsync.put((nb, entry), True)
|
||||
|
||||
def producer(self, sync, urls, br, verbose=False):
|
||||
for i in xrange(len(urls)):
|
||||
thread = ThreadwithResults(self.fetchdatathread, br, sync,
|
||||
i, self.BASE_URL+urls[i], verbose)
|
||||
thread = Thread(target=self.fetchdatathread,
|
||||
args=(br, sync, i, self.BASE_URL+urls[i], verbose))
|
||||
thread.start()
|
||||
self.thread.append(thread)
|
||||
|
||||
def consumer(self, sync, total_entries, verbose=False):
|
||||
res=[None]*total_entries
|
||||
self.extend([None]*total_entries)
|
||||
i=0
|
||||
while i < total_entries:
|
||||
nb = int(sync.get(True))
|
||||
self.thread[nb].join()
|
||||
entry = self.thread[nb].get_result()
|
||||
rq = sync.get(True)
|
||||
self[int(rq[0])] = self.fill_MI(rq[1], verbose)
|
||||
i+=1
|
||||
if entry is not None:
|
||||
res[nb] = self.fill_MI(entry, verbose)
|
||||
return res
|
||||
|
||||
def populate(self, entries, br, verbose=False, brcall=3):
|
||||
if not self.islink:
|
||||
@ -396,12 +375,11 @@ class ResultList(list):
|
||||
pbr.put(br, True)
|
||||
|
||||
prod_thread = Thread(target=self.producer, args=(sync, entries, pbr, verbose))
|
||||
cons_thread = ThreadwithResults(self.consumer, sync, len(entries), verbose)
|
||||
cons_thread = Thread(target=self.consumer, args=(sync, len(entries), verbose))
|
||||
prod_thread.start()
|
||||
cons_thread.start()
|
||||
prod_thread.join()
|
||||
cons_thread.join()
|
||||
self.extend(cons_thread.get_result())
|
||||
|
||||
|
||||
def search(title=None, author=None, publisher=None, isbn=None,
|
||||
|
@ -61,20 +61,6 @@ class GoogleBooks(MetadataSource):
|
||||
class GoogleBooksError(Exception):
|
||||
pass
|
||||
|
||||
class ThreadwithResults(Thread):
|
||||
def __init__(self, func, *args, **kargs):
|
||||
self.func = func
|
||||
self.args = args
|
||||
self.kargs = kargs
|
||||
self.result = None
|
||||
Thread.__init__(self)
|
||||
|
||||
def get_result(self):
|
||||
return self.result
|
||||
|
||||
def run(self):
|
||||
self.result = self.func(*self.args, **self.kargs)
|
||||
|
||||
def report(verbose):
|
||||
if verbose:
|
||||
import traceback
|
||||
@ -173,8 +159,6 @@ class Query(object):
|
||||
return entries
|
||||
|
||||
class ResultList(list):
|
||||
def __init__(self):
|
||||
self.thread = []
|
||||
|
||||
def get_description(self, entry, verbose):
|
||||
try:
|
||||
@ -206,8 +190,7 @@ class ResultList(list):
|
||||
return val
|
||||
|
||||
def get_identifiers(self, entry, mi):
|
||||
isbns = [str(x.text).strip() for x in identifier(entry)]
|
||||
isbns = [t[5:] for t in isbns \
|
||||
isbns = [t[5:] for t in [str(x.text).strip() for x in identifier(entry)] \
|
||||
if t[:5].upper() == 'ISBN:' and check_isbn(t[5:])]
|
||||
# for x in identifier(entry):
|
||||
# t = str(x.text).strip()
|
||||
@ -309,8 +292,7 @@ class ResultList(list):
|
||||
entry = None
|
||||
finally:
|
||||
qbr.put(browser, True)
|
||||
qsync.put(nb, True)
|
||||
return entry
|
||||
qsync.put((nb, entry), True)
|
||||
|
||||
def producer(self, sync, entries, br, verbose=False):
|
||||
for i in xrange(len(entries)):
|
||||
@ -319,21 +301,18 @@ class ResultList(list):
|
||||
except:
|
||||
id_url = None
|
||||
report(verbose)
|
||||
thread = ThreadwithResults(self.fetchdatathread, br, sync,
|
||||
i, id_url, verbose)
|
||||
thread = Thread(target=self.fetchdatathread,
|
||||
args=(br, sync, i, id_url, verbose))
|
||||
thread.start()
|
||||
self.thread.append(thread)
|
||||
|
||||
def consumer(self, entries, sync, total_entries, verbose=False):
|
||||
res=[None]*total_entries #remove?
|
||||
self.extend([None]*total_entries)
|
||||
i=0
|
||||
while i < total_entries:
|
||||
nb = int(sync.get(True))
|
||||
self.thread[nb].join()
|
||||
data = self.thread[nb].get_result()
|
||||
res[nb] = self.fill_MI(entries[nb], data, verbose)
|
||||
rq = sync.get(True)
|
||||
nb = int(rq[0])
|
||||
self[nb] = self.fill_MI(entries[nb], rq[1], verbose)
|
||||
i+=1
|
||||
return res
|
||||
|
||||
def populate(self, entries, br, verbose=False, brcall=3):
|
||||
pbr = Queue(brcall)
|
||||
@ -343,12 +322,11 @@ class ResultList(list):
|
||||
pbr.put(br, True)
|
||||
|
||||
prod_thread = Thread(target=self.producer, args=(sync, entries, pbr, verbose))
|
||||
cons_thread = ThreadwithResults(self.consumer, entries, sync, len(entries), verbose)
|
||||
cons_thread = Thread(target=self.consumer, args=(entries, sync, len(entries), verbose))
|
||||
prod_thread.start()
|
||||
cons_thread.start()
|
||||
prod_thread.join()
|
||||
cons_thread.join()
|
||||
self.extend(cons_thread.get_result())
|
||||
|
||||
|
||||
def search(title=None, author=None, publisher=None, isbn=None,
|
||||
|
@ -82,20 +82,6 @@ class NiceBooksError(Exception):
|
||||
class ISBNNotFound(NiceBooksError):
|
||||
pass
|
||||
|
||||
class ThreadwithResults(Thread):
|
||||
def __init__(self, func, *args, **kargs):
|
||||
self.func = func
|
||||
self.args = args
|
||||
self.kargs = kargs
|
||||
self.result = None
|
||||
Thread.__init__(self)
|
||||
|
||||
def get_result(self):
|
||||
return self.result
|
||||
|
||||
def run(self):
|
||||
self.result = self.func(*self.args, **self.kargs)
|
||||
|
||||
def report(verbose):
|
||||
if verbose:
|
||||
import traceback
|
||||
@ -191,7 +177,6 @@ class ResultList(list):
|
||||
|
||||
def __init__(self, islink):
|
||||
self.islink = islink
|
||||
self.thread = []
|
||||
self.repub = re.compile(u'\s*.diteur\s*', re.I)
|
||||
self.reauteur = re.compile(u'\s*auteur.*', re.I)
|
||||
self.reautclean = re.compile(u'\s*\(.*\)\s*')
|
||||
@ -302,27 +287,21 @@ class ResultList(list):
|
||||
entry = None
|
||||
finally:
|
||||
qbr.put(browser, True)
|
||||
qsync.put(nb, True)
|
||||
return entry
|
||||
qsync.put((nb, entry), True)
|
||||
|
||||
def producer(self, sync, urls, br, verbose=False):
|
||||
for i in xrange(len(urls)):
|
||||
thread = ThreadwithResults(self.fetchdatathread, br, sync,
|
||||
i, self.BASE_URL+urls[i], verbose)
|
||||
thread = Thread(target=self.fetchdatathread,
|
||||
args=(br, sync, i, self.BASE_URL+urls[i], verbose))
|
||||
thread.start()
|
||||
self.thread.append(thread)
|
||||
|
||||
def consumer(self, sync, total_entries, verbose=False):
|
||||
res=[None]*total_entries
|
||||
self.extend([None]*total_entries)
|
||||
i=0
|
||||
while i < total_entries:
|
||||
nb = int(sync.get(True))
|
||||
self.thread[nb].join()
|
||||
entry = self.thread[nb].get_result()
|
||||
rq = sync.get(True)
|
||||
self[int(rq[0])] = self.fill_MI(rq[1], verbose)
|
||||
i+=1
|
||||
if entry is not None:
|
||||
res[nb] = self.fill_MI(entry, verbose)
|
||||
return res
|
||||
|
||||
def populate(self, entries, br, verbose=False, brcall=3):
|
||||
if not self.islink:
|
||||
@ -337,12 +316,11 @@ class ResultList(list):
|
||||
pbr.put(br, True)
|
||||
|
||||
prod_thread = Thread(target=self.producer, args=(sync, entries, pbr, verbose))
|
||||
cons_thread = ThreadwithResults(self.consumer, sync, len(entries), verbose)
|
||||
cons_thread = Thread(target=self.consumer, args=(sync, len(entries), verbose))
|
||||
prod_thread.start()
|
||||
cons_thread.start()
|
||||
prod_thread.join()
|
||||
cons_thread.join()
|
||||
self.extend(cons_thread.get_result())
|
||||
|
||||
class Covers(object):
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user