Fix threading in amazon

2025-07-09 03:04:10 -04:00 · 2010-12-12 11:57:00 +01:00 · 2010-12-12 11:57:00 +01:00 · 5c89b576e3
commit 5c89b576e3
parent ae781ae614
1 changed files with 109 additions and 86 deletions
--- a/src/calibre/ebooks/metadata/amazonbis.py
+++ b/src/calibre/ebooks/metadata/amazonbis.py
@ -4,6 +4,7 @@ __copyright__ = '2010, sengian <sengian1@gmail.com>'
 import sys, textwrap, re, traceback, socket
 from threading import Thread
 from Queue import Queue
 from urllib import urlencode
 from math import ceil
@ -21,57 +22,6 @@ from calibre.utils.config import OptionParser
 from calibre.library.comments import sanitize_comments_html
 # class AmazonFr(MetadataSource):
    # name = 'Amazon French'
    # description = _('Downloads metadata from amazon.fr')
    # supported_platforms = ['windows', 'osx', 'linux']
    # author = 'Sengian'
    # version = (1, 0, 0)
    # has_html_comments = True
    # def fetch(self):
        # try:
            # self.results = search(self.title, self.book_author, self.publisher,
                                  # self.isbn, max_results=10, verbose=self.verbose, lang='fr')
        # except Exception, e:
            # self.exception = e
            # self.tb = traceback.format_exc()
 # class AmazonEs(MetadataSource):
    # name = 'Amazon Spanish'
    # description = _('Downloads metadata from amazon.com in spanish')
    # supported_platforms = ['windows', 'osx', 'linux']
    # author = 'Sengian'
    # version = (1, 0, 0)
    # has_html_comments = True
    # def fetch(self):
        # try:
            # self.results = search(self.title, self.book_author, self.publisher,
                                  # self.isbn, max_results=10, verbose=self.verbose, lang='es')
        # except Exception, e:
            # self.exception = e
            # self.tb = traceback.format_exc()
 # class AmazonDe(MetadataSource):
    # name = 'Amazon German'
    # description = _('Downloads metadata from amazon.de')
    # supported_platforms = ['windows', 'osx', 'linux']
    # author = 'Sengian'
    # version = (1, 0, 0)
    # has_html_comments = True
    # def fetch(self):
        # try:
            # self.results = search(self.title, self.book_author, self.publisher,
                                  # self.isbn, max_results=10, verbose=self.verbose, lang='de')
        # except Exception, e:
            # self.exception = e
            # self.tb = traceback.format_exc()
 class Amazon(MetadataSource):
    name = 'Amazon'
@ -83,8 +33,33 @@ class Amazon(MetadataSource):
    def fetch(self):
        try:
            lang = get_lang()
            lang = lang[:2] if re.match(r'(fr.*|de.*)', lang) else 'all'
            if lang == 'all':
                self.results = search(self.title, self.book_author, self.publisher,
                                  self.isbn, max_results=5, verbose=self.verbose, lang='all')
            else:
                tmploc = ThreadwithResults(search, self.title, self.book_author, 
                                self.publisher,self.isbn, max_results=5,
                                    verbose=self.verbose, lang=lang)
                tmpnoloc = ThreadwithResults(search, self.title, self.book_author,
                                self.publisher, self.isbn, max_results=5,
                                    verbose=self.verbose, lang='all')
                tmploc.start()
                tmpnoloc.start()
                tmploc.join()
                tmpnoloc.join()
                tmploc= tmploc.get_result()
                tmpnoloc= tmpnoloc.get_result()
                tempres = None
                if tmpnoloc is not None:
                    tempres = tmpnoloc
                if tmploc is not None:
                    tempres = tmploc
                    if tmpnoloc is not None:
                        tempres.extend(tmpnoloc)
                self.results = tmpres
        except Exception, e:
            self.exception = e
            self.tb = traceback.format_exc()
@ -109,10 +84,10 @@ class AmazonSocial(MetadataSource):
                self.results = get_social_metadata(self.title, self.book_author, self.publisher,
                                    self.isbn, verbose=self.verbose, lang='all')[0]
            else:
-                tmploc = ThreadwithResults(AmazonError, self.verbose, get_social_metadata, self.title,
+                tmploc = ThreadwithResults(get_social_metadata, self.title, self.book_author, 
-                            self.book_author, self.publisher,self.isbn, verbose=self.verbose, lang=lang)
+                                    self.publisher,self.isbn, verbose=self.verbose, lang=lang)
-                tmpnoloc = ThreadwithResults(AmazonError, self.verbose, get_social_metadata, self.title,
+                tmpnoloc = ThreadwithResults(get_social_metadata, self.title, self.book_author,
-                            self.book_author, self.publisher, self.isbn, verbose=self.verbose, lang='all')
+                                    self.publisher, self.isbn, verbose=self.verbose, lang='all')
                tmploc.start()
                tmpnoloc.start()
                tmploc.join()
@ -123,9 +98,7 @@ class AmazonSocial(MetadataSource):
                tmpnoloc= tmpnoloc.get_result()
                if tmpnoloc is not None:
                    tmpnoloc = tmpnoloc[0]
-                print tmpnoloc
+                    if tmpnoloc is not None:
                if tmploc is not None and tmpnoloc is not None:
                        if tmploc.rating is None:
                            tmploc.rating = tmpnoloc.rating
                        if tmploc.comments is not None:
@ -146,12 +119,10 @@ class AmazonError(Exception):
    pass
 class ThreadwithResults(Thread):
-    def __init__(self, error, verb, func, *args, **kargs):
+    def __init__(self, func, *args, **kargs):
        self.func = func
        self.args = args
        self.kargs = kargs
        self.verbose = verb
        self.ex = error
        self.result = None
        Thread.__init__(self)
@ -159,11 +130,8 @@ class ThreadwithResults(Thread):
        return self.result
    def run(self):
        try:
        self.result = self.func(*self.args, **self.kargs)
-        except Exception, e:
+
            report(self.verbose)
            raise self.ex(_('An error was encountered in the function threading'))
 class Query(object):
@ -172,10 +140,10 @@ class Query(object):
    BASE_URL_DE = 'http://www.amazon.de'
    def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None,
-        max_results=10, rlang='all'):
+        max_results=20, rlang='all'):
        assert not(title is None and author is None and publisher is None \
            and isbn is None and keywords is None)
-        assert (max_results < 11)
+        assert (max_results < 21)
        self.max_results = int(max_results)
        self.renbres = re.compile(u'\s*([0-9.,]+)\s*')
@ -304,6 +272,9 @@ class ResultList(object):
    def __init__(self, baseurl, lang = 'all'):
        self.baseurl = baseurl
        self.lang = lang
        self.thread = []
        self.res = []
        self.nbtag = 0
        self.repub = re.compile(u'\((.*)\)')
        self.rerat = re.compile(u'([0-9.]+)')
        self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
@ -499,20 +470,72 @@ class ResultList(object):
                report(verbose)
                return None
-    def populate(self, entries, br, verbose=False):
+    def fetchdatathread(self, qbr, qsync, nb, url, verbose):
-        res = []
+        try:
-        for x in entries:
+            browser = qbr.get(True)
-            entry = self.get_individual_metadata(x, br, verbose)
+            entry = self.get_individual_metadata(url, browser, verbose)
        except:
            report(verbose)
            entry = None
        finally:
            qbr.put(browser, True)
            qsync.put(nb, True)
            return entry
    def producer(self, sync, urls, br, verbose=False):
        '''
        Start one ThreadwithResults per entry of urls, each running
        self.fetchdatathread(br, sync, index, url, verbose), and append the
        started threads to self.thread in the order of urls.
        '''
        for nb, url in enumerate(urls):
            worker = ThreadwithResults(self.fetchdatathread, br, sync,
                                            nb, url, verbose)
            worker.start()
            self.thread.append(worker)
    def consumer(self, sync, syncbis, br, total_entries, verbose=False):
        i=0
        while i < total_entries:
            nb = int(sync.get(True))
            self.thread[nb].join()
            entry = self.thread[nb].get_result()
            i+=1
            if entry is not None:
                mi = self.fill_MI(entry, verbose)
                if mi is not None:
                    mi.tags, atag = self.get_tags(entry, verbose)
                    self.res[nb] = mi
                    if atag:
-                        tags = self.get_individual_metadata(mi.tags, br, verbose)
+                        threadbis = ThreadwithResults(self.fetchdatathread,
                                        br, syncbis, nb, mi.tags, verbose)
                        self.thread[nb] = threadbis
                        self.nbtag +=1
                        threadbis.start()
    def populate(self, entries, ibr, verbose=False, brcall=3):
        br = Queue(brcall)
        cbr = Queue(brcall-1)
        syncp = Queue(1)
        syncc = Queue(len(entries))
        for i in xrange(brcall-1):
            br.put(browser(), True)
            cbr.put(browser(), True)
        br.put(ibr, True)
        self.res = [None]*len(entries)
        prod_thread = Thread(target=self.producer, args=(syncp, entries, br, verbose))
        cons_thread = Thread(target=self.consumer, args=(syncp, syncc, cbr, len(entries), verbose))
        prod_thread.start()
        cons_thread.start()
        prod_thread.join()
        cons_thread.join()
        #finish processing
        for i in xrange(self.nbtag):
            nb = int(syncc.get(True))
            tags = self.thread[nb].get_result()
            if tags is not None:
-                            mi.tags = self.get_tags(tags, verbose)[0]
+                self.res[nb].tags = self.get_tags(tags, verbose)[0]
-                    res.append(mi)
+        return self.res
        return res
 def search(title=None, author=None, publisher=None, isbn=None,
@ -561,7 +584,7 @@ def option_parser():
        %prog [options]
        Fetch book metadata from Amazon. You must specify one of title, author,
-        ISBN, publisher or keywords. Will fetch a maximum of 10 matches,
+        ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
        so you should make your query as specific as possible.
        You can chose the language for metadata retrieval:
        english & french & german