Remove threading from fictionwise

2025-07-09 03:04:10 -04:00 · 2010-12-11 22:41:37 +01:00 · 2010-12-11 22:41:37 +01:00 · b2004ad77b
commit b2004ad77b
parent d5bc18b5c2
1 changed files with 38 additions and 74 deletions
--- a/src/calibre/ebooks/metadata/fictionwise.py
+++ b/src/calibre/ebooks/metadata/fictionwise.py
@ -4,8 +4,6 @@ __copyright__ = '2010, sengian <sengian1@gmail.com>'
 __docformat__ = 'restructuredtext en'

 import sys, textwrap, re, traceback, socket
-from threading import Thread
-from Queue import Queue
 from urllib import urlencode

 from lxml.html import soupparser, tostring
@ -20,7 +18,7 @@ from calibre.utils.config import OptionParser
 from calibre.utils.date import parse_date, utcnow
 from calibre.utils.cleantext import clean_ascii_chars, unescape

-class Fictionwise(MetadataSource): # {{{
+class Fictionwise(MetadataSource):

    author = 'Sengian'
    name = 'Fictionwise'
@ -36,51 +34,10 @@ class Fictionwise(MetadataSource): # {{{
            self.exception = e
            self.tb = traceback.format_exc()

-    # }}}

 class FictionwiseError(Exception):
    pass

-class BrowserThread(Thread):
-
-    def __init__(self, url, verbose=False, timeout=10., ex=Exception, name='Meta'):
-        self.url = url
-        self.ex = ex
-        self.plugname = name
-        self.verbose = verbose
-        self.timeout = timeout
-        self.result = None
-        Thread.__init__(self)
-
-    def get_result(self):
-        return self.result
-
-    def run(self):
-        try:
-            raw = browser().open_novisit(self.url, timeout=self.timeout).read()
-        except Exception, e:
-            report(self.verbose)
-            if callable(getattr(e, 'getcode', None)) and \
-                    e.getcode() == 404:
-                self.result = None
-            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
-                raise self.ex(_('%s timed out. Try again later.') % self.plugname)
-            raise self.ex(_('%s encountered an error.') % self.plugname)
-        if '<title>404 - ' in raw:
-            report(self.verbose)
-            self.result = None
-            return None
-        raw = xml_to_unicode(raw, strip_encoding_pats=True,
-                resolve_entities=True)[0]
-        try:
-            self.result = soupparser.fromstring(raw)
-        except:
-            try:
-                #remove ASCII invalid chars
-                self.result = soupparser.fromstring(clean_ascii_chars(raw))
-            except:
-                self.result = None
-
 def report(verbose):
    if verbose:
        traceback.print_exc()
@ -161,15 +118,16 @@ class Query(object):
        results = [i.xpath('descendant-or-self::a')[0].get('href') for i in results]
        #return feed if no links ie normally a single book or nothing
        if not results:
-            results = [feed]
-        return results
+            return [feed], False
+        return results, True

 class ResultList(list):

    BASE_URL = 'http://www.fictionwise.com'
    COLOR_VALUES = {'BLUE': 4, 'GREEN': 3, 'YELLOW': 2, 'RED': 1, 'NA': 0}

-    def __init__(self):
+    def __init__(self, islink):
+        self.islink = islink
        self.retitle = re.compile(r'\[[^\[\]]+\]')
        self.rechkauth = re.compile(r'.*book\s*by', re.I)
        self.redesc = re.compile(r'book\s*description\s*:\s*(<br[^>]+>)*(?P<desc>.*)<br[^>]*>.{,15}publisher\s*:', re.I)
@ -337,47 +295,53 @@ class ResultList(list):
            pass
        return mi

-    def producer(self, q, data, verbose=False):
-        for x in data:
-            thread = BrowserThread(self.BASE_URL+x, verbose=verbose, ex=FictionwiseError,
-                name='Fictionwise')
-            thread.start()
-            q.put(thread, True)
+    def get_individual_metadata(self, url, br, verbose):
+        try:
+            raw = br.open_novisit(url).read()
+        except Exception, e:
+            report(verbose)
+            if callable(getattr(e, 'getcode', None)) and \
+                    e.getcode() == 404:
+                return None
+            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+                raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
+            raise FictionwiseError(_('Fictionwise encountered an error.'))
+        if '<title>404 - ' in raw:
+            report(verbose)
+            return None
+        raw = xml_to_unicode(raw, strip_encoding_pats=True,
+                resolve_entities=True)[0]
+        try:
+            return soupparser.fromstring(raw)
+        except:
+            try:
+                #remove ASCII invalid chars
+                return soupparser.fromstring(clean_ascii_chars(raw))
+            except:
+                report(verbose)
+                return None

-    def consumer(self, q, total_entries, verbose=False):
-        while len(self) < total_entries:
-            thread = q.get(True)
-            thread.join()
-            mi = thread.get_result()
-            if mi is None:
-                self.append(None)
-            else:
-                self.append(self.fill_MI(mi, verbose))
-
-    def populate(self, entries, verbose=False, brcall=3):
-        if len(entries) == 1 and not isinstance(entries[0], str):
+    def populate(self, entries, br, verbose=False):
+        if not self.islink:
            #single entry
            self.append(self.fill_MI(entries[0], verbose))
        else:
            #multiple entries
-            q = Queue(brcall)
-            prod_thread = Thread(target=self.producer, args=(q, entries, verbose))
-            cons_thread = Thread(target=self.consumer, args=(q, len(entries), verbose))
-            prod_thread.start()
-            cons_thread.start()
-            prod_thread.join()
-            cons_thread.join()
+            for x in entries:
+                entry = self.get_individual_metadata(self.BASE_URL+x, br, verbose)
+                if entry is not None:
+                    self.append(self.fill_MI(entry, verbose))


 def search(title=None, author=None, publisher=None, isbn=None,
           min_viewability='none', verbose=False, max_results=5,
            keywords=None):
    br = browser()
-    entries = Query(title=title, author=author, publisher=publisher,
+    entries, islink = Query(title=title, author=author, publisher=publisher,
        keywords=keywords, max_results=max_results)(br, verbose, timeout = 15.)

    #List of entry
-    ans = ResultList()
+    ans = ResultList(islink)
    ans.populate(entries, br, verbose)
    return [x for x in ans if x is not None]